Fix configfs leak

[safe/jmp/linux-2.6] / fs / ubifs / file.c
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c

index 51cf511..16a6444 100644 (file)
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -21,34 +21,32 @@
   */
  
  /*
- * This file implements VFS file and inode operations of regular files, device
+ * This file implements VFS file and inode operations for regular files, device
   * nodes and symlinks as well as address space operations.
   *
- * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
- * page is dirty and is used for budgeting purposes - dirty pages should not be
- * budgeted. The PG_checked flag is set if full budgeting is required for the
- * page e.g., when it corresponds to a file hole or it is just beyond the file
- * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
- * fail in this function, and the budget is released in 'ubifs_write_end()'. So
- * the PG_private and PG_checked flags carry the information about how the page
- * was budgeted, to make it possible to release the budget properly.
+ * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
+ * the page is dirty and is used for optimization purposes - dirty pages are
+ * not budgeted so the flag shows that 'ubifs_write_end()' should not release
+ * the budget for this page. The @PG_checked flag is set if full budgeting is
+ * required for the page e.g., when it corresponds to a file hole or it is
+ * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
+ * it is OK to fail in this function, and the budget is released in
+ * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
+ * information about how the page was budgeted, to make it possible to release
+ * the budget properly.
   *
- * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
- * we implement. However, this is not true for '->writepage()', which might be
- * called with 'i_mutex' unlocked. For example, when pdflush is performing
- * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
- * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
- * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
- * path'. So, in '->writepage()' we are only guaranteed that the page is
- * locked.
+ * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
+ * implement. However, this is not true for 'ubifs_writepage()', which may be
+ * called with @i_mutex unlocked. For example, when pdflush is doing background
+ * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. On "normal"
+ * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
+ * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
+ * we are only guaranteed that the page is locked.
   *
- * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
- * readahead path does not have it locked ("sys_read -> generic_file_aio_read
- * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
- * not set as well. However, UBIFS disables readahead.
- *
- * This, for example means that there might be 2 concurrent '->writepage()'
- * calls for the same inode, but different inode dirty pages.
+ * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
+ * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
+ * ondemand_readahead -> readpage"). In the read-ahead case, the @I_SYNC flag
+ * is not set either. However, UBIFS disables read-ahead.
   */
  
  #include "ubifs.h"
@@ -72,8 +70,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
                 return err;
         }
  
-       ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum);
-
+       ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+                    ubifs_inode(inode)->creat_sqnum);
         len = le32_to_cpu(dn->size);
         if (len <= 0 || len > UBIFS_BLOCK_SIZE)
                 goto dump;
@@ -219,7 +217,8 @@ static void release_existing_page_budget(struct ubifs_info *c)
  }
  
  static int write_begin_slow(struct address_space *mapping,
-                           loff_t pos, unsigned len, struct page **pagep)
+                           loff_t pos, unsigned len, struct page **pagep,
+                           unsigned flags)
  {
         struct inode *inode = mapping->host;
         struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -247,14 +246,14 @@ static int write_begin_slow(struct address_space *mapping,
         if (unlikely(err))
                 return err;
  
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
         if (unlikely(!page)) {
                 ubifs_release_budget(c, &req);
                 return -ENOMEM;
         }
  
         if (!PageUptodate(page)) {
-               if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+               if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
                         SetPageChecked(page);
                 else {
                         err = do_readpage(page);
@@ -429,31 +428,32 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
         struct ubifs_inode *ui = ubifs_inode(inode);
         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
         int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+       int skipped_read = 0;
         struct page *page;
  
-
         ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
  
         if (unlikely(c->ro_media))
                 return -EROFS;
  
         /* Try out the fast-path part first */
-       page = __grab_cache_page(mapping, index);
+       page = grab_cache_page_write_begin(mapping, index, flags);
         if (unlikely(!page))
                 return -ENOMEM;
  
         if (!PageUptodate(page)) {
                 /* The page is not loaded from the flash */
-               if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+               if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
                         /*
                          * We change whole page so no need to load it. But we
                          * have to set the @PG_checked flag to make the further
-                        * code the page is new. This might be not true, but it
-                        * is better to budget more that to read the page from
-                        * the media.
+                        * code know that the page is new. This might not be
+                        * true, but it is better to budget more than to read
+                        * the page from the media.
                          */
                         SetPageChecked(page);
-               else {
+                       skipped_read = 1;
+               } else {
                         err = do_readpage(page);
                         if (err) {
                                 unlock_page(page);
@@ -470,6 +470,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
         if (unlikely(err)) {
                 ubifs_assert(err == -ENOSPC);
                 /*
+                * If we skipped reading the page because we were going to
+                * write all of it, then it is not up to date.
+                */
+               if (skipped_read) {
+                       ClearPageChecked(page);
+                       ClearPageUptodate(page);
+               }
+               /*
                  * Budgeting failed which means it would have to force
                  * write-back but didn't, because we set the @fast flag in the
                  * request. Write-back cannot be done now, while we have the
@@ -483,12 +491,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
                 unlock_page(page);
                 page_cache_release(page);
  
-               return write_begin_slow(mapping, pos, len, pagep);
+               return write_begin_slow(mapping, pos, len, pagep, flags);
         }
  
         /*
-        * Whee, we aquired budgeting quickly - without involving
-        * garbage-collection, committing or forceing write-back. We return
+        * Whee, we acquired budgeting quickly - without involving
+        * garbage-collection, committing or forcing write-back. We return
          * with @ui->ui_mutex locked if we are appending pages, and unlocked
          * otherwise. This is an optimization (slightly hacky though).
          */
@@ -552,7 +560,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
  
                 /*
                  * Return 0 to force VFS to repeat the whole operation, or the
-                * error code if 'do_readpage()' failes.
+                * error code if 'do_readpage()' fails.
                  */
                 copied = do_readpage(page);
                 goto out;
@@ -626,7 +634,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
  
                         dn = bu->buf + (bu->zbranch[nn].offs - offs);
  
-                       ubifs_assert(dn->ch.sqnum >
+                       ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
                                      ubifs_inode(inode)->creat_sqnum);
  
                         len = le32_to_cpu(dn->size);
@@ -691,32 +699,22 @@ out_err:
  /**
   * ubifs_do_bulk_read - do bulk-read.
   * @c: UBIFS file-system description object
- * @page1: first page
+ * @bu: bulk-read information
+ * @page1: first page to read
   *
   * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
   */
-static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
+static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
+                             struct page *page1)
  {
         pgoff_t offset = page1->index, end_index;
         struct address_space *mapping = page1->mapping;
         struct inode *inode = mapping->host;
         struct ubifs_inode *ui = ubifs_inode(inode);
-       struct bu_info *bu;
         int err, page_idx, page_cnt, ret = 0, n = 0;
+       int allocate = bu->buf ? 0 : 1;
         loff_t isize;
  
-       bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
-       if (!bu)
-               return 0;
-
-       bu->buf_len = c->bulk_read_buf_size;
-       bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
-       if (!bu->buf)
-               goto out_free;
-
-       data_key_init(c, &bu->key, inode->i_ino,
-                     offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
-
         err = ubifs_tnc_get_bu_keys(c, bu);
         if (err)
                 goto out_warn;
@@ -735,12 +733,25 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
                  * together. If all the pages were like this, bulk-read would
                  * reduce performance, so we turn it off for a while.
                  */
-               ui->read_in_a_row = 0;
-               ui->bulk_read = 0;
-               goto out_free;
+               goto out_bu_off;
         }
  
         if (bu->cnt) {
+               if (allocate) {
+                       /*
+                        * Allocate bulk-read buffer depending on how many data
+                        * nodes we are going to read.
+                        */
+                       bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
+                                     bu->zbranch[bu->cnt - 1].len -
+                                     bu->zbranch[0].offs;
+                       ubifs_assert(bu->buf_len > 0);
+                       ubifs_assert(bu->buf_len <= c->leb_size);
+                       bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
+                       if (!bu->buf)
+                               goto out_bu_off;
+               }
+
                 err = ubifs_tnc_bulk_read(c, bu);
                 if (err)
                         goto out_warn;
@@ -779,13 +790,17 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
         ui->last_page_read = offset + page_idx - 1;
  
  out_free:
-       kfree(bu->buf);
-       kfree(bu);
+       if (allocate)
+               kfree(bu->buf);
         return ret;
  
  out_warn:
         ubifs_warn("ignoring error %d and skipping bulk-read", err);
         goto out_free;
+
+out_bu_off:
+       ui->read_in_a_row = ui->bulk_read = 0;
+       goto out_free;
  }
  
  /**
@@ -803,18 +818,20 @@ static int ubifs_bulk_read(struct page *page)
         struct ubifs_info *c = inode->i_sb->s_fs_info;
         struct ubifs_inode *ui = ubifs_inode(inode);
         pgoff_t index = page->index, last_page_read = ui->last_page_read;
-       int ret = 0;
+       struct bu_info *bu;
+       int err = 0, allocated = 0;
  
         ui->last_page_read = index;
-
         if (!c->bulk_read)
                 return 0;
+
         /*
-        * Bulk-read is protected by ui_mutex, but it is an optimization, so
-        * don't bother if we cannot lock the mutex.
+        * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
+        * so don't bother if we cannot lock the mutex.
          */
         if (!mutex_trylock(&ui->ui_mutex))
                 return 0;
+
         if (index != last_page_read + 1) {
                 /* Turn off bulk-read if we stop reading sequentially */
                 ui->read_in_a_row = 1;
@@ -822,6 +839,7 @@ static int ubifs_bulk_read(struct page *page)
                         ui->bulk_read = 0;
                 goto out_unlock;
         }
+
         if (!ui->bulk_read) {
                 ui->read_in_a_row += 1;
                 if (ui->read_in_a_row < 3)
@@ -829,10 +847,35 @@ static int ubifs_bulk_read(struct page *page)
                 /* Three reads in a row, so switch on bulk-read */
                 ui->bulk_read = 1;
         }
-       ret = ubifs_do_bulk_read(c, page);
+
+       /*
+        * If possible, try to use pre-allocated bulk-read information, which
+        * is protected by @c->bu_mutex.
+        */
+       if (mutex_trylock(&c->bu_mutex))
+               bu = &c->bu;
+       else {
+               bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
+               if (!bu)
+                       goto out_unlock;
+
+               bu->buf = NULL;
+               allocated = 1;
+       }
+
+       bu->buf_len = c->max_bu_buf_len;
+       data_key_init(c, &bu->key, inode->i_ino,
+                     page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+       err = ubifs_do_bulk_read(c, bu, page);
+
+       if (!allocated)
+               mutex_unlock(&c->bu_mutex);
+       else
+               kfree(bu);
+
  out_unlock:
         mutex_unlock(&ui->ui_mutex);
-       return ret;
+       return err;
  }
  
  static int ubifs_readpage(struct file *file, struct page *page)
@@ -914,7 +957,7 @@ static int do_writepage(struct page *page, int len)
   * whole index and correct all inode sizes, which is long an unacceptable.
   *
   * To prevent situations like this, UBIFS writes pages back only if they are
- * within last synchronized inode size, i.e. the the size which has been
+ * within the last synchronized inode size, i.e. the size which has been
   * written to the flash media last time. Otherwise, UBIFS forces inode
   * write-back, thus making sure the on-flash inode contains current inode size,
   * and then keeps writing pages back.
@@ -1130,11 +1173,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
         ui->ui_size = inode->i_size;
         /* Truncation changes inode [mc]time */
         inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
-       /* The other attributes may be changed at the same time as well */
+       /* Other attributes may be changed at the same time as well */
         do_attr_changes(inode, attr);
-
         err = ubifs_jnl_truncate(c, inode, old_size, new_size);
         mutex_unlock(&ui->ui_mutex);
+
  out_budg:
         if (budgeted)
                 ubifs_release_budget(c, &req);
@@ -1346,7 +1389,6 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos)
  {
         int err;
-       ssize_t ret;
         struct inode *inode = iocb->ki_filp->f_mapping->host;
         struct ubifs_info *c = inode->i_sb->s_fs_info;
  
@@ -1354,17 +1396,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
         if (err)
                 return err;
  
-       ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-       if (ret < 0)
-               return ret;
-
-       if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
-               err = ubifs_sync_wbufs_by_inode(c, inode);
-               if (err)
-                       return err;
-       }
-
-       return ret;
+       return generic_file_aio_write(iocb, iov, nr_segs, pos);
  }
  
  static int ubifs_set_page_dirty(struct page *page)
@@ -1399,8 +1431,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
   * mmap()d file has taken write protection fault and is being made
   * writable. UBIFS must ensure page is budgeted for.
   */
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
  {
+       struct page *page = vmf->page;
         struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
         struct ubifs_info *c = inode->i_sb->s_fs_info;
         struct timespec now = ubifs_current_time(inode);
@@ -1412,7 +1445,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
         ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
  
         if (unlikely(c->ro_media))
-               return -EROFS;
+               return VM_FAULT_SIGBUS; /* -EROFS */
  
         /*
          * We have not locked @page so far so we may budget for changing the
@@ -1445,7 +1478,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
                 if (err == -ENOSPC)
                         ubifs_warn("out of space for mmapped file "
                                    "(inode number %lu)", inode->i_ino);
-               return err;
+               return VM_FAULT_SIGBUS;
         }
  
         lock_page(page);
@@ -1485,10 +1518,12 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
  out_unlock:
         unlock_page(page);
         ubifs_release_budget(c, &req);
+       if (err)
+               err = VM_FAULT_SIGBUS;
         return err;
  }
  
-static struct vm_operations_struct ubifs_file_vm_ops = {
+static const struct vm_operations_struct ubifs_file_vm_ops = {
         .fault        = filemap_fault,
         .page_mkwrite = ubifs_vm_page_mkwrite,
  };
@@ -1505,7 +1540,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
         return 0;
  }
  
-struct address_space_operations ubifs_file_address_operations = {
+const struct address_space_operations ubifs_file_address_operations = {
         .readpage       = ubifs_readpage,
         .writepage      = ubifs_writepage,
         .write_begin    = ubifs_write_begin,
@@ -1515,7 +1550,7 @@ struct address_space_operations ubifs_file_address_operations = {
         .releasepage    = ubifs_releasepage,
  };
  
-struct inode_operations ubifs_file_inode_operations = {
+const struct inode_operations ubifs_file_inode_operations = {
         .setattr     = ubifs_setattr,
         .getattr     = ubifs_getattr,
  #ifdef CONFIG_UBIFS_FS_XATTR
@@ -1526,14 +1561,14 @@ struct inode_operations ubifs_file_inode_operations = {
  #endif
  };
  
-struct inode_operations ubifs_symlink_inode_operations = {
+const struct inode_operations ubifs_symlink_inode_operations = {
         .readlink    = generic_readlink,
         .follow_link = ubifs_follow_link,
         .setattr     = ubifs_setattr,
         .getattr     = ubifs_getattr,
  };
  
-struct file_operations ubifs_file_operations = {
+const struct file_operations ubifs_file_operations = {
         .llseek         = generic_file_llseek,
         .read           = do_sync_read,
         .write          = do_sync_write,