diff --git a/fs/buffer.c b/fs/buffer.c
index a4b8242..0e5ec37 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -24,7 +24,6 @@
 #include <linux/mm.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/capability.h>
 #include <linux/blkdev.h>
 #include <linux/file.h>
@@ -44,7 +43,6 @@
 #include <linux/bit_spinlock.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
-static void invalidate_bh_lrus(void);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -333,7 +331,7 @@ out:
    we think the disk contains more recent information than the buffercache.
    The update == 1 pass marks the buffers we need to update, the update == 2
    pass does the actual I/O. */
-void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
+void invalidate_bdev(struct block_device *bdev)
 {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
 
@@ -341,11 +339,6 @@ void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
                return;
 
        invalidate_bh_lrus();
-       /*
-        * FIXME: what about destroy_dirty_buffers?
-        * We really want to use invalidate_inode_pages2() for
-        * that, but not until that's cleaned up.
-        */
        invalidate_mapping_pages(mapping, 0, -1);
 }
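With the destroy_dirty_buffers argument dropped (the removed FIXME above suggests it was never honoured here anyway), callers simply pass the device. A minimal sketch of the updated call site, where bdev is an illustrative struct block_device pointer:

	/* old: invalidate_bdev(bdev, 1);   -- second argument had no effect  */
	/* new: clean cached pages are invalidated unconditionally            */
	invalidate_bdev(bdev);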
 
@@ -363,7 +356,7 @@ static void free_more_memory(void)
        for_each_online_pgdat(pgdat) {
                zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
                if (*zones)
-                       try_to_free_pages(zones, GFP_NOFS);
+                       try_to_free_pages(zones, 0, GFP_NOFS);
        }
 }
 
@@ -683,6 +676,39 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 EXPORT_SYMBOL(mark_buffer_dirty_inode);
 
 /*
+ * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
+ * dirty.
+ *
+ * If warn is true, then emit a warning if the page is not uptodate and has
+ * not been truncated.
+ */
+static int __set_page_dirty(struct page *page,
+               struct address_space *mapping, int warn)
+{
+       if (unlikely(!mapping))
+               return !TestSetPageDirty(page);
+
+       if (TestSetPageDirty(page))
+               return 0;
+
+       write_lock_irq(&mapping->tree_lock);
+       if (page->mapping) {    /* Race with truncate? */
+               WARN_ON_ONCE(warn && !PageUptodate(page));
+
+               if (mapping_cap_account_dirty(mapping)) {
+                       __inc_zone_page_state(page, NR_FILE_DIRTY);
+                       task_io_account_write(PAGE_CACHE_SIZE);
+               }
+               radix_tree_tag_set(&mapping->page_tree,
+                               page_index(page), PAGECACHE_TAG_DIRTY);
+       }
+       write_unlock_irq(&mapping->tree_lock);
+       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+       return 1;
+}
+
+/*
  * Add a page to the dirty page list.
  *
  * It is a sad fact of life that this function is called from several places
@@ -709,7 +735,7 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  */
 int __set_page_dirty_buffers(struct page *page)
 {
-       struct address_space * const mapping = page_mapping(page);
+       struct address_space *mapping = page_mapping(page);
 
        if (unlikely(!mapping))
                return !TestSetPageDirty(page);
@@ -726,21 +752,7 @@ int __set_page_dirty_buffers(struct page *page)
        }
        spin_unlock(&mapping->private_lock);
 
-       if (TestSetPageDirty(page))
-               return 0;
-
-       write_lock_irq(&mapping->tree_lock);
-       if (page->mapping) {    /* Race with truncate? */
-               if (mapping_cap_account_dirty(mapping)) {
-                       __inc_zone_page_state(page, NR_FILE_DIRTY);
-                       task_io_account_write(PAGE_CACHE_SIZE);
-               }
-               radix_tree_tag_set(&mapping->page_tree,
-                               page_index(page), PAGECACHE_TAG_DIRTY);
-       }
-       write_unlock_irq(&mapping->tree_lock);
-       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-       return 1;
+       return __set_page_dirty(page, mapping, 1);
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
 
@@ -988,7 +1000,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
        struct page *page;
        struct buffer_head *bh;
 
-       page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+       page = find_or_create_page(inode->i_mapping, index,
+               (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
        if (!page)
                return NULL;
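grow_dev_page() now allocates its page with the mapping's own gfp mask instead of a bare GFP_NOFS. A sketch of what the new expression computes; gfp_for_bdev_page() is an illustrative name, not a kernel helper:

	static inline gfp_t gfp_for_bdev_page(struct address_space *mapping)
	{
		gfp_t gfp = mapping_gfp_mask(mapping);	/* mapping's usual constraints */

		gfp &= ~__GFP_FS;	/* still no fs re-entry while growing buffers */
		gfp |= __GFP_MOVABLE;	/* page-cache page, may come from movable areas */
		return gfp;
	}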
 
@@ -1032,11 +1045,6 @@ failed:
 /*
  * Create buffers for the specified block device block's page.  If
  * that page was dirty, the buffers are set dirty also.
- *
- * Except that's a bug.  Attaching dirty buffers to a dirty
- * blockdev's page can result in filesystem corruption, because
- * some of those buffers may be aliases of filesystem data.
- * grow_dev_page() will go BUG() if this happens.
  */
 static int
 grow_buffers(struct block_device *bdev, sector_t block, int size)
@@ -1143,8 +1151,9 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  */
 void fastcall mark_buffer_dirty(struct buffer_head *bh)
 {
+       WARN_ON_ONCE(!buffer_uptodate(bh));
        if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
-               __set_page_dirty_nobuffers(bh->b_page);
+               __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
 }
 
 /*
@@ -1283,11 +1292,11 @@ static void bh_lru_install(struct buffer_head *bh)
  * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
  */
 static struct buffer_head *
-lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
+lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 {
        struct buffer_head *ret = NULL;
        struct bh_lru *lru;
-       int i;
+       unsigned int i;
 
        check_irqs_on();
        bh_lru_lock();
@@ -1319,7 +1328,7 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
  * NULL
  */
 struct buffer_head *
-__find_get_block(struct block_device *bdev, sector_t block, int size)
+__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
 {
        struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
 
@@ -1347,7 +1356,7 @@ EXPORT_SYMBOL(__find_get_block);
  * attempt is failing.  FIXME, perhaps?
  */
 struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, int size)
+__getblk(struct block_device *bdev, sector_t block, unsigned size)
 {
        struct buffer_head *bh = __find_get_block(bdev, block, size);
 
@@ -1361,7 +1370,7 @@ EXPORT_SYMBOL(__getblk);
 /*
  * Do async read-ahead on a buffer..
  */
-void __breadahead(struct block_device *bdev, sector_t block, int size)
+void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 {
        struct buffer_head *bh = __getblk(bdev, block, size);
        if (likely(bh)) {
@@ -1381,7 +1390,7 @@ EXPORT_SYMBOL(__breadahead);
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, int size)
+__bread(struct block_device *bdev, sector_t block, unsigned size)
 {
        struct buffer_head *bh = __getblk(bdev, block, size);
 
@@ -1408,7 +1417,7 @@ static void invalidate_bh_lru(void *arg)
        put_cpu_var(bh_lrus);
 }
        
-static void invalidate_bh_lrus(void)
+void invalidate_bh_lrus(void)
 {
        on_each_cpu(invalidate_bh_lru, NULL, 1, 1);
 }
@@ -1700,17 +1709,8 @@ done:
                 * clean.  Someone wrote them back by hand with
                 * ll_rw_block/submit_bh.  A rare case.
                 */
-               int uptodate = 1;
-               do {
-                       if (!buffer_uptodate(bh)) {
-                               uptodate = 0;
-                               break;
-                       }
-                       bh = bh->b_this_page;
-               } while (bh != head);
-               if (uptodate)
-                       SetPageUptodate(page);
                end_page_writeback(page);
+
                /*
                 * The page and buffer_heads can be released at any time from
                 * here on.
@@ -1742,8 +1742,8 @@ recover:
        } while ((bh = bh->b_this_page) != head);
        SetPageError(page);
        BUG_ON(PageWriteback(page));
+       mapping_set_error(page->mapping, err);
        set_page_writeback(page);
-       unlock_page(page);
        do {
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
@@ -1753,6 +1753,7 @@ recover:
                }
                bh = next;
        } while (bh != head);
+       unlock_page(page);
        goto done;
 }
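The recovery path now records the failure on the address_space before writeback is restarted, and only unlocks the page once every async-write buffer has been resubmitted. For reference, mapping_set_error() latches the error roughly as below (a sketch of the include/linux/pagemap.h helper of this era; treat the exact body as an assumption):

	static inline void mapping_set_error(struct address_space *mapping, int error)
	{
		if (unlikely(error)) {
			if (error == -ENOSPC)
				set_bit(AS_ENOSPC, &mapping->flags);	/* later fsync() returns ENOSPC */
			else
				set_bit(AS_EIO, &mapping->flags);	/* later fsync() returns EIO */
		}
	}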
 
@@ -1861,13 +1862,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
                if (block_start >= to)
                        break;
                if (buffer_new(bh)) {
-                       void *kaddr;
-
                        clear_buffer_new(bh);
-                       kaddr = kmap_atomic(page, KM_USER0);
-                       memset(kaddr+block_start, 0, bh->b_size);
-                       flush_dcache_page(page);
-                       kunmap_atomic(kaddr, KM_USER0);
+                       zero_user_page(page, block_start, bh->b_size, KM_USER0);
                        set_buffer_uptodate(bh);
                        mark_buffer_dirty(bh);
                }
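This and the following hunks replace the open-coded kmap_atomic/memset/flush_dcache_page/kunmap_atomic sequence with zero_user_page(). Judging from the pattern being removed, the helper behaves roughly like this (a sketch, not the authoritative definition):

	static inline void zero_user_page(struct page *page, unsigned offset,
					  unsigned size, int km_type)
	{
		void *kaddr = kmap_atomic(page, km_type);

		memset(kaddr + offset, 0, size);	/* zero [offset, offset + size) */
		flush_dcache_page(page);		/* keep the data cache coherent */
		kunmap_atomic(kaddr, km_type);
	}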
@@ -1955,10 +1951,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
                                        SetPageError(page);
                        }
                        if (!buffer_mapped(bh)) {
-                               void *kaddr = kmap_atomic(page, KM_USER0);
-                               memset(kaddr + i * blocksize, 0, blocksize);
-                               flush_dcache_page(page);
-                               kunmap_atomic(kaddr, KM_USER0);
+                               zero_user_page(page, i * blocksize, blocksize,
+                                               KM_USER0);
                                if (!err)
                                        set_buffer_uptodate(bh);
                                continue;
@@ -2101,7 +2095,6 @@ int cont_prepare_write(struct page *page, unsigned offset,
        long status;
        unsigned zerofrom;
        unsigned blocksize = 1 << inode->i_blkbits;
-       void *kaddr;
 
        while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
                status = -ENOMEM;
@@ -2123,10 +2116,8 @@ int cont_prepare_write(struct page *page, unsigned offset,
                                                PAGE_CACHE_SIZE, get_block);
                if (status)
                        goto out_unmap;
-               kaddr = kmap_atomic(new_page, KM_USER0);
-               memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
-               flush_dcache_page(new_page);
-               kunmap_atomic(kaddr, KM_USER0);
+               zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
+                               KM_USER0);
                generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
                unlock_page(new_page);
                page_cache_release(new_page);
@@ -2153,10 +2144,7 @@ int cont_prepare_write(struct page *page, unsigned offset,
        if (status)
                goto out1;
        if (zerofrom < offset) {
-               kaddr = kmap_atomic(page, KM_USER0);
-               memset(kaddr+zerofrom, 0, offset-zerofrom);
-               flush_dcache_page(page);
-               kunmap_atomic(kaddr, KM_USER0);
+               zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0);
                __block_commit_write(inode, page, zerofrom, offset);
        }
        return 0;
@@ -2206,6 +2194,52 @@ int generic_commit_write(struct file *file, struct page *page,
        return 0;
 }
 
+/*
+ * block_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+                  get_block_t get_block)
+{
+       struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+       unsigned long end;
+       loff_t size;
+       int ret = -EINVAL;
+
+       lock_page(page);
+       size = i_size_read(inode);
+       if ((page->mapping != inode->i_mapping) ||
+           (page_offset(page) > size)) {
+               /* page got truncated out from underneath us */
+               goto out_unlock;
+       }
+
+       /* page is wholly or partially inside EOF */
+       if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+               end = size & ~PAGE_CACHE_MASK;
+       else
+               end = PAGE_CACHE_SIZE;
+
+       ret = block_prepare_write(page, 0, end, get_block);
+       if (!ret)
+               ret = block_commit_write(page, 0, end);
+
+out_unlock:
+       unlock_page(page);
+       return ret;
+}
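block_page_mkwrite() is intended to be called from a filesystem's ->page_mkwrite handler so that mmap writes into holes get block allocation and ENOSPC checking before the page is dirtied. A hedged sketch of the glue a filesystem might add; myfs_get_block, myfs_page_mkwrite and myfs_file_vm_ops are illustrative names, not part of this patch:

	static int myfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
	{
		/* map/allocate blocks for the whole page before it goes writable */
		return block_page_mkwrite(vma, page, myfs_get_block);
	}

	static struct vm_operations_struct myfs_file_vm_ops = {
		.page_mkwrite	= myfs_page_mkwrite,
		/* .fault / .nopage handlers omitted for brevity */
	};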
 
 /*
  * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
@@ -2248,7 +2282,6 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
        int i;
        int ret = 0;
        int is_mapped_to_disk = 1;
-       int dirtied_it = 0;
 
        if (PageMappedToDisk(page))
                return 0;
@@ -2285,14 +2318,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
                        continue;
                if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
                        kaddr = kmap_atomic(page, KM_USER0);
-                       if (block_start < from) {
+                       if (block_start < from)
                                memset(kaddr+block_start, 0, from-block_start);
-                               dirtied_it = 1;
-                       }
-                       if (block_end > to) {
+                       if (block_end > to)
                                memset(kaddr + to, 0, block_end - to);
-                               dirtied_it = 1;
-                       }
                        flush_dcache_page(page);
                        kunmap_atomic(kaddr, KM_USER0);
                        continue;
@@ -2347,17 +2376,6 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 
        if (is_mapped_to_disk)
                SetPageMappedToDisk(page);
-       SetPageUptodate(page);
-
-       /*
-        * Setting the page dirty here isn't necessary for the prepare_write
-        * function - commit_write will do that.  But if/when this function is
-        * used within the pagefault handler to ensure that all mmapped pages
-        * have backing space in the filesystem, we will need to dirty the page
-        * if its contents were altered.
-        */
-       if (dirtied_it)
-               set_page_dirty(page);
 
        return 0;
 
@@ -2371,22 +2389,24 @@ failed:
         * Error recovery is pretty slack.  Clear the page and mark it dirty
         * so we'll later zero out any blocks which _were_ allocated.
         */
-       kaddr = kmap_atomic(page, KM_USER0);
-       memset(kaddr, 0, PAGE_CACHE_SIZE);
-       flush_dcache_page(page);
-       kunmap_atomic(kaddr, KM_USER0);
+       zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
        SetPageUptodate(page);
        set_page_dirty(page);
        return ret;
 }
 EXPORT_SYMBOL(nobh_prepare_write);
 
+/*
+ * Make sure any changes to nobh_commit_write() are reflected in
+ * nobh_truncate_page(), since it doesn't call commit_write().
+ */
 int nobh_commit_write(struct file *file, struct page *page,
                unsigned from, unsigned to)
 {
        struct inode *inode = page->mapping->host;
        loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
+       SetPageUptodate(page);
        set_page_dirty(page);
        if (pos > inode->i_size) {
                i_size_write(inode, pos);
@@ -2408,7 +2428,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
        loff_t i_size = i_size_read(inode);
        const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset;
-       void *kaddr;
        int ret;
 
        /* Is the page fully inside i_size? */
@@ -2439,10 +2458,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
         * the  page size, the remaining memory is zeroed when mapped, and
         * writes to that region are not written out to the file."
         */
-       kaddr = kmap_atomic(page, KM_USER0);
-       memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-       flush_dcache_page(page);
-       kunmap_atomic(kaddr, KM_USER0);
+       zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
 out:
        ret = mpage_writepage(page, get_block, wbc);
        if (ret == -EAGAIN)
@@ -2463,7 +2479,6 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
        unsigned to;
        struct page *page;
        const struct address_space_operations *a_ops = mapping->a_ops;
-       char *kaddr;
        int ret = 0;
 
        if ((offset & (blocksize - 1)) == 0)
@@ -2477,10 +2492,13 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
        to = (offset + blocksize) & ~(blocksize - 1);
        ret = a_ops->prepare_write(NULL, page, offset, to);
        if (ret == 0) {
-               kaddr = kmap_atomic(page, KM_USER0);
-               memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-               flush_dcache_page(page);
-               kunmap_atomic(kaddr, KM_USER0);
+               zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
+                               KM_USER0);
+               /*
+                * It would be more correct to call aops->commit_write()
+                * here, but this is more efficient.
+                */
+               SetPageUptodate(page);
                set_page_dirty(page);
        }
        unlock_page(page);
@@ -2501,7 +2519,6 @@ int block_truncate_page(struct address_space *mapping,
        struct inode *inode = mapping->host;
        struct page *page;
        struct buffer_head *bh;
-       void *kaddr;
        int err;
 
        blocksize = 1 << inode->i_blkbits;
@@ -2555,11 +2572,7 @@ int block_truncate_page(struct address_space *mapping,
                        goto unlock;
        }
 
-       kaddr = kmap_atomic(page, KM_USER0);
-       memset(kaddr + offset, 0, length);
-       flush_dcache_page(page);
-       kunmap_atomic(kaddr, KM_USER0);
-
+       zero_user_page(page, offset, length, KM_USER0);
        mark_buffer_dirty(bh);
        err = 0;
 
@@ -2580,7 +2593,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
        loff_t i_size = i_size_read(inode);
        const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset;
-       void *kaddr;
 
        /* Is the page fully inside i_size? */
        if (page->index < end_index)
@@ -2606,10 +2618,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
         * the  page size, the remaining memory is zeroed when mapped, and
         * writes to that region are not written out to the file."
         */
-       kaddr = kmap_atomic(page, KM_USER0);
-       memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-       flush_dcache_page(page);
-       kunmap_atomic(kaddr, KM_USER0);
+       zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
        return __block_write_full_page(inode, page, get_block, wbc);
 }
 
@@ -2951,8 +2960,9 @@ static void recalc_bh_state(void)
        
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
-       struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
+       struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
        if (ret) {
+               INIT_LIST_HEAD(&ret->b_assoc_buffers);
                get_cpu_var(bh_accounting).nr++;
                recalc_bh_state();
                put_cpu_var(bh_accounting);
@@ -2971,18 +2981,6 @@ void free_buffer_head(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(free_buffer_head);
 
-static void
-init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
-{
-       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-                           SLAB_CTOR_CONSTRUCTOR) {
-               struct buffer_head * bh = (struct buffer_head *)data;
-
-               memset(bh, 0, sizeof(*bh));
-               INIT_LIST_HEAD(&bh->b_assoc_buffers);
-       }
-}
-
 static void buffer_exit_cpu(int cpu)
 {
        int i;
@@ -3000,7 +2998,7 @@ static void buffer_exit_cpu(int cpu)
 static int buffer_cpu_notify(struct notifier_block *self,
                              unsigned long action, void *hcpu)
 {
-       if (action == CPU_DEAD)
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
                buffer_exit_cpu((unsigned long)hcpu);
        return NOTIFY_OK;
 }
@@ -3009,12 +3007,8 @@ void __init buffer_init(void)
 {
        int nrpages;
 
-       bh_cachep = kmem_cache_create("buffer_head",
-                                       sizeof(struct buffer_head), 0,
-                                       (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
-                                       SLAB_MEM_SPREAD),
-                                       init_buffer_head,
-                                       NULL);
+       bh_cachep = KMEM_CACHE(buffer_head,
+                       SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
 
        /*
         * Limit the bh occupancy to 10% of ZONE_NORMAL
@@ -3029,6 +3023,7 @@ EXPORT_SYMBOL(__brelse);
 EXPORT_SYMBOL(__wait_on_buffer);
 EXPORT_SYMBOL(block_commit_write);
 EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_page_mkwrite);
 EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);