fs: dcache fix LRU ordering
[safe/jmp/linux-2.6] / fs / buffer.c
index 6e35762..aed2977 100644 (file)
@@ -360,7 +360,7 @@ still_busy:
  * Completion handler for block_write_full_page() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
  */
-static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
+void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
        char b[BDEVNAME_SIZE];
        unsigned long flags;
@@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh)
        set_buffer_async_read(bh);
 }
 
-void mark_buffer_async_write(struct buffer_head *bh)
+void mark_buffer_async_write_endio(struct buffer_head *bh,
+                                  bh_end_io_t *handler)
 {
-       bh->b_end_io = end_buffer_async_write;
+       bh->b_end_io = handler;
        set_buffer_async_write(bh);
 }
+
+void mark_buffer_async_write(struct buffer_head *bh)
+{
+       mark_buffer_async_write_endio(bh, end_buffer_async_write);
+}
 EXPORT_SYMBOL(mark_buffer_async_write);
 
 
@@ -547,7 +553,7 @@ repeat:
        return err;
 }
 
-void do_thaw_all(unsigned long unused)
+void do_thaw_all(struct work_struct *work)
 {
        struct super_block *sb;
        char b[BDEVNAME_SIZE];
@@ -567,6 +573,7 @@ restart:
                        goto restart;
        }
        spin_unlock(&sb_lock);
+       kfree(work);
        printk(KERN_WARNING "Emergency Thaw complete\n");
 }
 
@@ -577,7 +584,13 @@ restart:
  */
 void emergency_thaw_all(void)
 {
-       pdflush_operation(do_thaw_all, 0);
+       struct work_struct *work;
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (work) {
+               INIT_WORK(work, do_thaw_all);
+               schedule_work(work);
+       }
 }
 
 /**
@@ -1596,9 +1609,20 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
  * locked buffer.   This only can happen if someone has written the buffer
  * directly, with submit_bh().  At the address_space level PageWriteback
  * prevents this contention from occurring.
+ *
+ * If block_write_full_page() is called with wbc->sync_mode ==
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
+ * causes the writes to be flagged as synchronous writes, but the
+ * block device queue will NOT be unplugged, since usually many pages
+ * will be pushed to the out before the higher-level caller actually
+ * waits for the writes to be completed.  The various wait functions,
+ * such as wait_on_writeback_range() will ultimately call sync_page()
+ * which will ultimately call blk_run_backing_dev(), which will end up
+ * unplugging the device queue.
  */
 static int __block_write_full_page(struct inode *inode, struct page *page,
-                       get_block_t *get_block, struct writeback_control *wbc)
+                       get_block_t *get_block, struct writeback_control *wbc,
+                       bh_end_io_t *handler)
 {
        int err;
        sector_t block;
@@ -1606,7 +1630,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
        struct buffer_head *bh, *head;
        const unsigned blocksize = 1 << inode->i_blkbits;
        int nr_underway = 0;
-       int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+       int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
+                       WRITE_SYNC_PLUG : WRITE);
 
        BUG_ON(!PageLocked(page));
 
@@ -1682,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
                        continue;
                }
                if (test_clear_buffer_dirty(bh)) {
-                       mark_buffer_async_write(bh);
+                       mark_buffer_async_write_endio(bh, handler);
                } else {
                        unlock_buffer(bh);
                }
@@ -1735,7 +1760,7 @@ recover:
                if (buffer_mapped(bh) && buffer_dirty(bh) &&
                    !buffer_delay(bh)) {
                        lock_buffer(bh);
-                       mark_buffer_async_write(bh);
+                       mark_buffer_async_write_endio(bh, handler);
                } else {
                        /*
                         * The buffer may have been set dirty during
@@ -2372,7 +2397,8 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
        if ((page->mapping != inode->i_mapping) ||
            (page_offset(page) > size)) {
                /* page got truncated out from underneath us */
-               goto out_unlock;
+               unlock_page(page);
+               goto out;
        }
 
        /* page is wholly or partially inside EOF */
@@ -2386,14 +2412,15 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                ret = block_commit_write(page, 0, end);
 
        if (unlikely(ret)) {
+               unlock_page(page);
                if (ret == -ENOMEM)
                        ret = VM_FAULT_OOM;
                else /* -ENOSPC, -EIO, etc */
                        ret = VM_FAULT_SIGBUS;
-       }
+       } else
+               ret = VM_FAULT_LOCKED;
 
-out_unlock:
-       unlock_page(page);
+out:
        return ret;
 }
 
@@ -2661,7 +2688,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
 out:
        ret = mpage_writepage(page, get_block, wbc);
        if (ret == -EAGAIN)
-               ret = __block_write_full_page(inode, page, get_block, wbc);
+               ret = __block_write_full_page(inode, page, get_block, wbc,
+                                             end_buffer_async_write);
        return ret;
 }
 EXPORT_SYMBOL(nobh_writepage);
@@ -2819,9 +2847,10 @@ out:
 
 /*
  * The generic ->writepage function for buffer-backed address_spaces
+ * this form passes in the end_io handler used to finish the IO.
  */
-int block_write_full_page(struct page *page, get_block_t *get_block,
-                       struct writeback_control *wbc)
+int block_write_full_page_endio(struct page *page, get_block_t *get_block,
+                       struct writeback_control *wbc, bh_end_io_t *handler)
 {
        struct inode * const inode = page->mapping->host;
        loff_t i_size = i_size_read(inode);
@@ -2830,7 +2859,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
 
        /* Is the page fully inside i_size? */
        if (page->index < end_index)
-               return __block_write_full_page(inode, page, get_block, wbc);
+               return __block_write_full_page(inode, page, get_block, wbc,
+                                              handler);
 
        /* Is the page fully outside i_size? (truncate in progress) */
        offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2853,9 +2883,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
         * writes to that region are not written out to the file."
         */
        zero_user_segment(page, offset, PAGE_CACHE_SIZE);
-       return __block_write_full_page(inode, page, get_block, wbc);
+       return __block_write_full_page(inode, page, get_block, wbc, handler);
+}
+
+/*
+ * The generic ->writepage function for buffer-backed address_spaces
+ */
+int block_write_full_page(struct page *page, get_block_t *get_block,
+                       struct writeback_control *wbc)
+{
+       return block_write_full_page_endio(page, get_block, wbc,
+                                          end_buffer_async_write);
 }
 
+
 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
                            get_block_t *get_block)
 {
@@ -3324,9 +3365,11 @@ EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
 EXPORT_SYMBOL(block_write_full_page);
+EXPORT_SYMBOL(block_write_full_page_endio);
 EXPORT_SYMBOL(cont_write_begin);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
+EXPORT_SYMBOL(end_buffer_async_write);
 EXPORT_SYMBOL(file_fsync);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_cont_expand_simple);