[PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE
author     Zach Brown <zach.brown@oracle.com>
           Thu, 15 Dec 2005 22:28:17 +0000 (14:28 -0800)
committer  Joel Becker <joel.becker@oracle.com>
           Tue, 3 Jan 2006 19:45:42 +0000 (11:45 -0800)
readpage(), prepare_write(), and commit_write() callers are updated to
understand the special return code AOP_TRUNCATED_PAGE in the style of
writepage() and WRITEPAGE_ACTIVATE.  AOP_TRUNCATED_PAGE tells the caller that
the callee has unlocked the page and that the operation should be tried again
with a new page.  OCFS2 uses this to detect and work around a lock inversion in
its aop methods.  There should be no change in behaviour for methods that don't
return AOP_TRUNCATED_PAGE.
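
For illustration only (this sketch is not part of the diff below; the helper
and its arguments are hypothetical), a caller of ->readpage() that follows the
new convention looks roughly like:

#include <linux/fs.h>
#include <linux/pagemap.h>

/*
 * Sketch: populate the page cache at @index while honouring the
 * AOP_TRUNCATED_PAGE retry convention.  Not part of this patch.
 */
static int example_read_one_page(struct file *file,
                                 struct address_space *mapping,
                                 pgoff_t index)
{
        struct page *page;
        int err;

        do {
                /* grab_cache_page() returns the page locked */
                page = grab_cache_page(mapping, index);
                if (!page)
                        return -ENOMEM;

                err = mapping->a_ops->readpage(file, page);
                /*
                 * On AOP_TRUNCATED_PAGE the aop has already unlocked the
                 * page; drop our reference and retry with a freshly
                 * looked-up page.
                 */
                page_cache_release(page);
        } while (err == AOP_TRUNCATED_PAGE);

        return err;
}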

AOP_ is also prepended to WRITEPAGE_ACTIVATE for consistency, and both return
codes are made enum values so that kerneldoc can be used to document their
semantics.
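
Similarly (sketch only, not part of this patch; the page, mapping and wbc
variables are assumed from the caller's context), a writepage() caller outside
the VM is expected to handle the renamed return code like:

        int err = mapping->a_ops->writepage(page, &wbc);

        if (err == AOP_WRITEPAGE_ACTIVATE) {
                /* the page was not written and is still locked */
                unlock_page(page);
        }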

Signed-off-by: Zach Brown <zach.brown@oracle.com>
drivers/block/loop.c
drivers/block/rd.c
fs/mpage.c
include/linux/fs.h
include/linux/writeback.h
mm/filemap.c
mm/readahead.c
mm/shmem.c
mm/vmscan.c

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 96c664a..a452b13 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
        struct address_space_operations *aops = mapping->a_ops;
        pgoff_t index;
        unsigned offset, bv_offs;
-       int len, ret = 0;
+       int len, ret;
 
        down(&mapping->host->i_sem);
        index = pos >> PAGE_CACHE_SHIFT;
@@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
                page = grab_cache_page(mapping, index);
                if (unlikely(!page))
                        goto fail;
-               if (unlikely(aops->prepare_write(file, page, offset,
-                               offset + size)))
+               ret = aops->prepare_write(file, page, offset,
+                                         offset + size);
+               if (unlikely(ret)) {
+                       if (ret == AOP_TRUNCATED_PAGE) {
+                               page_cache_release(page);
+                               continue;
+                       }
                        goto unlock;
+               }
                transfer_result = lo_do_transfer(lo, WRITE, page, offset,
                                bvec->bv_page, bv_offs, size, IV);
                if (unlikely(transfer_result)) {
@@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
                        kunmap_atomic(kaddr, KM_USER0);
                }
                flush_dcache_page(page);
-               if (unlikely(aops->commit_write(file, page, offset,
-                               offset + size)))
+               ret = aops->commit_write(file, page, offset,
+                                        offset + size);
+               if (unlikely(ret)) {
+                       if (ret == AOP_TRUNCATED_PAGE) {
+                               page_cache_release(page);
+                               continue;
+                       }
                        goto unlock;
+               }
                if (unlikely(transfer_result))
                        goto unlock;
                bv_offs += size;
@@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
                unlock_page(page);
                page_cache_release(page);
        }
+       ret = 0;
 out:
        up(&mapping->host->i_sem);
        return ret;
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 68c60a5..ffd6abd 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page,
 
 /*
  * ->writepage to the the blockdev's mapping has to redirty the page so that the
- * VM doesn't go and steal it.  We return WRITEPAGE_ACTIVATE so that the VM
+ * VM doesn't go and steal it.  We return AOP_WRITEPAGE_ACTIVATE so that the VM
  * won't try to (pointlessly) write the page again for a while.
  *
  * Really, these pages should not be on the LRU at all.
@@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
                make_page_uptodate(page);
        SetPageDirty(page);
        if (wbc->for_reclaim)
-               return WRITEPAGE_ACTIVATE;
+               return AOP_WRITEPAGE_ACTIVATE;
        unlock_page(page);
        return 0;
 }
diff --git a/fs/mpage.c b/fs/mpage.c
index c5adcdd..f1d2d02 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -721,7 +721,7 @@ retry:
                                                &last_block_in_bio, &ret, wbc,
                                                page->mapping->a_ops->writepage);
                        }
-                       if (unlikely(ret == WRITEPAGE_ACTIVATE))
+                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
                                unlock_page(page);
                        if (ret || (--(wbc->nr_to_write) <= 0))
                                done = 1;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc35b6a..ed9a41a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -302,6 +302,37 @@ struct iattr {
  */
 #include <linux/quota.h>
 
+/** 
+ * enum positive_aop_returns - aop return codes with specific semantics
+ *
+ * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
+ *                         completed, that the page is still locked, and
+ *                         should be considered active.  The VM uses this hint
+ *                         to return the page to the active list -- it won't
+ *                         be a candidate for writeback again in the near
+ *                         future.  Other callers must be careful to unlock
+ *                         the page if they get this return.  Returned by
+ *                         writepage(); 
+ *
+ * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
+ *                     unlocked it and the page might have been truncated.
+ *                     The caller should back up to acquiring a new page and
+ *                     trying again.  The aop will be taking reasonable
+ *                     precautions not to livelock.  If the caller held a page
+ *                     reference, it should drop it before retrying.  Returned
+ *                     by readpage(), prepare_write(), and commit_write().
+ *
+ * address_space_operation functions return these large constants to indicate
+ * special semantics to the caller.  These are much larger than the bytes in a
+ * page to allow for functions that return the number of bytes operated on in a
+ * given page.
+ */
+
+enum positive_aop_returns {
+       AOP_WRITEPAGE_ACTIVATE  = 0x80000,
+       AOP_TRUNCATED_PAGE      = 0x80001,
+};
+
 /*
  * oh the beauties of C type declarations.
  */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 343d883..64a36ba 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -60,12 +60,6 @@ struct writeback_control {
 };
 
 /*
- * ->writepage() return values (make these much larger than a pagesize, in
- * case some fs is returning number-of-bytes-written from writepage)
- */
-#define WRITEPAGE_ACTIVATE     0x80000 /* IO was not started: activate page */
-
-/*
  * fs/fs-writeback.c
  */    
 void writeback_inodes(struct writeback_control *wbc);
diff --git a/mm/filemap.c b/mm/filemap.c
index 33a28bf..6e1d08a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -831,8 +831,13 @@ readpage:
                /* Start the actual read. The read will unlock the page. */
                error = mapping->a_ops->readpage(filp, page);
 
-               if (unlikely(error))
+               if (unlikely(error)) {
+                       if (error == AOP_TRUNCATED_PAGE) {
+                               page_cache_release(page);
+                               goto find_page;
+                       }
                        goto readpage_error;
+               }
 
                if (!PageUptodate(page)) {
                        lock_page(page);
@@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 {
        struct address_space *mapping = file->f_mapping;
        struct page *page; 
-       int error;
+       int ret;
 
-       page = page_cache_alloc_cold(mapping);
-       if (!page)
-               return -ENOMEM;
+       do {
+               page = page_cache_alloc_cold(mapping);
+               if (!page)
+                       return -ENOMEM;
+
+               ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
+               if (ret == 0)
+                       ret = mapping->a_ops->readpage(file, page);
+               else if (ret == -EEXIST)
+                       ret = 0; /* losing race to add is OK */
 
-       error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
-       if (!error) {
-               error = mapping->a_ops->readpage(file, page);
                page_cache_release(page);
-               return error;
-       }
 
-       /*
-        * We arrive here in the unlikely event that someone 
-        * raced with us and added our page to the cache first
-        * or we are out of memory for radix-tree nodes.
-        */
-       page_cache_release(page);
-       return error == -EEXIST ? 0 : error;
+       } while (ret == AOP_TRUNCATED_PAGE);
+               
+       return ret;
 }
 
 #define MMAP_LOTSAMISS  (100)
@@ -1331,10 +1334,14 @@ page_not_uptodate:
                goto success;
        }
 
-       if (!mapping->a_ops->readpage(file, page)) {
+       error = mapping->a_ops->readpage(file, page);
+       if (!error) {
                wait_on_page_locked(page);
                if (PageUptodate(page))
                        goto success;
+       } else if (error == AOP_TRUNCATED_PAGE) {
+               page_cache_release(page);
+               goto retry_find;
        }
 
        /*
@@ -1358,10 +1365,14 @@ page_not_uptodate:
                goto success;
        }
        ClearPageError(page);
-       if (!mapping->a_ops->readpage(file, page)) {
+       error = mapping->a_ops->readpage(file, page);
+       if (!error) {
                wait_on_page_locked(page);
                if (PageUptodate(page))
                        goto success;
+       } else if (error == AOP_TRUNCATED_PAGE) {
+               page_cache_release(page);
+               goto retry_find;
        }
 
        /*
@@ -1444,10 +1455,14 @@ page_not_uptodate:
                goto success;
        }
 
-       if (!mapping->a_ops->readpage(file, page)) {
+       error = mapping->a_ops->readpage(file, page);
+       if (!error) {
                wait_on_page_locked(page);
                if (PageUptodate(page))
                        goto success;
+       } else if (error == AOP_TRUNCATED_PAGE) {
+               page_cache_release(page);
+               goto retry_find;
        }
 
        /*
@@ -1470,10 +1485,14 @@ page_not_uptodate:
        }
 
        ClearPageError(page);
-       if (!mapping->a_ops->readpage(file, page)) {
+       error = mapping->a_ops->readpage(file, page);
+       if (!error) {
                wait_on_page_locked(page);
                if (PageUptodate(page))
                        goto success;
+       } else if (error == AOP_TRUNCATED_PAGE) {
+               page_cache_release(page);
+               goto retry_find;
        }
 
        /*
@@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                status = a_ops->prepare_write(file, page, offset, offset+bytes);
                if (unlikely(status)) {
                        loff_t isize = i_size_read(inode);
+
+                       if (status != AOP_TRUNCATED_PAGE)
+                               unlock_page(page);
+                       page_cache_release(page);
+                       if (status == AOP_TRUNCATED_PAGE)
+                               continue;
                        /*
                         * prepare_write() may have instantiated a few blocks
                         * outside i_size.  Trim these off again.
                         */
-                       unlock_page(page);
-                       page_cache_release(page);
                        if (pos + bytes > isize)
                                vmtruncate(inode, isize);
                        break;
@@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                                                cur_iov, iov_base, bytes);
                flush_dcache_page(page);
                status = a_ops->commit_write(file, page, offset, offset+bytes);
+               if (status == AOP_TRUNCATED_PAGE) {
+                       page_cache_release(page);
+                       continue;
+               }
                if (likely(copied > 0)) {
                        if (!status)
                                status = copied;
diff --git a/mm/readahead.c b/mm/readahead.c
index 72e7adb..8d6eeaa 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 {
        unsigned page_idx;
        struct pagevec lru_pvec;
-       int ret = 0;
+       int ret;
 
        if (mapping->a_ops->readpages) {
                ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
@@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp,
                list_del(&page->lru);
                if (!add_to_page_cache(page, mapping,
                                        page->index, GFP_KERNEL)) {
-                       mapping->a_ops->readpage(filp, page);
-                       if (!pagevec_add(&lru_pvec, page))
-                               __pagevec_lru_add(&lru_pvec);
-               } else {
-                       page_cache_release(page);
+                       ret = mapping->a_ops->readpage(filp, page);
+                       if (ret != AOP_TRUNCATED_PAGE) {
+                               if (!pagevec_add(&lru_pvec, page))
+                                       __pagevec_lru_add(&lru_pvec);
+                               continue;
+                       } /* else fall through to release */
                }
+               page_cache_release(page);
        }
        pagevec_lru_add(&lru_pvec);
+       ret = 0;
 out:
        return ret;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index dc25565..d9fc277 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -855,7 +855,7 @@ unlock:
        swap_free(swap);
 redirty:
        set_page_dirty(page);
-       return WRITEPAGE_ACTIVATE;      /* Return with the page locked */
+       return AOP_WRITEPAGE_ACTIVATE;  /* Return with the page locked */
 }
 
 #ifdef CONFIG_NUMA
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b0cd81c..795a050 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
                res = mapping->a_ops->writepage(page, &wbc);
                if (res < 0)
                        handle_write_error(mapping, page, res);
-               if (res == WRITEPAGE_ACTIVATE) {
+               if (res == AOP_WRITEPAGE_ACTIVATE) {
                        ClearPageReclaim(page);
                        return PAGE_ACTIVATE;
                }