move page writeback accounting out of macros
[safe/jmp/linux-2.6] / include / linux / page-flags.h
index f34767c..209d3a4 100644 (file)
@@ -5,9 +5,8 @@
 #ifndef PAGE_FLAGS_H
 #define PAGE_FLAGS_H
 
-#include <linux/percpu.h>
-#include <linux/cache.h>
-#include <asm/pgtable.h>
+#include <linux/types.h>
+#include <linux/mm_types.h>
 
 /*
  * Various page->flags bits:
  * PG_reserved is set for special pages, which can never be swapped out. Some
  * of them might not even exist (eg empty_bad_page)...
  *
- * The PG_private bitflag is set if page->private contains a valid value.
+ * The PG_private bitflag is set on pagecache pages if they contain filesystem
+ * specific data (which is normally at page->private). It can be used by
+ * private allocations for its own usage.
  *
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
+ * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
+ * and cleared when writeback _starts_ or when read _completes_. PG_writeback
+ * is set before writeback starts and cleared when it finishes.
+ *
+ * PG_locked also pins a page in pagecache, and blocks truncation of the file
+ * while it is held.
+ *
+ * page_waitqueue(page) is a wait queue of all tasks waiting for the page
+ * to become unlocked.
  *
  * PG_uptodate tells whether the page's contents is valid.  When a read
  * completes, the page becomes uptodate, unless a disk I/O error happened.
  *
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table.  This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
+ * file-backed pagecache (see mm/vmscan.c).
  *
  * PG_error is set to indicate that an I/O error occurred on this page.
  *
  * space, they need to be kmapped separately for doing IO on the pages.  The
  * struct page (these bits with information) are always mapped into kernel
  * address space...
+ *
+ * PG_buddy is set to indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
  */
 
 /*
  * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
- * locked- and dirty-page accounting.  The top eight bits of page->flags are
- * used for page->zone, so putting flag bits there doesn't work.
+ * locked- and dirty-page accounting.
+ *
+ * The page flags field is split into two parts, the main flags area
+ * which extends from the low bits upwards, and the fields area which
+ * extends from the high bits downwards.
+ *
+ *  | FIELD | ... | FLAGS |
+ *  N-1     ^             0
+ *          (N-FLAGS_RESERVED)
+ *
+ * The fields area is reserved for fields mapping zone, node and SPARSEMEM
+ * section.  The boundary between these two areas is defined by
+ * FLAGS_RESERVED which defines the width of the fields section
+ * (see linux/mmzone.h).  New flags must _not_ overlap with this area.
  */
 #define PG_locked               0      /* Page is locked. Don't touch. */
 #define PG_error                1
 #define PG_active               6
 #define PG_slab                         7      /* slab debug (Suparna wants this) */
 
-#define PG_checked              8      /* kill me in 2.5.<early>. */
+#define PG_owner_priv_1                 8      /* Owner use. If pagecache, fs may use*/
 #define PG_arch_1               9
 #define PG_reserved            10
-#define PG_private             11      /* Has something at ->private */
+#define PG_private             11      /* If pagecache, has fs-private data */
 
 #define PG_writeback           12      /* Page is under writeback */
-#define PG_nosave              13      /* Used for system suspend/resume */
 #define PG_compound            14      /* Part of a compound page */
 #define PG_swapcache           15      /* Swap page: swp_entry_t in private */
 
 #define PG_mappedtodisk                16      /* Has blocks allocated on-disk */
 #define PG_reclaim             17      /* To be reclaimed asap */
-#define PG_nosave_free         18      /* Free, should not be written */
-#define PG_uncached            19      /* Page has been mapped as uncached */
+#define PG_buddy               19      /* Page is free, on buddy lists */
+
+/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
+#define PG_readahead           PG_reclaim /* Reminder to do async read-ahead */
 
+/* PG_owner_priv_1 users should have descriptive aliases */
+#define PG_checked             PG_owner_priv_1 /* Used by some filesystems */
+#define PG_pinned              PG_owner_priv_1 /* Xen pinned pagetable */
+
+#if (BITS_PER_LONG > 32)
 /*
- * Global page accounting.  One instance per CPU.  Only unsigned longs are
- * allowed.
+ * 64-bit-only flags build down from bit 31
+ *
+ * 32 bit  -------------------------------| FIELDS |       FLAGS         |
+ * 64 bit  |           FIELDS             | ??????         FLAGS         |
+ *         63                            32                              0
  */
-struct page_state {
-       unsigned long nr_dirty;         /* Dirty writeable pages */
-       unsigned long nr_writeback;     /* Pages under writeback */
-       unsigned long nr_unstable;      /* NFS unstable pages */
-       unsigned long nr_page_table_pages;/* Pages used for pagetables */
-       unsigned long nr_mapped;        /* mapped into pagetables */
-       unsigned long nr_slab;          /* In slab */
-#define GET_PAGE_STATE_LAST nr_slab
-
-       /*
-        * The below are zeroed by get_page_state().  Use get_full_page_state()
-        * to add up all these.
-        */
-       unsigned long pgpgin;           /* Disk reads */
-       unsigned long pgpgout;          /* Disk writes */
-       unsigned long pswpin;           /* swap reads */
-       unsigned long pswpout;          /* swap writes */
-       unsigned long pgalloc_high;     /* page allocations */
-
-       unsigned long pgalloc_normal;
-       unsigned long pgalloc_dma;
-       unsigned long pgfree;           /* page freeings */
-       unsigned long pgactivate;       /* pages moved inactive->active */
-       unsigned long pgdeactivate;     /* pages moved active->inactive */
-
-       unsigned long pgfault;          /* faults (major+minor) */
-       unsigned long pgmajfault;       /* faults (major only) */
-       unsigned long pgrefill_high;    /* inspected in refill_inactive_zone */
-       unsigned long pgrefill_normal;
-       unsigned long pgrefill_dma;
-
-       unsigned long pgsteal_high;     /* total highmem pages reclaimed */
-       unsigned long pgsteal_normal;
-       unsigned long pgsteal_dma;
-       unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
-       unsigned long pgscan_kswapd_normal;
-
-       unsigned long pgscan_kswapd_dma;
-       unsigned long pgscan_direct_high;/* total highmem pages scanned */
-       unsigned long pgscan_direct_normal;
-       unsigned long pgscan_direct_dma;
-       unsigned long pginodesteal;     /* pages reclaimed via inode freeing */
-
-       unsigned long slabs_scanned;    /* slab objects scanned */
-       unsigned long kswapd_steal;     /* pages reclaimed by kswapd */
-       unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
-       unsigned long pageoutrun;       /* kswapd's calls to page reclaim */
-       unsigned long allocstall;       /* direct reclaim calls */
-
-       unsigned long pgrotated;        /* pages rotated to tail of the LRU */
-       unsigned long nr_bounce;        /* pages for bounce buffers */
-};
-
-extern void get_page_state(struct page_state *ret);
-extern void get_page_state_node(struct page_state *ret, int node);
-extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned long offset);
-extern void __mod_page_state(unsigned long offset, unsigned long delta);
-
-#define read_page_state(member) \
-       __read_page_state(offsetof(struct page_state, member))
-
-#define mod_page_state(member, delta)  \
-       __mod_page_state(offsetof(struct page_state, member), (delta))
-
-#define inc_page_state(member) mod_page_state(member, 1UL)
-#define dec_page_state(member) mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta) mod_page_state(member, (delta))
-#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
-
-#define mod_page_state_zone(zone, member, delta)                               \
-       do {                                                                    \
-               unsigned offset;                                                \
-               if (is_highmem(zone))                                           \
-                       offset = offsetof(struct page_state, member##_high);    \
-               else if (is_normal(zone))                                       \
-                       offset = offsetof(struct page_state, member##_normal);  \
-               else                                                            \
-                       offset = offsetof(struct page_state, member##_dma);     \
-               __mod_page_state(offset, (delta));                              \
-       } while (0)
+#define PG_uncached            31      /* Page has been mapped as uncached */
+#endif
 
 /*
  * Manipulation of page state flags
@@ -186,7 +132,13 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta);
 #define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
 
 #define PageUptodate(page)     test_bit(PG_uptodate, &(page)->flags)
-#ifndef SetPageUptodate
+#ifdef CONFIG_S390
+static inline void SetPageUptodate(struct page *page)
+{
+       if (!test_and_set_bit(PG_uptodate, &page->flags))
+               page_clear_dirty(page);
+}
+#else
 #define SetPageUptodate(page)  set_bit(PG_uptodate, &(page)->flags)
 #endif
 #define ClearPageUptodate(page)        clear_bit(PG_uptodate, &(page)->flags)
@@ -198,22 +150,19 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta);
 #define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags)
 #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
 
-#define SetPageLRU(page)       set_bit(PG_lru, &(page)->flags)
 #define PageLRU(page)          test_bit(PG_lru, &(page)->flags)
-#define TestSetPageLRU(page)   test_and_set_bit(PG_lru, &(page)->flags)
-#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags)
+#define SetPageLRU(page)       set_bit(PG_lru, &(page)->flags)
+#define ClearPageLRU(page)     clear_bit(PG_lru, &(page)->flags)
+#define __ClearPageLRU(page)   __clear_bit(PG_lru, &(page)->flags)
 
 #define PageActive(page)       test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page)    set_bit(PG_active, &(page)->flags)
 #define ClearPageActive(page)  clear_bit(PG_active, &(page)->flags)
-#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)
-#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags)
+#define __ClearPageActive(page)        __clear_bit(PG_active, &(page)->flags)
 
 #define PageSlab(page)         test_bit(PG_slab, &(page)->flags)
-#define SetPageSlab(page)      set_bit(PG_slab, &(page)->flags)
-#define ClearPageSlab(page)    clear_bit(PG_slab, &(page)->flags)
-#define TestClearPageSlab(page)        test_and_clear_bit(PG_slab, &(page)->flags)
-#define TestSetPageSlab(page)  test_and_set_bit(PG_slab, &(page)->flags)
+#define __SetPageSlab(page)    __set_bit(PG_slab, &(page)->flags)
+#define __ClearPageSlab(page)  __clear_bit(PG_slab, &(page)->flags)
 
 #ifdef CONFIG_HIGHMEM
 #define PageHighMem(page)      is_highmem(page_zone(page))
@@ -225,6 +174,10 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta);
 #define SetPageChecked(page)   set_bit(PG_checked, &(page)->flags)
 #define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags)
 
+#define PagePinned(page)       test_bit(PG_pinned, &(page)->flags)
+#define SetPagePinned(page)    set_bit(PG_pinned, &(page)->flags)
+#define ClearPagePinned(page)  clear_bit(PG_pinned, &(page)->flags)
+
 #define PageReserved(page)     test_bit(PG_reserved, &(page)->flags)
 #define SetPageReserved(page)  set_bit(PG_reserved, &(page)->flags)
 #define ClearPageReserved(page)        clear_bit(PG_reserved, &(page)->flags)
@@ -236,64 +189,64 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta);
 #define __SetPagePrivate(page)  __set_bit(PG_private, &(page)->flags)
 #define __ClearPagePrivate(page) __clear_bit(PG_private, &(page)->flags)
 
+/*
+ * Only the test-and-set/test-and-clear operators exist for PG_writeback.
+ * The unconditional operators are risky: they bypass page accounting.
+ */
 #define PageWriteback(page)    test_bit(PG_writeback, &(page)->flags)
-#define SetPageWriteback(page)                                         \
-       do {                                                            \
-               if (!test_and_set_bit(PG_writeback,                     \
-                               &(page)->flags))                        \
-                       inc_page_state(nr_writeback);                   \
-       } while (0)
-#define TestSetPageWriteback(page)                                     \
-       ({                                                              \
-               int ret;                                                \
-               ret = test_and_set_bit(PG_writeback,                    \
-                                       &(page)->flags);                \
-               if (!ret)                                               \
-                       inc_page_state(nr_writeback);                   \
-               ret;                                                    \
-       })
-#define ClearPageWriteback(page)                                       \
-       do {                                                            \
-               if (test_and_clear_bit(PG_writeback,                    \
-                               &(page)->flags))                        \
-                       dec_page_state(nr_writeback);                   \
-       } while (0)
-#define TestClearPageWriteback(page)                                   \
-       ({                                                              \
-               int ret;                                                \
-               ret = test_and_clear_bit(PG_writeback,                  \
-                               &(page)->flags);                        \
-               if (ret)                                                \
-                       dec_page_state(nr_writeback);                   \
-               ret;                                                    \
-       })
-
-#define PageNosave(page)       test_bit(PG_nosave, &(page)->flags)
-#define SetPageNosave(page)    set_bit(PG_nosave, &(page)->flags)
-#define TestSetPageNosave(page)        test_and_set_bit(PG_nosave, &(page)->flags)
-#define ClearPageNosave(page)          clear_bit(PG_nosave, &(page)->flags)
-#define TestClearPageNosave(page)      test_and_clear_bit(PG_nosave, &(page)->flags)
-
-#define PageNosaveFree(page)   test_bit(PG_nosave_free, &(page)->flags)
-#define SetPageNosaveFree(page)        set_bit(PG_nosave_free, &(page)->flags)
-#define ClearPageNosaveFree(page)              clear_bit(PG_nosave_free, &(page)->flags)
+#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback,      \
+                                                       &(page)->flags)
+#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback,  \
+                                                       &(page)->flags)
+
+#define PageBuddy(page)                test_bit(PG_buddy, &(page)->flags)
+#define __SetPageBuddy(page)   __set_bit(PG_buddy, &(page)->flags)
+#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags)
 
 #define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
 #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
 #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
 
+#define PageReadahead(page)    test_bit(PG_readahead, &(page)->flags)
+#define SetPageReadahead(page) set_bit(PG_readahead, &(page)->flags)
+#define ClearPageReadahead(page) clear_bit(PG_readahead, &(page)->flags)
+
 #define PageReclaim(page)      test_bit(PG_reclaim, &(page)->flags)
 #define SetPageReclaim(page)   set_bit(PG_reclaim, &(page)->flags)
 #define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags)
 #define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
 
-#ifdef CONFIG_HUGETLB_PAGE
 #define PageCompound(page)     test_bit(PG_compound, &(page)->flags)
-#else
-#define PageCompound(page)     0
-#endif
-#define SetPageCompound(page)  set_bit(PG_compound, &(page)->flags)
-#define ClearPageCompound(page)        clear_bit(PG_compound, &(page)->flags)
+#define __SetPageCompound(page)        __set_bit(PG_compound, &(page)->flags)
+#define __ClearPageCompound(page) __clear_bit(PG_compound, &(page)->flags)
+
+/*
+ * PG_reclaim is used in combination with PG_compound to mark the
+ * head and tail of a compound page
+ *
+ * PG_compound & PG_reclaim    => Tail page
+ * PG_compound & ~PG_reclaim   => Head page
+ */
+
+#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
+
+#define PageTail(page) (((page)->flags & PG_head_tail_mask) \
+                               == PG_head_tail_mask)   /* both bits set */
+
+static inline void __SetPageTail(struct page *page)
+{
+       page->flags |= PG_head_tail_mask;
+}
+
+static inline void __ClearPageTail(struct page *page)
+{
+       page->flags &= ~PG_head_tail_mask;
+}
+
+#define PageHead(page) (((page)->flags & PG_head_tail_mask) \
+                               == (1L << PG_compound)) /* compound only */
+#define __SetPageHead(page)    __SetPageCompound(page)
+#define __ClearPageHead(page)  __ClearPageCompound(page)
 
 #ifdef CONFIG_SWAP
 #define PageSwapCache(page)    test_bit(PG_swapcache, &(page)->flags)
@@ -309,15 +262,11 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta);
 
 struct page;   /* forward declaration */
 
-int test_clear_page_dirty(struct page *page);
+extern void cancel_dirty_page(struct page *page, unsigned int account_size);
+
 int test_clear_page_writeback(struct page *page);
 int test_set_page_writeback(struct page *page);
 
-static inline void clear_page_dirty(struct page *page)
-{
-       test_clear_page_dirty(page);
-}
-
 static inline void set_page_writeback(struct page *page)
 {
        test_set_page_writeback(page);