e1000e: Use the instance of net_device_stats from net_device.

[safe/jmp/linux-2.6] / mm / swapfile.c
diff --git a/mm/swapfile.c b/mm/swapfile.c

index 7632107..a1bc6b9 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -33,6 +33,7 @@
  #include <asm/pgtable.h>
  #include <asm/tlbflush.h>
  #include <linux/swapops.h>
+#include <linux/page_cgroup.h>
  
  static DEFINE_SPINLOCK(swap_lock);
  static unsigned int nr_swapfiles;
@@ -52,6 +53,59 @@ static struct swap_info_struct swap_info[MAX_SWAPFILES];
  
  static DEFINE_MUTEX(swapon_mutex);
  
+/* For reference count accounting in swap_map */
+/* enum for swap_map[] handling. internal use only */
+enum {
+       SWAP_MAP = 0,   /* ops for reference from swap users */
+       SWAP_CACHE,     /* ops for reference from swap cache */
+};
+
+static inline int swap_count(unsigned short ent)
+{
+       return ent & SWAP_COUNT_MASK;
+}
+
+static inline bool swap_has_cache(unsigned short ent)
+{
+       return !!(ent & SWAP_HAS_CACHE);
+}
+
+static inline unsigned short encode_swapmap(int count, bool has_cache)
+{
+       unsigned short ret = count;
+
+       if (has_cache)
+               return SWAP_HAS_CACHE | ret;
+       return ret;
+}
+
+/* returns 1 if swap entry is freed */
+static int
+__try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
+{
+       int type = si - swap_info;
+       swp_entry_t entry = swp_entry(type, offset);
+       struct page *page;
+       int ret = 0;
+
+       page = find_get_page(&swapper_space, entry.val);
+       if (!page)
+               return 0;
+       /*
+        * This function is called from scan_swap_map(), which is itself
+        * called by vmscan.c when reclaiming pages. So we already hold a lock
+        * on a page here and have to use trylock to avoid deadlock. This is a
+        * special case; in usual operations you should use try_to_free_swap()
+        * with an explicit lock_page().
+        */
+       if (trylock_page(page)) {
+               ret = try_to_free_swap(page);
+               unlock_page(page);
+       }
+       page_cache_release(page);
+       return ret;
+}
+
  /*
   * We need this because the bdev->unplug_fn can sleep and we cannot
   * hold swap_lock while calling the unplug_fn. And swap_lock
@@ -96,7 +150,7 @@ static int discard_swap(struct swap_info_struct *si)
  
         list_for_each_entry(se, &si->extent_list, list) {
                 sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
-               pgoff_t nr_blocks = se->nr_pages << (PAGE_SHIFT - 9);
+               sector_t nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
  
                 if (se->start_page == 0) {
                         /* Do not discard the swap header page! */
@@ -107,7 +161,8 @@ static int discard_swap(struct swap_info_struct *si)
                 }
  
                 err = blkdev_issue_discard(si->bdev, start_block,
-                                               nr_blocks, GFP_KERNEL);
+                                               nr_blocks, GFP_KERNEL,
+                                               DISCARD_FL_BARRIER);
                 if (err)
                         break;
  
@@ -133,7 +188,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
                     start_page < se->start_page + se->nr_pages) {
                         pgoff_t offset = start_page - se->start_page;
                         sector_t start_block = se->start_block + offset;
-                       pgoff_t nr_blocks = se->nr_pages - offset;
+                       sector_t nr_blocks = se->nr_pages - offset;
  
                         if (nr_blocks > nr_pages)
                                 nr_blocks = nr_pages;
@@ -146,7 +201,8 @@ static void discard_swap_cluster(struct swap_info_struct *si,
                         start_block <<= PAGE_SHIFT - 9;
                         nr_blocks <<= PAGE_SHIFT - 9;
                         if (blkdev_issue_discard(si->bdev, start_block,
-                                                       nr_blocks, GFP_NOIO))
+                                                       nr_blocks, GFP_NOIO,
+                                                       DISCARD_FL_BARRIER))
                                 break;
                 }
  
@@ -166,7 +222,8 @@ static int wait_for_discard(void *word)
  #define SWAPFILE_CLUSTER       256
  #define LATENCY_LIMIT          256
  
-static inline unsigned long scan_swap_map(struct swap_info_struct *si)
+static inline unsigned long scan_swap_map(struct swap_info_struct *si,
+                                         int cache)
  {
         unsigned long offset;
         unsigned long scan_base;
@@ -272,6 +329,19 @@ checks:
                 goto no_page;
         if (offset > si->highest_bit)
                 scan_base = offset = si->lowest_bit;
+
+       /* reuse swap entry of cache-only swap if not busy. */
+       if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+               int swap_was_freed;
+               spin_unlock(&swap_lock);
+               swap_was_freed = __try_to_reclaim_swap(si, offset);
+               spin_lock(&swap_lock);
+               /* entry was freed successfully, try to use this again */
+               if (swap_was_freed)
+                       goto checks;
+               goto scan; /* check next one */
+       }
+
         if (si->swap_map[offset])
                 goto scan;
  
@@ -284,7 +354,10 @@ checks:
                 si->lowest_bit = si->max;
                 si->highest_bit = 0;
         }
-       si->swap_map[offset] = 1;
+       if (cache == SWAP_CACHE) /* at usual swap-out via vmscan.c */
+               si->swap_map[offset] = encode_swapmap(0, true);
+       else /* at suspend */
+               si->swap_map[offset] = encode_swapmap(1, false);
         si->cluster_next = offset + 1;
         si->flags -= SWP_SCANNING;
  
@@ -350,6 +423,10 @@ scan:
                         spin_lock(&swap_lock);
                         goto checks;
                 }
+               if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+                       spin_lock(&swap_lock);
+                       goto checks;
+               }
                 if (unlikely(--latency_ration < 0)) {
                         cond_resched();
                         latency_ration = LATENCY_LIMIT;
@@ -361,6 +438,10 @@ scan:
                         spin_lock(&swap_lock);
                         goto checks;
                 }
+               if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+                       spin_lock(&swap_lock);
+                       goto checks;
+               }
                 if (unlikely(--latency_ration < 0)) {
                         cond_resched();
                         latency_ration = LATENCY_LIMIT;
@@ -400,7 +481,8 @@ swp_entry_t get_swap_page(void)
                         continue;
  
                 swap_list.next = next;
-               offset = scan_swap_map(si);
+               /* This is called for allocating swap entry for cache */
+               offset = scan_swap_map(si, SWAP_CACHE);
                 if (offset) {
                         spin_unlock(&swap_lock);
                         return swp_entry(type, offset);
@@ -414,6 +496,7 @@ noswap:
         return (swp_entry_t) {0};
  }
  
+/* The only caller of this function is now the suspend routine */
  swp_entry_t get_swap_page_of_type(int type)
  {
         struct swap_info_struct *si;
@@ -423,7 +506,8 @@ swp_entry_t get_swap_page_of_type(int type)
         si = swap_info + type;
         if (si->flags & SWP_WRITEOK) {
                 nr_swap_pages--;
-               offset = scan_swap_map(si);
+               /* This is called for allocating swap entry, not cache */
+               offset = scan_swap_map(si, SWAP_MAP);
                 if (offset) {
                         spin_unlock(&swap_lock);
                         return swp_entry(type, offset);
@@ -470,24 +554,40 @@ out:
         return NULL;
  }
  
-static int swap_entry_free(struct swap_info_struct *p, unsigned long offset)
+static int swap_entry_free(struct swap_info_struct *p,
+                          swp_entry_t ent, int cache)
  {
-       int count = p->swap_map[offset];
-
-       if (count < SWAP_MAP_MAX) {
-               count--;
-               p->swap_map[offset] = count;
-               if (!count) {
-                       if (offset < p->lowest_bit)
-                               p->lowest_bit = offset;
-                       if (offset > p->highest_bit)
-                               p->highest_bit = offset;
-                       if (p->prio > swap_info[swap_list.next].prio)
-                               swap_list.next = p - swap_info;
-                       nr_swap_pages++;
-                       p->inuse_pages--;
+       unsigned long offset = swp_offset(ent);
+       int count = swap_count(p->swap_map[offset]);
+       bool has_cache;
+
+       has_cache = swap_has_cache(p->swap_map[offset]);
+
+       if (cache == SWAP_MAP) { /* dropping usage count of swap */
+               if (count < SWAP_MAP_MAX) {
+                       count--;
+                       p->swap_map[offset] = encode_swapmap(count, has_cache);
                 }
+       } else { /* dropping swap cache flag */
+               VM_BUG_ON(!has_cache);
+               p->swap_map[offset] = encode_swapmap(count, false);
+
+       }
+       /* return code. */
+       count = p->swap_map[offset];
+       /* free if no reference */
+       if (!count) {
+               if (offset < p->lowest_bit)
+                       p->lowest_bit = offset;
+               if (offset > p->highest_bit)
+                       p->highest_bit = offset;
+               if (p->prio > swap_info[swap_list.next].prio)
+                       swap_list.next = p - swap_info;
+               nr_swap_pages++;
+               p->inuse_pages--;
         }
+       if (!swap_count(count))
+               mem_cgroup_uncharge_swap(ent);
         return count;
  }
  
@@ -501,12 +601,36 @@ void swap_free(swp_entry_t entry)
  
         p = swap_info_get(entry);
         if (p) {
-               swap_entry_free(p, swp_offset(entry));
+               swap_entry_free(p, entry, SWAP_MAP);
                 spin_unlock(&swap_lock);
         }
  }
  
  /*
+ * Called after dropping swapcache to decrease refcnt to swap entries.
+ */
+void swapcache_free(swp_entry_t entry, struct page *page)
+{
+       struct swap_info_struct *p;
+       int ret;
+
+       p = swap_info_get(entry);
+       if (p) {
+               ret = swap_entry_free(p, entry, SWAP_CACHE);
+               if (page) {
+                       bool swapout;
+                       if (ret)
+                               swapout = true; /* the end of swap out */
+                       else
+                               swapout = false; /* no more swap users! */
+                       mem_cgroup_uncharge_swapcache(page, entry, swapout);
+               }
+               spin_unlock(&swap_lock);
+       }
+       return;
+}
+
+/*
   * How many references to page are currently swapped out?
   */
  static inline int page_swapcount(struct page *page)
@@ -518,8 +642,7 @@ static inline int page_swapcount(struct page *page)
         entry.val = page_private(page);
         p = swap_info_get(entry);
         if (p) {
-               /* Subtract the 1 for the swap cache itself */
-               count = p->swap_map[swp_offset(entry)] - 1;
+               count = swap_count(p->swap_map[swp_offset(entry)]);
                 spin_unlock(&swap_lock);
         }
         return count;
@@ -571,17 +694,17 @@ int try_to_free_swap(struct page *page)
   * Free the swap entry like above, but also try to
   * free the page cache entry if it is the last user.
   */
-void free_swap_and_cache(swp_entry_t entry)
+int free_swap_and_cache(swp_entry_t entry)
  {
-       struct swap_info_struct * p;
+       struct swap_info_struct *p;
         struct page *page = NULL;
  
-       if (is_migration_entry(entry))
-               return;
+       if (non_swap_entry(entry))
+               return 1;
  
         p = swap_info_get(entry);
         if (p) {
-               if (swap_entry_free(p, swp_offset(entry)) == 1) {
+               if (swap_entry_free(p, entry, SWAP_MAP) == SWAP_HAS_CACHE) {
                         page = find_get_page(&swapper_space, entry.val);
                         if (page && !trylock_page(page)) {
                                 page_cache_release(page);
@@ -603,6 +726,7 @@ void free_swap_and_cache(swp_entry_t entry)
                 unlock_page(page);
                 page_cache_release(page);
         }
+       return p != NULL;
  }
  
  #ifdef CONFIG_HIBERNATION
@@ -631,7 +755,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
  
                 if (!bdev) {
                         if (bdev_p)
-                               *bdev_p = sis->bdev;
+                               *bdev_p = bdgrab(sis->bdev);
  
                         spin_unlock(&swap_lock);
                         return i;
@@ -643,7 +767,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
                                         struct swap_extent, list);
                         if (se->start_block == offset) {
                                 if (bdev_p)
-                                       *bdev_p = sis->bdev;
+                                       *bdev_p = bdgrab(sis->bdev);
  
                                 spin_unlock(&swap_lock);
                                 bdput(bdev);
@@ -689,17 +813,20 @@ unsigned int count_swap_pages(int type, int free)
  static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr, swp_entry_t entry, struct page *page)
  {
+       struct mem_cgroup *ptr = NULL;
         spinlock_t *ptl;
         pte_t *pte;
         int ret = 1;
  
-       if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
+       if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
                 ret = -ENOMEM;
+               goto out_nolock;
+       }
  
         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
         if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
                 if (ret > 0)
-                       mem_cgroup_uncharge_page(page);
+                       mem_cgroup_cancel_charge_swapin(ptr);
                 ret = 0;
                 goto out;
         }
@@ -709,6 +836,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
         set_pte_at(vma->vm_mm, addr, pte,
                    pte_mkold(mk_pte(page, vma->vm_page_prot)));
         page_add_anon_rmap(page, vma, addr);
+       mem_cgroup_commit_charge_swapin(page, ptr);
         swap_free(entry);
         /*
          * Move the page to the active list so it is not
@@ -717,6 +845,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
         activate_page(page);
  out:
         pte_unmap_unlock(pte, ptl);
+out_nolock:
         return ret;
  }
  
@@ -882,7 +1011,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
                         i = 1;
                 }
                 count = si->swap_map[i];
-               if (count && count != SWAP_MAP_BAD)
+               if (count && swap_count(count) != SWAP_MAP_BAD)
                         break;
         }
         return i;
@@ -986,13 +1115,13 @@ static int try_to_unuse(unsigned int type)
                  */
                 shmem = 0;
                 swcount = *swap_map;
-               if (swcount > 1) {
+               if (swap_count(swcount)) {
                         if (start_mm == &init_mm)
                                 shmem = shmem_unuse(entry, page);
                         else
                                 retval = unuse_mm(start_mm, entry, page);
                 }
-               if (*swap_map > 1) {
+               if (swap_count(*swap_map)) {
                         int set_start_mm = (*swap_map >= swcount);
                         struct list_head *p = &start_mm->mmlist;
                         struct mm_struct *new_start_mm = start_mm;
@@ -1002,7 +1131,7 @@ static int try_to_unuse(unsigned int type)
                         atomic_inc(&new_start_mm->mm_users);
                         atomic_inc(&prev_mm->mm_users);
                         spin_lock(&mmlist_lock);
-                       while (*swap_map > 1 && !retval && !shmem &&
+                       while (swap_count(*swap_map) && !retval && !shmem &&
                                         (p = p->next) != &start_mm->mmlist) {
                                 mm = list_entry(p, struct mm_struct, mmlist);
                                 if (!atomic_inc_not_zero(&mm->mm_users))
@@ -1014,14 +1143,16 @@ static int try_to_unuse(unsigned int type)
                                 cond_resched();
  
                                 swcount = *swap_map;
-                               if (swcount <= 1)
+                               if (!swap_count(swcount)) /* any usage ? */
                                         ;
                                 else if (mm == &init_mm) {
                                         set_start_mm = 1;
                                         shmem = shmem_unuse(entry, page);
                                 } else
                                         retval = unuse_mm(mm, entry, page);
-                               if (set_start_mm && *swap_map < swcount) {
+
+                               if (set_start_mm &&
+                                   swap_count(*swap_map) < swcount) {
                                         mmput(new_start_mm);
                                         atomic_inc(&mm->mm_users);
                                         new_start_mm = mm;
@@ -1048,21 +1179,25 @@ static int try_to_unuse(unsigned int type)
                 }
  
                 /*
-                * How could swap count reach 0x7fff when the maximum
-                * pid is 0x7fff, and there's no way to repeat a swap
-                * page within an mm (except in shmem, where it's the
-                * shared object which takes the reference count)?
-                * We believe SWAP_MAP_MAX cannot occur in Linux 2.4.
-                *
+                * How could swap count reach 0x7ffe?
+                * There's no way to repeat a swap page within an mm
+                * (except in shmem, where it's the shared object which takes
+                * the reference count).
+                * We believe SWAP_MAP_MAX cannot occur. (If it does, unsigned
+                * short is too small.)
                  * If that's wrong, then we should worry more about
                  * exit_mmap() and do_munmap() cases described above:
                  * we might be resetting SWAP_MAP_MAX too early here.
                  * We know "Undead"s can happen, they're okay, so don't
                  * report them; but do report if we reset SWAP_MAP_MAX.
                  */
-               if (*swap_map == SWAP_MAP_MAX) {
+               /* We might release the lock_page() in unuse_mm(). */
+               if (!PageSwapCache(page) || page_private(page) != entry.val)
+                       goto retry;
+
+               if (swap_count(*swap_map) == SWAP_MAP_MAX) {
                         spin_lock(&swap_lock);
-                       *swap_map = 1;
+                       *swap_map = encode_swapmap(0, true);
                         spin_unlock(&swap_lock);
                         reset_overflow = 1;
                 }
@@ -1080,7 +1215,8 @@ static int try_to_unuse(unsigned int type)
                  * pages would be incorrect if swap supported "shared
                  * private" pages, but they are handled by tmpfs files.
                  */
-               if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
+               if (swap_count(*swap_map) &&
+                    PageDirty(page) && PageSwapCache(page)) {
                         struct writeback_control wbc = {
                                 .sync_mode = WB_SYNC_NONE,
                         };
@@ -1107,6 +1243,7 @@ static int try_to_unuse(unsigned int type)
                  * mark page dirty so shrink_page_list will preserve it.
                  */
                 SetPageDirty(page);
+retry:
                 unlock_page(page);
                 page_cache_release(page);
  
@@ -1371,27 +1508,7 @@ out:
         return ret;
  }
  
-#if 0  /* We don't need this yet */
-#include <linux/backing-dev.h>
-int page_queue_congested(struct page *page)
-{
-       struct backing_dev_info *bdi;
-
-       VM_BUG_ON(!PageLocked(page));   /* It pins the swap_info_struct */
-
-       if (PageSwapCache(page)) {
-               swp_entry_t entry = { .val = page_private(page) };
-               struct swap_info_struct *sis;
-
-               sis = get_swap_info_struct(swp_type(entry));
-               bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
-       } else
-               bdi = page->mapping->backing_dev_info;
-       return bdi_write_congested(bdi);
-}
-#endif
-
-asmlinkage long sys_swapoff(const char __user * specialfile)
+SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
  {
         struct swap_info_struct * p = NULL;
         unsigned short *swap_map;
@@ -1458,9 +1575,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
         p->flags &= ~SWP_WRITEOK;
         spin_unlock(&swap_lock);
  
-       current->flags |= PF_SWAPOFF;
+       current->flags |= PF_OOM_ORIGIN;
         err = try_to_unuse(type);
-       current->flags &= ~PF_SWAPOFF;
+       current->flags &= ~PF_OOM_ORIGIN;
  
         if (err) {
                 /* re-insert swap space back into swap_list */
@@ -1511,6 +1628,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
         spin_unlock(&swap_lock);
         mutex_unlock(&swapon_mutex);
         vfree(swap_map);
+       /* Destroy swap account information */
+       swap_cgroup_swapoff(type);
+
         inode = mapping->host;
         if (S_ISBLK(inode->i_mode)) {
                 struct block_device *bdev = I_BDEV(inode);
@@ -1644,7 +1764,7 @@ late_initcall(max_swapfiles_check);
   *
   * The swapon system call
   */
-asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
+SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
  {
         struct swap_info_struct * p;
         char *name = NULL;
@@ -1828,6 +1948,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                 }
                 swap_map[page_nr] = SWAP_MAP_BAD;
         }
+
+       error = swap_cgroup_swapon(type, maxpages);
+       if (error)
+               goto bad_swap;
+
         nr_good_pages = swap_header->info.last_page -
                         swap_header->info.nr_badpages -
                         1 /* header page */;
@@ -1849,13 +1974,14 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                 goto bad_swap;
         }
  
-       if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
-               p->flags |= SWP_SOLIDSTATE;
-               srandom32((u32)get_seconds());
-               p->cluster_next = 1 + (random32() % p->highest_bit);
+       if (p->bdev) {
+               if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+                       p->flags |= SWP_SOLIDSTATE;
+                       p->cluster_next = 1 + (random32() % p->highest_bit);
+               }
+               if (discard_swap(p) == 0)
+                       p->flags |= SWP_DISCARDABLE;
         }
-       if (discard_swap(p) == 0)
-               p->flags |= SWP_DISCARDABLE;
  
         mutex_lock(&swapon_mutex);
         spin_lock(&swap_lock);
@@ -1900,6 +2026,7 @@ bad_swap:
                 bd_release(bdev);
         }
         destroy_swap_extents(p);
+       swap_cgroup_swapoff(type);
  bad_swap_2:
         spin_lock(&swap_lock);
         p->swap_file = NULL;
@@ -1945,15 +2072,23 @@ void si_swapinfo(struct sysinfo *val)
   *
   * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
   * "permanent", but will be reclaimed by the next swapoff.
+ * Returns 0 on success, or a negative error code in the following cases:
+ * - success -> 0
+ * - swp_entry is invalid -> -EINVAL
+ * - swp_entry is migration entry -> -EINVAL
+ * - swap-cache reference is requested but there is already one. -> -EEXIST
+ * - swap-cache reference is requested but the entry is not used. -> -ENOENT
   */
-int swap_duplicate(swp_entry_t entry)
+static int __swap_duplicate(swp_entry_t entry, bool cache)
  {
         struct swap_info_struct * p;
         unsigned long offset, type;
-       int result = 0;
+       int result = -EINVAL;
+       int count;
+       bool has_cache;
  
-       if (is_migration_entry(entry))
-               return 1;
+       if (non_swap_entry(entry))
+               return -EINVAL;
  
         type = swp_type(entry);
         if (type >= nr_swapfiles)
@@ -1962,17 +2097,40 @@ int swap_duplicate(swp_entry_t entry)
         offset = swp_offset(entry);
  
         spin_lock(&swap_lock);
-       if (offset < p->max && p->swap_map[offset]) {
-               if (p->swap_map[offset] < SWAP_MAP_MAX - 1) {
-                       p->swap_map[offset]++;
-                       result = 1;
-               } else if (p->swap_map[offset] <= SWAP_MAP_MAX) {
+
+       if (unlikely(offset >= p->max))
+               goto unlock_out;
+
+       count = swap_count(p->swap_map[offset]);
+       has_cache = swap_has_cache(p->swap_map[offset]);
+
+       if (cache == SWAP_CACHE) { /* called for swapcache/swapin-readahead */
+
+               /* set SWAP_HAS_CACHE if there is no cache and entry is used */
+               if (!has_cache && count) {
+                       p->swap_map[offset] = encode_swapmap(count, true);
+                       result = 0;
+               } else if (has_cache) /* someone added cache */
+                       result = -EEXIST;
+               else if (!count) /* no users */
+                       result = -ENOENT;
+
+       } else if (count || has_cache) {
+               if (count < SWAP_MAP_MAX - 1) {
+                       p->swap_map[offset] = encode_swapmap(count + 1,
+                                                            has_cache);
+                       result = 0;
+               } else if (count <= SWAP_MAP_MAX) {
                         if (swap_overflow++ < 5)
-                               printk(KERN_WARNING "swap_dup: swap entry overflow\n");
-                       p->swap_map[offset] = SWAP_MAP_MAX;
-                       result = 1;
+                               printk(KERN_WARNING
+                                      "swap_dup: swap entry overflow\n");
+                       p->swap_map[offset] = encode_swapmap(SWAP_MAP_MAX,
+                                                             has_cache);
+                       result = 0;
                 }
-       }
+       } else
+               result = -ENOENT; /* unused swap entry */
+unlock_out:
         spin_unlock(&swap_lock);
  out:
         return result;
@@ -1981,6 +2139,27 @@ bad_file:
         printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
         goto out;
  }
+/*
+ * increase reference count of swap entry by 1.
+ */
+void swap_duplicate(swp_entry_t entry)
+{
+       __swap_duplicate(entry, SWAP_MAP);
+}
+
+/*
+ * @entry: swap entry for which we allocate swap cache.
+ *
+ * Called when allocating swap cache for an existing swap entry.
+ * This can return error codes. Returns 0 on success.
+ * -EEXIST means there is already a swap cache.
+ * Note: return code is different from swap_duplicate().
+ */
+int swapcache_prepare(swp_entry_t entry)
+{
+       return __swap_duplicate(entry, SWAP_CACHE);
+}
+
  
  struct swap_info_struct *
  get_swap_info_struct(unsigned type)
@@ -2019,7 +2198,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
                 /* Don't read in free or bad pages */
                 if (!si->swap_map[toff])
                         break;
-               if (si->swap_map[toff] == SWAP_MAP_BAD)
+               if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
                         break;
         }
         /* Count contiguous allocated slots below our target */
@@ -2027,7 +2206,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
                 /* Don't read in free or bad pages */
                 if (!si->swap_map[toff])
                         break;
-               if (si->swap_map[toff] == SWAP_MAP_BAD)
+               if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
                         break;
         }
         spin_unlock(&swap_lock);