intelfb delousing
[safe/jmp/linux-2.6] / mm / swapfile.c
index 6da4b28..cc367f7 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/rmap.h>
 #include <linux/security.h>
 #include <linux/backing-dev.h>
+#include <linux/mutex.h>
+#include <linux/capability.h>
 #include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
@@ -43,14 +45,14 @@ static const char Unused_offset[] = "Unused swap offset entry ";
 
 struct swap_list_t swap_list = {-1, -1};
 
-struct swap_info_struct swap_info[MAX_SWAPFILES];
+static struct swap_info_struct swap_info[MAX_SWAPFILES];
 
-static DECLARE_MUTEX(swapon_sem);
+static DEFINE_MUTEX(swapon_mutex);
 
 /*
  * We need this because the bdev->unplug_fn can sleep and we cannot
  * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a semaphore.
+ * cannot be turned into a mutex.
  */
 static DECLARE_RWSEM(swap_unplug_sem);
 
@@ -114,7 +116,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
                                last_in_cluster = offset + SWAPFILE_CLUSTER;
                        else if (offset == last_in_cluster) {
                                spin_lock(&swap_lock);
-                               si->cluster_next = offset-SWAPFILE_CLUSTER-1;
+                               si->cluster_next = offset-SWAPFILE_CLUSTER+1;
                                goto cluster;
                        }
                        if (unlikely(--latency_ration < 0)) {
@@ -393,20 +395,29 @@ void free_swap_and_cache(swp_entry_t entry)
        struct swap_info_struct * p;
        struct page *page = NULL;
 
+       if (is_migration_entry(entry))
+               return;
+
        p = swap_info_get(entry);
        if (p) {
-               if (swap_entry_free(p, swp_offset(entry)) == 1)
-                       page = find_trylock_page(&swapper_space, entry.val);
+               if (swap_entry_free(p, swp_offset(entry)) == 1) {
+                       page = find_get_page(&swapper_space, entry.val);
+                       if (page && unlikely(TestSetPageLocked(page))) {
+                               page_cache_release(page);
+                               page = NULL;
+                       }
+               }
                spin_unlock(&swap_lock);
        }
        if (page) {
                int one_user;
 
                BUG_ON(PagePrivate(page));
-               page_cache_get(page);
                one_user = (page_count(page) == 2);
                /* Only cache user (+us), or swap space full? Free it! */
-               if (!PageWriteback(page) && (one_user || vm_swap_full())) {
+               /* Also recheck PageSwapCache after page is locked (above) */
+               if (PageSwapCache(page) && !PageWriteback(page) &&
+                                       (one_user || vm_swap_full())) {
                        delete_from_swap_cache(page);
                        SetPageDirty(page);
                }
@@ -415,6 +426,61 @@ void free_swap_and_cache(swp_entry_t entry)
        }
 }
 
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/*
+ * Return the index of the first SWP_WRITEOK swap area whose backing block
+ * device is @device, or of the first SWP_WRITEOK area if @device is 0;
+ * -ENODEV if there is none.  Needed for software suspend.  Device numbers
+ * are compared (not inodes), so that inode aliasing is allowed.
+ */
+int swap_type_of(dev_t device)
+{
+       int i;
+
+       spin_lock(&swap_lock);
+       for (i = 0; i < nr_swapfiles; i++) {
+               struct inode *inode;
+
+               if (!(swap_info[i].flags & SWP_WRITEOK))
+                       continue;
+               if (!device) {
+                       spin_unlock(&swap_lock);
+                       return i;
+               }
+               inode = swap_info[i].swap_file->f_dentry->d_inode;
+               if (S_ISBLK(inode->i_mode) &&
+                   device == MKDEV(imajor(inode), iminor(inode))) {
+                       spin_unlock(&swap_lock);
+                       return i;
+               }
+       }
+       spin_unlock(&swap_lock);
+       return -ENODEV;
+}
+
+/*
+ * Report the size, in pages, of swap area @type: all of its pages, or only
+ * those not in use when @free is non-zero.  The result is 0 if @type is not
+ * below nr_swapfiles or the area is not SWP_WRITEOK.  For software suspend.
+ */
+unsigned int count_swap_pages(int type, int free)
+{
+       unsigned int nr_pages = 0;
+
+       if (!(type < nr_swapfiles))
+               return nr_pages;
+
+       spin_lock(&swap_lock);
+       if (swap_info[type].flags & SWP_WRITEOK) {
+               nr_pages = swap_info[type].pages;
+               if (free)
+                       nr_pages -= swap_info[type].inuse_pages;
+       }
+       spin_unlock(&swap_lock);
+       return nr_pages;
+}
+#endif
+
 /*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
@@ -706,10 +772,8 @@ static int try_to_unuse(unsigned int type)
                        while (*swap_map > 1 && !retval &&
                                        (p = p->next) != &start_mm->mmlist) {
                                mm = list_entry(p, struct mm_struct, mmlist);
-                               if (atomic_inc_return(&mm->mm_users) == 1) {
-                                       atomic_dec(&mm->mm_users);
+                               if (!atomic_inc_not_zero(&mm->mm_users))
                                        continue;
-                               }
                                spin_unlock(&mmlist_lock);
                                mmput(prev_mm);
                                prev_mm = mm;
@@ -1160,7 +1224,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
        up_write(&swap_unplug_sem);
 
        destroy_swap_extents(p);
-       down(&swapon_sem);
+       mutex_lock(&swapon_mutex);
        spin_lock(&swap_lock);
        drain_mmlist();
 
@@ -1179,7 +1243,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
        p->swap_map = NULL;
        p->flags = 0;
        spin_unlock(&swap_lock);
-       up(&swapon_sem);
+       mutex_unlock(&swapon_mutex);
        vfree(swap_map);
        inode = mapping->host;
        if (S_ISBLK(inode->i_mode)) {
@@ -1187,9 +1251,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
                set_blocksize(bdev, p->old_block_size);
                bd_release(bdev);
        } else {
-               down(&inode->i_sem);
+               mutex_lock(&inode->i_mutex);
                inode->i_flags &= ~S_SWAPFILE;
-               up(&inode->i_sem);
+               mutex_unlock(&inode->i_mutex);
        }
        filp_close(swap_file, NULL);
        err = 0;
@@ -1208,7 +1272,7 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
        int i;
        loff_t l = *pos;
 
-       down(&swapon_sem);
+       mutex_lock(&swapon_mutex);
 
        for (i = 0; i < nr_swapfiles; i++, ptr++) {
                if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
@@ -1237,7 +1301,7 @@ static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
 
 static void swap_stop(struct seq_file *swap, void *v)
 {
-       up(&swapon_sem);
+       mutex_unlock(&swapon_mutex);
 }
 
 static int swap_show(struct seq_file *swap, void *v)
@@ -1328,19 +1392,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                if (!(p->flags & SWP_USED))
                        break;
        error = -EPERM;
-       /*
-        * Test if adding another swap device is possible. There are
-        * two limiting factors: 1) the number of bits for the swap
-        * type swp_entry_t definition and 2) the number of bits for
-        * the swap type in the swap ptes as defined by the different
-        * architectures. To honor both limitations a swap entry
-        * with swap offset 0 and swap type ~0UL is created, encoded
-        * to a swap pte, decoded to a swp_entry_t again and finally
-        * the swap type part is extracted. This will mask all bits
-        * from the initial ~0UL that can't be encoded in either the
-        * swp_entry_t or the architecture definition of a swap pte.
-        */
-       if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
+       if (type >= MAX_SWAPFILES) {
                spin_unlock(&swap_lock);
                goto out;
        }
@@ -1406,7 +1458,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                p->bdev = bdev;
        } else if (S_ISREG(inode->i_mode)) {
                p->bdev = inode->i_sb->s_bdev;
-               down(&inode->i_sem);
+               mutex_lock(&inode->i_mutex);
                did_down = 1;
                if (IS_SWAPFILE(inode)) {
                        error = -EBUSY;
@@ -1425,8 +1477,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                error = -EINVAL;
                goto bad_swap;
        }
-       page = read_cache_page(mapping, 0,
-                       (filler_t *)mapping->a_ops->readpage, swap_file);
+       page = read_mapping_page(mapping, 0, swap_file);
        if (IS_ERR(page)) {
                error = PTR_ERR(page);
                goto bad_swap;
@@ -1442,7 +1493,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
        else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10))
                swap_header_version = 2;
        else {
-               printk("Unable to find swap-space signature\n");
+               printk(KERN_ERR "Unable to find swap-space signature\n");
                error = -EINVAL;
                goto bad_swap;
        }
@@ -1493,7 +1544,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                        goto bad_swap;
                if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
                        goto bad_swap;
-               
+
                /* OK, set up the swap map and apply the bad block list */
                if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
                        error = -ENOMEM;
@@ -1502,17 +1553,17 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 
                error = 0;
                memset(p->swap_map, 0, maxpages * sizeof(short));
-               for (i=0; i<swap_header->info.nr_badpages; i++) {
-                       int page = swap_header->info.badpages[i];
-                       if (page <= 0 || page >= swap_header->info.last_page)
+               for (i = 0; i < swap_header->info.nr_badpages; i++) {
+                       int page_nr = swap_header->info.badpages[i];
+                       if (page_nr <= 0 || page_nr >= swap_header->info.last_page)
                                error = -EINVAL;
                        else
-                               p->swap_map[page] = SWAP_MAP_BAD;
+                               p->swap_map[page_nr] = SWAP_MAP_BAD;
                }
                nr_good_pages = swap_header->info.last_page -
                                swap_header->info.nr_badpages -
                                1 /* header page */;
-               if (error) 
+               if (error)
                        goto bad_swap;
        }
 
@@ -1539,7 +1590,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                goto bad_swap;
        }
 
-       down(&swapon_sem);
+       mutex_lock(&swapon_mutex);
        spin_lock(&swap_lock);
        p->flags = SWP_ACTIVE;
        nr_swap_pages += nr_good_pages;
@@ -1565,7 +1616,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                swap_info[prev].next = p - swap_info;
        }
        spin_unlock(&swap_lock);
-       up(&swapon_sem);
+       mutex_unlock(&swapon_mutex);
        error = 0;
        goto out;
 bad_swap:
@@ -1596,7 +1647,7 @@ out:
        if (did_down) {
                if (!error)
                        inode->i_flags |= S_SWAPFILE;
-               up(&inode->i_sem);
+               mutex_unlock(&inode->i_mutex);
        }
        return error;
 }
@@ -1630,6 +1681,9 @@ int swap_duplicate(swp_entry_t entry)
        unsigned long offset, type;
        int result = 0;
 
+       if (is_migration_entry(entry))
+               return 1;
+
        type = swp_type(entry);
        if (type >= nr_swapfiles)
                goto bad_file;