nfsd4: reshuffle lease-setting code to allow reuse
diff --git a/fs/dcache.c b/fs/dcache.c
index b85fda3..953173a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -14,7 +14,6 @@
  * the dcache entry is deleted or garbage collected.
  */
 
-#include <linux/config.h>
 #include <linux/syscalls.h>
 #include <linux/string.h>
 #include <linux/mm.h>
@@ -22,7 +21,6 @@
 #include <linux/fsnotify.h>
 #include <linux/slab.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/hash.h>
 #include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
-
+#include <linux/fs_struct.h>
+#include <linux/hardirq.h>
+#include "internal.h"
 
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache __read_mostly;
+static struct kmem_cache *dentry_cache __read_mostly;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -61,22 +61,26 @@ static kmem_cache_t *dentry_cache __read_mostly;
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
 static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);
 
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
        .age_limit = 45,
 };
 
-static void d_callback(struct rcu_head *head)
+static void __d_free(struct dentry *dentry)
 {
-       struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
-
+       WARN_ON(!list_empty(&dentry->d_alias));
        if (dname_external(dentry))
                kfree(dentry->d_name.name);
        kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void d_callback(struct rcu_head *head)
+{
+       struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
+       __d_free(dentry);
+}
+
 /*
  * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
  * inside dcache_lock.
@@ -85,15 +89,20 @@ static void d_free(struct dentry *dentry)
 {
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
-       call_rcu(&dentry->d_u.d_rcu, d_callback);
+       /* if dentry was never inserted into hash, immediate free is OK */
+       if (hlist_unhashed(&dentry->d_hash))
+               __d_free(dentry);
+       else
+               call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
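
The hashed-versus-never-hashed distinction above matters because __d_lookup() walks the hash chains under rcu_read_lock() without taking dcache_lock, so a dentry that was ever visible on a chain may still be held by a lockless walker and its memory has to outlive a grace period. A minimal sketch of the same deferred-free pattern in isolation; struct my_item and its helpers are illustrative, not part of this patch:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_item {
        struct hlist_node       hash;   /* walked under rcu_read_lock() */
        struct rcu_head         rcu;
};

static void my_item_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct my_item, rcu));
}

static void my_item_free(struct my_item *item)
{
        /* never hashed: no lockless walker can hold a pointer to it */
        if (hlist_unhashed(&item->hash))
                kfree(item);
        else
                call_rcu(&item->rcu, my_item_free_rcu);
}
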
 
 /*
  * Release the dentry's inode, using the filesystem
  * d_iput() operation if defined.
- * Called with dcache_lock and per dentry lock held, drops both.
  */
 static void dentry_iput(struct dentry * dentry)
+       __releases(dentry->d_lock)
+       __releases(dcache_lock)
 {
        struct inode *inode = dentry->d_inode;
        if (inode) {
@@ -113,6 +122,67 @@ static void dentry_iput(struct dentry * dentry)
        }
 }
 
+/*
+ * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ */
+static void dentry_lru_add(struct dentry *dentry)
+{
+       list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+       dentry->d_sb->s_nr_dentry_unused++;
+       dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_add_tail(struct dentry *dentry)
+{
+       list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+       dentry->d_sb->s_nr_dentry_unused++;
+       dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_del(struct dentry *dentry)
+{
+       if (!list_empty(&dentry->d_lru)) {
+               list_del(&dentry->d_lru);
+               dentry->d_sb->s_nr_dentry_unused--;
+               dentry_stat.nr_unused--;
+       }
+}
+
+static void dentry_lru_del_init(struct dentry *dentry)
+{
+       if (likely(!list_empty(&dentry->d_lru))) {
+               list_del_init(&dentry->d_lru);
+               dentry->d_sb->s_nr_dentry_unused--;
+               dentry_stat.nr_unused--;
+       }
+}
+
+/**
+ * d_kill - kill dentry and return parent
+ * @dentry: dentry to kill
+ *
+ * The dentry must already be unhashed and removed from the LRU.
+ *
+ * If this is the root of the dentry tree, return NULL.
+ */
+static struct dentry *d_kill(struct dentry *dentry)
+       __releases(dentry->d_lock)
+       __releases(dcache_lock)
+{
+       struct dentry *parent;
+
+       list_del(&dentry->d_u.d_child);
+       dentry_stat.nr_dentry--;        /* For d_free, below */
+       /* drops the locks; at that point nobody can reach this dentry */
+       dentry_iput(dentry);
+       if (IS_ROOT(dentry))
+               parent = NULL;
+       else
+               parent = dentry->d_parent;
+       d_free(dentry);
+       return parent;
+}
+
 /* 
  * This is dput
  *
@@ -172,8 +242,7 @@ repeat:
                goto kill_it;
        if (list_empty(&dentry->d_lru)) {
                dentry->d_flags |= DCACHE_REFERENCED;
-               list_add(&dentry->d_lru, &dentry_unused);
-               dentry_stat.nr_unused++;
+               dentry_lru_add(dentry);
        }
        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
@@ -181,28 +250,12 @@ repeat:
 
 unhash_it:
        __d_drop(dentry);
-
-kill_it: {
-               struct dentry *parent;
-
-               /* If dentry was on d_lru list
-                * delete it from there
-                */
-               if (!list_empty(&dentry->d_lru)) {
-                       list_del(&dentry->d_lru);
-                       dentry_stat.nr_unused--;
-               }
-               list_del(&dentry->d_u.d_child);
-               dentry_stat.nr_dentry--;        /* For d_free, below */
-               /*drops the locks, at that point nobody can reach this dentry */
-               dentry_iput(dentry);
-               parent = dentry->d_parent;
-               d_free(dentry);
-               if (dentry == parent)
-                       return;
-               dentry = parent;
+kill_it:
+       /* if dentry was on the d_lru list delete it from there */
+       dentry_lru_del(dentry);
+       dentry = d_kill(dentry);
+       if (dentry)
                goto repeat;
-       }
 }
 
 /**
@@ -267,10 +320,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
        atomic_inc(&dentry->d_count);
-       if (!list_empty(&dentry->d_lru)) {
-               dentry_stat.nr_unused--;
-               list_del_init(&dentry->d_lru);
-       }
+       dentry_lru_del_init(dentry);
        return dentry;
 }
 
@@ -291,9 +341,9 @@ struct dentry * dget_locked(struct dentry *dentry)
  * it can be unhashed only if it has no children, or if it is the root
  * of a filesystem.
  *
- * If the inode has a DCACHE_DISCONNECTED alias, then prefer
+ * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
  * any other hashed alias over that one unless @want_discon is set,
- * in which case only return a DCACHE_DISCONNECTED alias.
+ * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
  */
 
 static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
@@ -309,7 +359,8 @@ static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
                prefetch(next);
                alias = list_entry(tmp, struct dentry, d_alias);
                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
-                       if (alias->d_flags & DCACHE_DISCONNECTED)
+                       if (IS_ROOT(alias) &&
+                           (alias->d_flags & DCACHE_DISCONNECTED))
                                discon_alias = alias;
                        else if (!want_discon) {
                                __dget_locked(alias);
@@ -362,143 +413,191 @@ restart:
  * Throw away a dentry - free the inode, dput the parent.  This requires that
  * the LRU list has already been removed.
  *
- * Called with dcache_lock, drops it and then regains.
- * Called with dentry->d_lock held, drops it.
+ * Try to prune ancestors as well.  This is necessary to prevent
+ * quadratic behavior of shrink_dcache_parent(), but is also expected
+ * to be beneficial in reducing dentry cache fragmentation.
  */
 static void prune_one_dentry(struct dentry * dentry)
+       __releases(dentry->d_lock)
+       __releases(dcache_lock)
+       __acquires(dcache_lock)
 {
-       struct dentry * parent;
-
        __d_drop(dentry);
-       list_del(&dentry->d_u.d_child);
-       dentry_stat.nr_dentry--;        /* For d_free, below */
-       dentry_iput(dentry);
-       parent = dentry->d_parent;
-       d_free(dentry);
-       if (parent != dentry)
-               dput(parent);
+       dentry = d_kill(dentry);
+
+       /*
+        * Prune ancestors.  Locking is simpler than in dput(),
+        * because dcache_lock needs to be taken anyway.
+        */
        spin_lock(&dcache_lock);
+       while (dentry) {
+               if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+                       return;
+
+               if (dentry->d_op && dentry->d_op->d_delete)
+                       dentry->d_op->d_delete(dentry);
+               dentry_lru_del_init(dentry);
+               __d_drop(dentry);
+               dentry = d_kill(dentry);
+               spin_lock(&dcache_lock);
+       }
 }
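
prune_one_dentry() above leans on atomic_dec_and_lock(), which acquires the spinlock only when the count it decrements reaches zero. A minimal sketch of that pattern on its own; struct my_object, my_object_lock and my_object_put() are illustrative names, not part of this patch:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_object {
        atomic_t                refcount;
        struct list_head        node;
};

static DEFINE_SPINLOCK(my_object_lock);

static void my_object_put(struct my_object *obj)
{
        /* returns false, without the lock, while other references remain */
        if (!atomic_dec_and_lock(&obj->refcount, &my_object_lock))
                return;
        /* last reference: unlink under the lock, then free */
        list_del(&obj->node);
        spin_unlock(&my_object_lock);
        kfree(obj);
}
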
 
-/**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try and free
- * @sb: if given, ignore dentries for other superblocks
- *         which are being unmounted.
- *
- * Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
- *
- * This function may fail to free any resources if
- * all the dentries are in use.
+/*
+ * Shrink the dentry LRU on a given superblock.
+ * @sb   : superblock to shrink dentry LRU.
+ * @count: If count is NULL, we prune all dentries on the superblock.
+ * @flags: If flags is non-zero, we need to do special processing based on
+ * which flags are set. This means we don't need to maintain multiple
+ * similar copies of this loop.
  */
-static void prune_dcache(int count, struct super_block *sb)
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 {
-       spin_lock(&dcache_lock);
-       for (; count ; count--) {
-               struct dentry *dentry;
-               struct list_head *tmp;
-               struct rw_semaphore *s_umount;
-
-               cond_resched_lock(&dcache_lock);
+       LIST_HEAD(referenced);
+       LIST_HEAD(tmp);
+       struct dentry *dentry;
+       int cnt = 0;
 
-               tmp = dentry_unused.prev;
-               if (sb) {
-                       /* Try to find a dentry for this sb, but don't try
-                        * too hard, if they aren't near the tail they will
-                        * be moved down again soon
+       BUG_ON(!sb);
+       BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
+       spin_lock(&dcache_lock);
+       if (count != NULL)
+               /* called from prune_dcache() and shrink_dcache_parent() */
+               cnt = *count;
+restart:
+       if (count == NULL)
+               list_splice_init(&sb->s_dentry_lru, &tmp);
+       else {
+               while (!list_empty(&sb->s_dentry_lru)) {
+                       dentry = list_entry(sb->s_dentry_lru.prev,
+                                       struct dentry, d_lru);
+                       BUG_ON(dentry->d_sb != sb);
+
+                       spin_lock(&dentry->d_lock);
+                       /*
+                        * If we are honouring the DCACHE_REFERENCED flag and
+                        * the dentry has this flag set, don't free it. Clear
+                        * the flag and put it back on the LRU.
                         */
-                       int skip = count;
-                       while (skip && tmp != &dentry_unused &&
-                           list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
-                               skip--;
-                               tmp = tmp->prev;
+                       if ((flags & DCACHE_REFERENCED)
+                               && (dentry->d_flags & DCACHE_REFERENCED)) {
+                               dentry->d_flags &= ~DCACHE_REFERENCED;
+                               list_move(&dentry->d_lru, &referenced);
+                               spin_unlock(&dentry->d_lock);
+                       } else {
+                               list_move_tail(&dentry->d_lru, &tmp);
+                               spin_unlock(&dentry->d_lock);
+                               cnt--;
+                               if (!cnt)
+                                       break;
                        }
+                       cond_resched_lock(&dcache_lock);
                }
-               if (tmp == &dentry_unused)
-                       break;
-               list_del_init(tmp);
-               prefetch(dentry_unused.prev);
-               dentry_stat.nr_unused--;
-               dentry = list_entry(tmp, struct dentry, d_lru);
-
-               spin_lock(&dentry->d_lock);
+       }
+       while (!list_empty(&tmp)) {
+               dentry = list_entry(tmp.prev, struct dentry, d_lru);
+               dentry_lru_del_init(dentry);
+               spin_lock(&dentry->d_lock);
                /*
                 * We found an inuse dentry which was not removed from
-                * dentry_unused because of laziness during lookup.  Do not free
-                * it - just keep it off the dentry_unused list.
+                * the LRU because of laziness during lookup.  Do not free
+                * it - just keep it off the LRU list.
                 */
-               if (atomic_read(&dentry->d_count)) {
-                       spin_unlock(&dentry->d_lock);
+               if (atomic_read(&dentry->d_count)) {
+                       spin_unlock(&dentry->d_lock);
                        continue;
                }
-               /* If the dentry was recently referenced, don't free it. */
-               if (dentry->d_flags & DCACHE_REFERENCED) {
-                       dentry->d_flags &= ~DCACHE_REFERENCED;
-                       list_add(&dentry->d_lru, &dentry_unused);
-                       dentry_stat.nr_unused++;
-                       spin_unlock(&dentry->d_lock);
+               prune_one_dentry(dentry);
+               /* dentry->d_lock was dropped in prune_one_dentry() */
+               cond_resched_lock(&dcache_lock);
+       }
+       if (count == NULL && !list_empty(&sb->s_dentry_lru))
+               goto restart;
+       if (count != NULL)
+               *count = cnt;
+       if (!list_empty(&referenced))
+               list_splice(&referenced, &sb->s_dentry_lru);
+       spin_unlock(&dcache_lock);
+}
+
+/**
+ * prune_dcache - shrink the dcache
+ * @count: number of entries to try to free
+ *
+ * Shrink the dcache. This is done when we need more memory, or simply when we
+ * need to unmount something (at which point we need to unuse all dentries).
+ *
+ * This function may fail to free any resources if all the dentries are in use.
+ */
+static void prune_dcache(int count)
+{
+       struct super_block *sb;
+       int w_count;
+       int unused = dentry_stat.nr_unused;
+       int prune_ratio;
+       int pruned;
+
+       if (unused == 0 || count == 0)
+               return;
+       spin_lock(&dcache_lock);
+restart:
+       if (count >= unused)
+               prune_ratio = 1;
+       else
+               prune_ratio = unused / count;
+       spin_lock(&sb_lock);
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_nr_dentry_unused == 0)
                        continue;
-               }
-               /*
-                * If the dentry is not DCACHED_REFERENCED, it is time
-                * to remove it from the dcache, provided the super block is
-                * NULL (which means we are trying to reclaim memory)
-                * or this dentry belongs to the same super block that
-                * we want to shrink.
+               sb->s_count++;
+               /* Now, we reclaim unused dentries with fairness.
+                * We reclaim the same percentage from each superblock.
+                * We calculate number of dentries to scan on this sb
+                * as follows, but the implementation is arranged to avoid
+                * overflows:
+                * number of dentries to scan on this sb =
+                * count * (number of dentries on this sb /
+                * number of dentries in the machine)
                 */
+               spin_unlock(&sb_lock);
+               if (prune_ratio != 1)
+                       w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
+               else
+                       w_count = sb->s_nr_dentry_unused;
+               pruned = w_count;
                /*
-                * If this dentry is for "my" filesystem, then I can prune it
-                * without taking the s_umount lock (I already hold it).
+                * We need to be sure this filesystem isn't being unmounted,
+                * otherwise we could race with generic_shutdown_super(), and
+                * end up holding a reference to an inode while the filesystem
+                * is unmounted.  So we try to get s_umount, and make sure
+                * s_root isn't NULL.
                 */
-               if (sb && dentry->d_sb == sb) {
-                       prune_one_dentry(dentry);
-                       continue;
+               if (down_read_trylock(&sb->s_umount)) {
+                       if ((sb->s_root != NULL) &&
+                           (!list_empty(&sb->s_dentry_lru))) {
+                               spin_unlock(&dcache_lock);
+                               __shrink_dcache_sb(sb, &w_count,
+                                               DCACHE_REFERENCED);
+                               pruned -= w_count;
+                               spin_lock(&dcache_lock);
+                       }
+                       up_read(&sb->s_umount);
                }
+               spin_lock(&sb_lock);
+               count -= pruned;
                /*
-                * ...otherwise we need to be sure this filesystem isn't being
-                * unmounted, otherwise we could race with
-                * generic_shutdown_super(), and end up holding a reference to
-                * an inode while the filesystem is unmounted.
-                * So we try to get s_umount, and make sure s_root isn't NULL.
-                * (Take a local copy of s_umount to avoid a use-after-free of
-                * `dentry').
+                * restart only when sb is no longer on the list and
+                * we have more work to do.
                 */
-               s_umount = &dentry->d_sb->s_umount;
-               if (down_read_trylock(s_umount)) {
-                       if (dentry->d_sb->s_root != NULL) {
-                               prune_one_dentry(dentry);
-                               up_read(s_umount);
-                               continue;
-                       }
-                       up_read(s_umount);
+               if (__put_super_and_need_restart(sb) && count > 0) {
+                       spin_unlock(&sb_lock);
+                       goto restart;
                }
-               spin_unlock(&dentry->d_lock);
-               /* Cannot remove the first dentry, and it isn't appropriate
-                * to move it to the head of the list, so give up, and try
-                * later
-                */
-               break;
        }
+       spin_unlock(&sb_lock);
        spin_unlock(&dcache_lock);
 }
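
The fair-share arithmetic in prune_dcache() is easier to follow with concrete numbers. Below is an illustrative restatement as a standalone helper; fair_scan_count() and the example figures are not part of the patch:

/*
 * Example: count = 128 dentries requested, unused = 1024 system-wide,
 * and a superblock holding sb_unused = 200 of them:
 *   prune_ratio = 1024 / 128 = 8
 *   scan count  = 200 / 8 + 1 = 26
 * i.e. roughly 200/1024 of the 128 requested, so each superblock gives
 * up about the same fraction of its unused dentries.
 */
static int fair_scan_count(int count, int unused, int sb_unused)
{
        int prune_ratio = (count >= unused) ? 1 : unused / count;

        return (prune_ratio == 1) ? sb_unused : sb_unused / prune_ratio + 1;
}
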
 
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list.  On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them.  The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
-
 /**
  * shrink_dcache_sb - shrink dcache for a superblock
  * @sb: superblock
@@ -507,48 +606,142 @@ static void prune_dcache(int count, struct super_block *sb)
  * is used to free the dcache before unmounting a file
  * system
  */
-
 void shrink_dcache_sb(struct super_block * sb)
 {
-       struct list_head *tmp, *next;
-       struct dentry *dentry;
+       __shrink_dcache_sb(sb, NULL, 0);
+}
 
-       /*
-        * Pass one ... move the dentries for the specified
-        * superblock to the most recent end of the unused list.
-        */
+/*
+ * destroy a single subtree of dentries for unmount
+ * - see the comments on shrink_dcache_for_umount() for a description of the
+ *   locking
+ */
+static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
+{
+       struct dentry *parent;
+       unsigned detached = 0;
+
+       BUG_ON(!IS_ROOT(dentry));
+
+       /* detach this root from the system */
        spin_lock(&dcache_lock);
-       list_for_each_safe(tmp, next, &dentry_unused) {
-               dentry = list_entry(tmp, struct dentry, d_lru);
-               if (dentry->d_sb != sb)
-                       continue;
-               list_del(tmp);
-               list_add(tmp, &dentry_unused);
-       }
+       dentry_lru_del_init(dentry);
+       __d_drop(dentry);
+       spin_unlock(&dcache_lock);
 
-       /*
-        * Pass two ... free the dentries for this superblock.
-        */
-repeat:
-       list_for_each_safe(tmp, next, &dentry_unused) {
-               dentry = list_entry(tmp, struct dentry, d_lru);
-               if (dentry->d_sb != sb)
-                       continue;
-               dentry_stat.nr_unused--;
-               list_del_init(tmp);
-               spin_lock(&dentry->d_lock);
-               if (atomic_read(&dentry->d_count)) {
-                       spin_unlock(&dentry->d_lock);
-                       continue;
+       for (;;) {
+               /* descend to the first leaf in the current subtree */
+               while (!list_empty(&dentry->d_subdirs)) {
+                       struct dentry *loop;
+
+                       /* this is a branch with children - detach all of them
+                        * from the system in one go */
+                       spin_lock(&dcache_lock);
+                       list_for_each_entry(loop, &dentry->d_subdirs,
+                                           d_u.d_child) {
+                               dentry_lru_del_init(loop);
+                               __d_drop(loop);
+                               cond_resched_lock(&dcache_lock);
+                       }
+                       spin_unlock(&dcache_lock);
+
+                       /* move to the first child */
+                       dentry = list_entry(dentry->d_subdirs.next,
+                                           struct dentry, d_u.d_child);
                }
-               prune_one_dentry(dentry);
-               cond_resched_lock(&dcache_lock);
-               goto repeat;
+
+               /* consume the dentries from this leaf up through its parents
+                * until we find one with children or run out altogether */
+               do {
+                       struct inode *inode;
+
+                       if (atomic_read(&dentry->d_count) != 0) {
+                               printk(KERN_ERR
+                                      "BUG: Dentry %p{i=%lx,n=%s}"
+                                      " still in use (%d)"
+                                      " [unmount of %s %s]\n",
+                                      dentry,
+                                      dentry->d_inode ?
+                                      dentry->d_inode->i_ino : 0UL,
+                                      dentry->d_name.name,
+                                      atomic_read(&dentry->d_count),
+                                      dentry->d_sb->s_type->name,
+                                      dentry->d_sb->s_id);
+                               BUG();
+                       }
+
+                       if (IS_ROOT(dentry))
+                               parent = NULL;
+                       else {
+                               parent = dentry->d_parent;
+                               atomic_dec(&parent->d_count);
+                       }
+
+                       list_del(&dentry->d_u.d_child);
+                       detached++;
+
+                       inode = dentry->d_inode;
+                       if (inode) {
+                               dentry->d_inode = NULL;
+                               list_del_init(&dentry->d_alias);
+                               if (dentry->d_op && dentry->d_op->d_iput)
+                                       dentry->d_op->d_iput(dentry, inode);
+                               else
+                                       iput(inode);
+                       }
+
+                       d_free(dentry);
+
+                       /* finished when we fall off the top of the tree,
+                        * otherwise we ascend to the parent and move to the
+                        * next sibling if there is one */
+                       if (!parent)
+                               goto out;
+
+                       dentry = parent;
+
+               } while (list_empty(&dentry->d_subdirs));
+
+               dentry = list_entry(dentry->d_subdirs.next,
+                                   struct dentry, d_u.d_child);
        }
+out:
+       /* several dentries were freed, need to correct nr_dentry */
+       spin_lock(&dcache_lock);
+       dentry_stat.nr_dentry -= detached;
        spin_unlock(&dcache_lock);
 }
 
 /*
+ * destroy the dentries attached to a superblock on unmounting
+ * - we don't need to use dentry->d_lock, and only need dcache_lock when
+ *   removing the dentry from the system lists and hashes because:
+ *   - the superblock is detached from all mountings and open files, so the
+ *     dentry trees will not be rearranged by the VFS
+ *   - s_umount is write-locked, so the memory pressure shrinker will ignore
+ *     any dentries belonging to this superblock that it comes across
+ *   - the filesystem itself is no longer permitted to rearrange the dentries
+ *     in this superblock
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+       struct dentry *dentry;
+
+       if (down_read_trylock(&sb->s_umount))
+               BUG();
+
+       dentry = sb->s_root;
+       sb->s_root = NULL;
+       atomic_dec(&dentry->d_count);
+       shrink_dcache_for_umount_subtree(dentry);
+
+       while (!hlist_empty(&sb->s_anon)) {
+               dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+               shrink_dcache_for_umount_subtree(dentry);
+       }
+}
+
+/*
  * Search for at least 1 mount point in the dentry's subdirs.
  * We descend to the next level whenever the d_subdirs
  * list is non-empty and continue searching.
@@ -629,17 +822,13 @@ resume:
                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
 
-               if (!list_empty(&dentry->d_lru)) {
-                       dentry_stat.nr_unused--;
-                       list_del_init(&dentry->d_lru);
-               }
+               dentry_lru_del_init(dentry);
                /* 
                 * move only zero ref count dentries to the end 
                 * of the unused list for prune_dcache
                 */
                if (!atomic_read(&dentry->d_count)) {
-                       list_add(&dentry->d_lru, dentry_unused.prev);
-                       dentry_stat.nr_unused++;
+                       dentry_lru_add_tail(dentry);
                        found++;
                }
 
@@ -681,10 +870,11 @@ out:
  
 void shrink_dcache_parent(struct dentry * parent)
 {
+       struct super_block *sb = parent->d_sb;
        int found;
 
        while ((found = select_parent(parent)) != 0)
-               prune_dcache(found, parent->d_sb);
+               __shrink_dcache_sb(sb, &found, 0);
 }
 
 /*
@@ -704,11 +894,16 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
        if (nr) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;
-               prune_dcache(nr, NULL);
+               prune_dcache(nr);
        }
        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
+static struct shrinker dcache_shrinker = {
+       .shrink = shrink_dcache_memory,
+       .seeks = DEFAULT_SEEKS,
+};
+
 /**
  * d_alloc     -       allocate a dcache entry
  * @parent: parent of entry to allocate
@@ -724,7 +919,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
        struct dentry *dentry;
        char *dname;
 
-       dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
+       dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
        if (!dentry)
                return NULL;
 
@@ -753,9 +948,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
        dentry->d_op = NULL;
        dentry->d_fsdata = NULL;
        dentry->d_mounted = 0;
-#ifdef CONFIG_PROFILING
-       dentry->d_cookie = NULL;
-#endif
        INIT_HLIST_NODE(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
@@ -786,6 +978,16 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
        q.hash = full_name_hash(q.name, q.len);
        return d_alloc(parent, &q);
 }
+EXPORT_SYMBOL(d_alloc_name);
+
+/* the caller must hold dcache_lock */
+static void __d_instantiate(struct dentry *dentry, struct inode *inode)
+{
+       if (inode)
+               list_add(&dentry->d_alias, &inode->i_dentry);
+       dentry->d_inode = inode;
+       fsnotify_d_instantiate(dentry, inode);
+}
 
 /**
  * d_instantiate - fill in inode information for a dentry
@@ -806,10 +1008,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
 {
        BUG_ON(!list_empty(&entry->d_alias));
        spin_lock(&dcache_lock);
-       if (inode)
-               list_add(&entry->d_alias, &inode->i_dentry);
-       entry->d_inode = inode;
-       fsnotify_d_instantiate(entry, inode);
+       __d_instantiate(entry, inode);
        spin_unlock(&dcache_lock);
        security_d_instantiate(entry, inode);
 }
@@ -830,17 +1029,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
  * (or otherwise set) by the caller to indicate that it is now
  * in use by the dcache.
  */
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+                                            struct inode *inode)
 {
        struct dentry *alias;
        int len = entry->d_name.len;
        const char *name = entry->d_name.name;
        unsigned int hash = entry->d_name.hash;
 
-       BUG_ON(!list_empty(&entry->d_alias));
-       spin_lock(&dcache_lock);
-       if (!inode)
-               goto do_negative;
+       if (!inode) {
+               __d_instantiate(entry, NULL);
+               return NULL;
+       }
+
        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
                struct qstr *qstr = &alias->d_name;
 
@@ -853,19 +1054,33 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
                if (memcmp(qstr->name, name, len))
                        continue;
                dget_locked(alias);
-               spin_unlock(&dcache_lock);
-               BUG_ON(!d_unhashed(alias));
-               iput(inode);
                return alias;
        }
-       list_add(&entry->d_alias, &inode->i_dentry);
-do_negative:
-       entry->d_inode = inode;
-       fsnotify_d_instantiate(entry, inode);
-       spin_unlock(&dcache_lock);
-       security_d_instantiate(entry, inode);
+
+       __d_instantiate(entry, inode);
        return NULL;
 }
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+       struct dentry *result;
+
+       BUG_ON(!list_empty(&entry->d_alias));
+
+       spin_lock(&dcache_lock);
+       result = __d_instantiate_unique(entry, inode);
+       spin_unlock(&dcache_lock);
+
+       if (!result) {
+               security_d_instantiate(entry, inode);
+               return NULL;
+       }
+
+       BUG_ON(!d_unhashed(result));
+       iput(inode);
+       return result;
+}
+
 EXPORT_SYMBOL(d_instantiate_unique);
 
 /**
@@ -903,69 +1118,71 @@ static inline struct hlist_head *d_hash(struct dentry *parent,
 }
 
 /**
- * d_alloc_anon - allocate an anonymous dentry
+ * d_obtain_alias - find or allocate a dentry for a given inode
  * @inode: inode to allocate the dentry for
  *
- * This is similar to d_alloc_root.  It is used by filesystems when
- * creating a dentry for a given inode, often in the process of 
- * mapping a filehandle to a dentry.  The returned dentry may be
- * anonymous, or may have a full name (if the inode was already
- * in the cache).  The file system may need to make further
- * efforts to connect this dentry into the dcache properly.
+ * Obtain a dentry for an inode resulting from NFS filehandle conversion or
+ * similar open by handle operations.  The returned dentry may be anonymous,
+ * or may have a full name (if the inode was already in the cache).
  *
- * When called on a directory inode, we must ensure that
- * the inode only ever has one dentry.  If a dentry is
- * found, that is returned instead of allocating a new one.
+ * When called on a directory inode, we must ensure that the inode only ever
+ * has one dentry.  If a dentry is found, that is returned instead of
+ * allocating a new one.
  *
  * On successful return, the reference to the inode has been transferred
- * to the dentry.  If %NULL is returned (indicating kmalloc failure),
- * the reference on the inode has not been released.
+ * to the dentry.  In case of an error the reference on the inode is released.
+ * To make it easier to use in export operations, a %NULL or IS_ERR inode may
+ * be passed in, and the error will be propagated to the return value,
+ * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
  */
-
-struct dentry * d_alloc_anon(struct inode *inode)
+struct dentry *d_obtain_alias(struct inode *inode)
 {
        static const struct qstr anonstring = { .name = "" };
        struct dentry *tmp;
        struct dentry *res;
 
-       if ((res = d_find_alias(inode))) {
-               iput(inode);
-               return res;
-       }
+       if (!inode)
+               return ERR_PTR(-ESTALE);
+       if (IS_ERR(inode))
+               return ERR_CAST(inode);
 
-       tmp = d_alloc(NULL, &anonstring);
-       if (!tmp)
-               return NULL;
+       res = d_find_alias(inode);
+       if (res)
+               goto out_iput;
 
+       tmp = d_alloc(NULL, &anonstring);
+       if (!tmp) {
+               res = ERR_PTR(-ENOMEM);
+               goto out_iput;
+       }
        tmp->d_parent = tmp; /* make sure dput doesn't croak */
-       
+
        spin_lock(&dcache_lock);
        res = __d_find_alias(inode, 0);
-       if (!res) {
-               /* attach a disconnected dentry */
-               res = tmp;
-               tmp = NULL;
-               spin_lock(&res->d_lock);
-               res->d_sb = inode->i_sb;
-               res->d_parent = res;
-               res->d_inode = inode;
-               res->d_flags |= DCACHE_DISCONNECTED;
-               res->d_flags &= ~DCACHE_UNHASHED;
-               list_add(&res->d_alias, &inode->i_dentry);
-               hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
-               spin_unlock(&res->d_lock);
-
-               inode = NULL; /* don't drop reference */
+       if (res) {
+               spin_unlock(&dcache_lock);
+               dput(tmp);
+               goto out_iput;
        }
+
+       /* attach a disconnected dentry */
+       spin_lock(&tmp->d_lock);
+       tmp->d_sb = inode->i_sb;
+       tmp->d_inode = inode;
+       tmp->d_flags |= DCACHE_DISCONNECTED;
+       tmp->d_flags &= ~DCACHE_UNHASHED;
+       list_add(&tmp->d_alias, &inode->i_dentry);
+       hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
+       spin_unlock(&tmp->d_lock);
+
        spin_unlock(&dcache_lock);
+       return tmp;
 
-       if (inode)
-               iput(inode);
-       if (tmp)
-               dput(tmp);
+ out_iput:
+       iput(inode);
        return res;
 }
-
+EXPORT_SYMBOL(d_obtain_alias);
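
A sketch of how an exporting filesystem's ->fh_to_dentry() can lean on the error handling described above; myfs_fh_to_dentry() and myfs_iget() are hypothetical, the latter assumed to return an inode, an ERR_PTR, or NULL for a stale handle:

#include <linux/exportfs.h>
#include <linux/fs.h>

static struct inode *myfs_iget(struct super_block *sb, u32 ino, u32 gen);

static struct dentry *myfs_fh_to_dentry(struct super_block *sb,
                                        struct fid *fid, int fh_len,
                                        int fh_type)
{
        struct inode *inode = myfs_iget(sb, fid->i32.ino, fid->i32.gen);

        /*
         * d_obtain_alias() maps NULL to ERR_PTR(-ESTALE), propagates
         * IS_ERR inodes, and otherwise consumes the inode reference.
         */
        return d_obtain_alias(inode);
}
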
 
 /**
  * d_splice_alias - splice a disconnected dentry into the tree if one exists
@@ -987,22 +1204,19 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 {
        struct dentry *new = NULL;
 
-       if (inode) {
+       if (inode && S_ISDIR(inode->i_mode)) {
                spin_lock(&dcache_lock);
                new = __d_find_alias(inode, 1);
                if (new) {
                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
-                       fsnotify_d_instantiate(new, inode);
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(new, inode);
                        d_rehash(dentry);
                        d_move(new, dentry);
                        iput(inode);
                } else {
-                       /* d_instantiate takes dcache_lock, so we do it by hand */
-                       list_add(&dentry->d_alias, &inode->i_dentry);
-                       dentry->d_inode = inode;
-                       fsnotify_d_instantiate(dentry, inode);
+                       /* already taking dcache_lock, so d_add() by hand */
+                       __d_instantiate(dentry, inode);
                        spin_unlock(&dcache_lock);
                        security_d_instantiate(dentry, inode);
                        d_rehash(dentry);
@@ -1012,6 +1226,94 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
        return new;
 }
 
+/**
+ * d_add_ci - lookup or allocate new dentry with case-exact name
+ * @inode:  the inode case-insensitive lookup has found
+ * @dentry: the negative dentry that was passed to the parent's lookup func
+ * @name:   the case-exact name to be associated with the returned dentry
+ *
+ * This is to avoid filling the dcache with case-insensitive names for the
+ * same inode; only the actual correct case is stored in the dcache for
+ * case-insensitive filesystems.
+ *
+ * For a case-insensitive lookup match, if the case-exact dentry
+ * already exists in the dcache, use it and return it.
+ *
+ * If no entry exists with the exact-case name, allocate a new dentry with
+ * the exact case, and return the spliced entry.
+ */
+struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
+                       struct qstr *name)
+{
+       int error;
+       struct dentry *found;
+       struct dentry *new;
+
+       /*
+        * First check if a dentry matching the name already exists,
+        * if not go ahead and create it now.
+        */
+       found = d_hash_and_lookup(dentry->d_parent, name);
+       if (!found) {
+               new = d_alloc(dentry->d_parent, name);
+               if (!new) {
+                       error = -ENOMEM;
+                       goto err_out;
+               }
+
+               found = d_splice_alias(inode, new);
+               if (found) {
+                       dput(new);
+                       return found;
+               }
+               return new;
+       }
+
+       /*
+        * If a matching dentry exists, and it's not negative use it.
+        *
+        * Decrement the reference count to balance the iget() done
+        * earlier on.
+        */
+       if (found->d_inode) {
+               if (unlikely(found->d_inode != inode)) {
+                       /* This can't happen because bad inodes are unhashed. */
+                       BUG_ON(!is_bad_inode(inode));
+                       BUG_ON(!is_bad_inode(found->d_inode));
+               }
+               iput(inode);
+               return found;
+       }
+
+       /*
+        * Negative dentry: instantiate it unless the inode is a directory and
+        * already has a dentry.
+        */
+       spin_lock(&dcache_lock);
+       if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
+               __d_instantiate(found, inode);
+               spin_unlock(&dcache_lock);
+               security_d_instantiate(found, inode);
+               return found;
+       }
+
+       /*
+        * In case a directory already has a (disconnected) entry grab a
+        * reference to it, move it in place and use it.
+        */
+       new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+       dget_locked(new);
+       spin_unlock(&dcache_lock);
+       security_d_instantiate(found, inode);
+       d_move(new, found);
+       iput(inode);
+       dput(found);
+       return new;
+
+err_out:
+       iput(inode);
+       return ERR_PTR(error);
+}
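
A sketch of the intended caller: a case-insensitive ->lookup() that hands the on-disk, case-exact spelling to d_add_ci(). myfs_lookup() and myfs_find_ci() are hypothetical; the helper is assumed to return a referenced inode or an ERR_PTR (never NULL):

#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/namei.h>

static struct inode *myfs_find_ci(struct inode *dir, const struct qstr *name,
                                  struct qstr *exact);

static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
                                  struct nameidata *nd)
{
        struct qstr exact;
        struct inode *inode;

        /* case-insensitive search; fills 'exact' with the on-disk spelling */
        inode = myfs_find_ci(dir, &dentry->d_name, &exact);
        if (IS_ERR(inode))
                return ERR_CAST(inode);

        /* reuse an existing case-exact dentry or splice a new one;
         * the inode reference is consumed either way */
        return d_add_ci(dentry, inode, &exact);
}
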
 
 /**
  * d_lookup - search for a dentry
@@ -1020,7 +1322,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  *
  * Searches the children of the parent dentry for the name in question. If
  * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use d_put to free the entry when it has
+ * is returned. The caller must use dput to free the entry when it has
  * finished using it. %NULL is returned on failure.
  *
  * __d_lookup is dcache_lock free. The hash list is protected using RCU.
@@ -1033,7 +1335,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
  * lookup is going on.
  *
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
  * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
  * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
  * acquisition.
@@ -1086,6 +1388,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
                if (dentry->d_parent != parent)
                        goto next;
 
+               /* non-existing due to RCU? */
+               if (d_unhashed(dentry))
+                       goto next;
+
                /*
                 * It is safe to compare names since d_move() cannot
                 * change the qstr (protected by d_lock).
@@ -1101,10 +1407,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
                                goto next;
                }
 
-               if (!d_unhashed(dentry)) {
-                       atomic_inc(&dentry->d_count);
-                       found = dentry;
-               }
+               atomic_inc(&dentry->d_count);
+               found = dentry;
                spin_unlock(&dentry->d_lock);
                break;
 next:
@@ -1145,8 +1449,6 @@ out:
  * d_validate - verify dentry provided from insecure source
  * @dentry: The dentry alleged to be valid child of @dparent
  * @dparent: The parent dentry (known to be valid)
- * @hash: Hash of the dentry
- * @len: Length of the name
  *
  * An insecure source has sent us a dentry, here we verify it and dget() it.
  * This is used by ncpfs in its readdir implementation.
@@ -1215,9 +1517,6 @@ void d_delete(struct dentry * dentry)
        if (atomic_read(&dentry->d_count) == 1) {
                dentry_iput(dentry);
                fsnotify_nameremove(dentry, isdir);
-
-               /* remove this and other inotify debug checks after 2.6.18 */
-               dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
                return;
        }
 
@@ -1237,6 +1536,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
        hlist_add_head_rcu(&entry->d_hash, list);
 }
 
+static void _d_rehash(struct dentry * entry)
+{
+       __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
+
 /**
  * d_rehash    - add an entry back to the hash
  * @entry: dentry to add to the hash
@@ -1246,19 +1550,13 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
  
 void d_rehash(struct dentry * entry)
 {
-       struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
        spin_lock(&dcache_lock);
        spin_lock(&entry->d_lock);
-       __d_rehash(entry, list);
+       _d_rehash(entry);
        spin_unlock(&entry->d_lock);
        spin_unlock(&dcache_lock);
 }
 
-#define do_switch(x,y) do { \
-       __typeof__ (x) __tmp = x; \
-       x = y; y = __tmp; } while (0)
-
 /*
  * When switching names, the actual string doesn't strictly have to
  * be preserved in the target - because we're dropping the target
@@ -1277,12 +1575,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
                        /*
                         * Both external: swap the pointers
                         */
-                       do_switch(target->d_name.name, dentry->d_name.name);
+                       swap(target->d_name.name, dentry->d_name.name);
                } else {
                        /*
                         * dentry:internal, target:external.  Steal target's
                         * storage and make target internal.
                         */
+                       memcpy(target->d_iname, dentry->d_name.name,
+                                       dentry->d_name.len + 1);
                        dentry->d_name.name = target->d_name.name;
                        target->d_name.name = target->d_iname;
                }
@@ -1302,8 +1602,11 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
                         */
                        memcpy(dentry->d_iname, target->d_name.name,
                                        target->d_name.len + 1);
+                       dentry->d_name.len = target->d_name.len;
+                       return;
                }
        }
+       swap(dentry->d_name.len, target->d_name.len);
 }
 
 /*
@@ -1314,41 +1617,39 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
  * This forceful removal will result in ugly /proc output if
  * somebody holds a file open that got deleted due to a rename.
  * We could be nicer about the deleted file, and let it show
- * up under the name it got deleted rather than the name that
- * deleted it.
+ * up under the name it had before it was deleted rather than
+ * under the original name of the file that was moved on top of it.
  */
  
-/**
- * d_move - move a dentry
+/*
+ * d_move_locked - move a dentry
  * @dentry: entry to move
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
  * dcache entries should not be moved in this way.
  */
-
-void d_move(struct dentry * dentry, struct dentry * target)
+static void d_move_locked(struct dentry * dentry, struct dentry * target)
 {
        struct hlist_head *list;
 
        if (!dentry->d_inode)
                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
-       spin_lock(&dcache_lock);
        write_seqlock(&rename_lock);
        /*
         * XXXX: do we really need to take target->d_lock?
         */
        if (target < dentry) {
                spin_lock(&target->d_lock);
-               spin_lock(&dentry->d_lock);
+               spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
        } else {
                spin_lock(&dentry->d_lock);
-               spin_lock(&target->d_lock);
+               spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
        }
 
        /* Move the dentry to the target hash queue, if on different bucket */
-       if (dentry->d_flags & DCACHE_UNHASHED)
+       if (d_unhashed(dentry))
                goto already_unhashed;
 
        hlist_del_rcu(&dentry->d_hash);
@@ -1365,8 +1666,7 @@ already_unhashed:
 
        /* Switch the names.. */
        switch_names(dentry, target);
-       do_switch(dentry->d_name.len, target->d_name.len);
-       do_switch(dentry->d_name.hash, target->d_name.hash);
+       swap(dentry->d_name.hash, target->d_name.hash);
 
        /* ... and switch the parents */
        if (IS_ROOT(dentry)) {
@@ -1374,7 +1674,7 @@ already_unhashed:
                target->d_parent = target;
                INIT_LIST_HEAD(&target->d_u.d_child);
        } else {
-               do_switch(dentry->d_parent, target->d_parent);
+               swap(dentry->d_parent, target->d_parent);
 
                /* And add them back to the (new) parent lists */
                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
@@ -1385,42 +1685,236 @@ already_unhashed:
        fsnotify_d_move(dentry);
        spin_unlock(&dentry->d_lock);
        write_sequnlock(&rename_lock);
+}
+
+/**
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+
+void d_move(struct dentry * dentry, struct dentry * target)
+{
+       spin_lock(&dcache_lock);
+       d_move_locked(dentry, target);
        spin_unlock(&dcache_lock);
 }
 
 /**
- * d_path - return the path of a dentry
- * @dentry: dentry to report
- * @vfsmnt: vfsmnt to which the dentry belongs
- * @root: root dentry
- * @rootmnt: vfsmnt to which the root dentry belongs
+ * d_ancestor - search for an ancestor
+ * @p1: ancestor dentry
+ * @p2: child dentry
+ *
+ * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
+ * an ancestor of p2, else NULL.
+ */
+struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
+{
+       struct dentry *p;
+
+       for (p = p2; !IS_ROOT(p); p = p->d_parent) {
+               if (p->d_parent == p1)
+                       return p;
+       }
+       return NULL;
+}
+
+/*
+ * This helper attempts to cope with remotely renamed directories
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ *
+ * Note: If ever the locking in lock_rename() changes, then please
+ * remember to update this too...
+ */
+static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+       __releases(dcache_lock)
+{
+       struct mutex *m1 = NULL, *m2 = NULL;
+       struct dentry *ret;
+
+       /* If alias and dentry share a parent, then no extra locks required */
+       if (alias->d_parent == dentry->d_parent)
+               goto out_unalias;
+
+       /* Check for loops */
+       ret = ERR_PTR(-ELOOP);
+       if (d_ancestor(alias, dentry))
+               goto out_err;
+
+       /* See lock_rename() */
+       ret = ERR_PTR(-EBUSY);
+       if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
+               goto out_err;
+       m1 = &dentry->d_sb->s_vfs_rename_mutex;
+       if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+               goto out_err;
+       m2 = &alias->d_parent->d_inode->i_mutex;
+out_unalias:
+       d_move_locked(alias, dentry);
+       ret = alias;
+out_err:
+       spin_unlock(&dcache_lock);
+       if (m2)
+               mutex_unlock(m2);
+       if (m1)
+               mutex_unlock(m1);
+       return ret;
+}
+
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+       struct dentry *dparent, *aparent;
+
+       switch_names(dentry, anon);
+       swap(dentry->d_name.hash, anon->d_name.hash);
+
+       dparent = dentry->d_parent;
+       aparent = anon->d_parent;
+
+       dentry->d_parent = (aparent == anon) ? dentry : aparent;
+       list_del(&dentry->d_u.d_child);
+       if (!IS_ROOT(dentry))
+               list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+       else
+               INIT_LIST_HEAD(&dentry->d_u.d_child);
+
+       anon->d_parent = (dparent == dentry) ? anon : dparent;
+       list_del(&anon->d_u.d_child);
+       if (!IS_ROOT(anon))
+               list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+       else
+               INIT_LIST_HEAD(&anon->d_u.d_child);
+
+       anon->d_flags &= ~DCACHE_DISCONNECTED;
+}
+
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces a dentry into the tree, substituting an extant disconnected
+ * root directory alias in its place if there is one
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+       struct dentry *actual;
+
+       BUG_ON(!d_unhashed(dentry));
+
+       spin_lock(&dcache_lock);
+
+       if (!inode) {
+               actual = dentry;
+               __d_instantiate(dentry, NULL);
+               goto found_lock;
+       }
+
+       if (S_ISDIR(inode->i_mode)) {
+               struct dentry *alias;
+
+               /* Does an aliased dentry already exist? */
+               alias = __d_find_alias(inode, 0);
+               if (alias) {
+                       actual = alias;
+                       /* Is this an anonymous mountpoint that we could splice
+                        * into our tree? */
+                       if (IS_ROOT(alias)) {
+                               spin_lock(&alias->d_lock);
+                               __d_materialise_dentry(dentry, alias);
+                               __d_drop(alias);
+                               goto found;
+                       }
+                       /* Nope, but we must(!) avoid directory aliasing */
+                       actual = __d_unalias(dentry, alias);
+                       if (IS_ERR(actual))
+                               dput(alias);
+                       goto out_nolock;
+               }
+       }
+
+       /* Add a unique reference */
+       actual = __d_instantiate_unique(dentry, inode);
+       if (!actual)
+               actual = dentry;
+       else if (unlikely(!d_unhashed(actual)))
+               goto shouldnt_be_hashed;
+
+found_lock:
+       spin_lock(&actual->d_lock);
+found:
+       _d_rehash(actual);
+       spin_unlock(&actual->d_lock);
+       spin_unlock(&dcache_lock);
+out_nolock:
+       if (actual == dentry) {
+               security_d_instantiate(dentry, inode);
+               return NULL;
+       }
+
+       iput(inode);
+       return actual;
+
+shouldnt_be_hashed:
+       spin_unlock(&dcache_lock);
+       BUG();
+}
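
The calling convention of d_materialise_unique(), roughly as a network filesystem's lookup path would consume it; myfs_finish_lookup() is a hypothetical wrapper written as if it lived in this file:

static struct dentry *myfs_finish_lookup(struct dentry *dentry,
                                         struct inode *inode)
{
        struct dentry *res = d_materialise_unique(dentry, inode);

        if (IS_ERR(res))
                return res;     /* e.g. -ELOOP or -EBUSY from __d_unalias() */
        /* NULL means 'dentry' itself was instantiated and hashed;
         * otherwise an existing alias was spliced in and must be used */
        return res ? res : dentry;
}
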
+
+static int prepend(char **buffer, int *buflen, const char *str, int namelen)
+{
+       *buflen -= namelen;
+       if (*buflen < 0)
+               return -ENAMETOOLONG;
+       *buffer -= namelen;
+       memcpy(*buffer, str, namelen);
+       return 0;
+}
+
+static int prepend_name(char **buffer, int *buflen, struct qstr *name)
+{
+       return prepend(buffer, buflen, name->name, name->len);
+}
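
prepend() and prepend_name() write right-to-left: the caller starts at the end of its buffer and each call steps backwards, which is how __d_path() below assembles a path from leaf to root. A tiny illustration with made-up components, written as if it lived in this file:

static void prepend_demo(void)
{
        char buf[16];
        char *end = buf + sizeof(buf);
        int buflen = sizeof(buf);

        /* build "a/b/c" from the tail of the buffer towards its head */
        prepend(&end, &buflen, "\0", 1);        /* trailing NUL */
        prepend(&end, &buflen, "c", 1);         /* leaf */
        prepend(&end, &buflen, "/", 1);
        prepend(&end, &buflen, "b", 1);
        prepend(&end, &buflen, "/", 1);
        prepend(&end, &buflen, "a", 1);         /* root-most component */

        /* 'end' now points at "a/b/c" inside buf; buflen bytes remain free */
}
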
+
+/**
+ * __d_path - return the path of a dentry
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry (may be modified by this function)
  * @buffer: buffer to return value in
  * @buflen: buffer length
  *
  * Convert a dentry into an ASCII path name. If the entry has been deleted
  * the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * Returns the buffer or an error code if the path was too long.
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
  *
  * "buflen" should be positive. Caller holds the dcache_lock.
+ *
+ * If path is not reachable from the supplied root, then the value of
+ * root is changed (without modifying refcounts).
  */
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
-                       struct dentry *root, struct vfsmount *rootmnt,
-                       char *buffer, int buflen)
-{
-       char * end = buffer+buflen;
-       char * retval;
-       int namelen;
-
-       *--end = '\0';
-       buflen--;
-       if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-               buflen -= 10;
-               end -= 10;
-               if (buflen < 0)
+char *__d_path(const struct path *path, struct path *root,
+              char *buffer, int buflen)
+{
+       struct dentry *dentry = path->dentry;
+       struct vfsmount *vfsmnt = path->mnt;
+       char *end = buffer + buflen;
+       char *retval;
+
+       spin_lock(&vfsmount_lock);
+       prepend(&end, &buflen, "\0", 1);
+       if (d_unlinked(dentry) &&
+               (prepend(&end, &buflen, " (deleted)", 10) != 0))
                        goto Elong;
-               memcpy(end, " (deleted)", 10);
-       }
 
        if (buflen < 1)
                goto Elong;
@@ -1431,68 +1925,146 @@ static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
        for (;;) {
                struct dentry * parent;
 
-               if (dentry == root && vfsmnt == rootmnt)
+               if (dentry == root->dentry && vfsmnt == root->mnt)
                        break;
                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
                        /* Global root? */
-                       spin_lock(&vfsmount_lock);
                        if (vfsmnt->mnt_parent == vfsmnt) {
-                               spin_unlock(&vfsmount_lock);
                                goto global_root;
                        }
                        dentry = vfsmnt->mnt_mountpoint;
                        vfsmnt = vfsmnt->mnt_parent;
-                       spin_unlock(&vfsmount_lock);
                        continue;
                }
                parent = dentry->d_parent;
                prefetch(parent);
-               namelen = dentry->d_name.len;
-               buflen -= namelen + 1;
-               if (buflen < 0)
+               if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
+                   (prepend(&end, &buflen, "/", 1) != 0))
                        goto Elong;
-               end -= namelen;
-               memcpy(end, dentry->d_name.name, namelen);
-               *--end = '/';
                retval = end;
                dentry = parent;
        }
 
+out:
+       spin_unlock(&vfsmount_lock);
        return retval;
 
 global_root:
-       namelen = dentry->d_name.len;
-       buflen -= namelen;
-       if (buflen < 0)
+       retval += 1;    /* hit the slash */
+       if (prepend_name(&retval, &buflen, &dentry->d_name) != 0)
                goto Elong;
-       retval -= namelen-1;    /* hit the slash */
-       memcpy(retval, dentry->d_name.name, namelen);
-       return retval;
+       root->mnt = vfsmnt;
+       root->dentry = dentry;
+       goto out;
+
 Elong:
-       return ERR_PTR(-ENAMETOOLONG);
+       retval = ERR_PTR(-ENAMETOOLONG);
+       goto out;
 }
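Because __d_path() rewrites *root when the path is not reachable from it, a caller can detect an "escaped" path by handing in a copy and comparing afterwards; this is exactly what d_path() and sys_getcwd() below do with their tmp copies. A hedged sketch (myfs_path_under_root() is hypothetical; dcache_lock must already be held, as the comment above requires):

static char *myfs_path_under_root(const struct path *path,
                                  const struct path *root,
                                  char *buf, int buflen, int *connected)
{
        struct path tmp = *root;
        char *res = __d_path(path, &tmp, buf, buflen);

        if (!IS_ERR(res))
                *connected = (tmp.dentry == root->dentry &&
                              tmp.mnt == root->mnt);
        return res;
}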
 
-/* write full pathname into buffer and return start of pathname */
-char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
-                               char *buf, int buflen)
+/**
+ * d_path - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * the string " (deleted)" is appended. Note that this is ambiguous.
+ *
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: callers must use the returned pointer, not the passed-in
+ * buffer, when consuming the name; the string usually starts at an offset
+ * into the buffer, leaving unused bytes before it.
+ *
+ * "buflen" should be positive.
+ */
+char *d_path(const struct path *path, char *buf, int buflen)
 {
        char *res;
-       struct vfsmount *rootmnt;
-       struct dentry *root;
+       struct path root;
+       struct path tmp;
+
+       /*
+        * We have various synthetic filesystems that never get mounted.  On
+        * these filesystems dentries are never used for lookup purposes, and
+        * thus don't need to be hashed.  They also don't need a name until a
+        * user wants to identify the object in /proc/pid/fd/.  The little hack
+        * below allows us to generate a name for these objects on demand:
+        */
+       if (path->dentry->d_op && path->dentry->d_op->d_dname)
+               return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 
        read_lock(&current->fs->lock);
-       rootmnt = mntget(current->fs->rootmnt);
-       root = dget(current->fs->root);
+       root = current->fs->root;
+       path_get(&root);
        read_unlock(&current->fs->lock);
        spin_lock(&dcache_lock);
-       res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
+       tmp = root;
+       res = __d_path(path, &tmp, buf, buflen);
        spin_unlock(&dcache_lock);
-       dput(root);
-       mntput(rootmnt);
+       path_put(&root);
        return res;
 }
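A typical caller, sketched (myfs_log_open() is hypothetical): allocate a scratch page, let d_path() fill it from the tail, and use the returned pointer only after checking it for an error.

static void myfs_log_open(struct file *file)
{
        char *buf = (char *)__get_free_page(GFP_KERNEL);
        char *name;

        if (!buf)
                return;
        name = d_path(&file->f_path, buf, PAGE_SIZE);
        if (!IS_ERR(name))
                printk(KERN_DEBUG "myfs: opened %s\n", name);
        free_page((unsigned long)buf);
}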
 
 /*
+ * Helper function for dentry_operations.d_dname() members
+ */
+char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
+                       const char *fmt, ...)
+{
+       va_list args;
+       char temp[64];
+       int sz;
+
+       va_start(args, fmt);
+       sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
+       va_end(args);
+
+       if (sz > sizeof(temp) || sz > buflen)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       buffer += buflen - sz;
+       return memcpy(buffer, temp, sz);
+}
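For reference, a d_dname implementation in the style of pipefs (hypothetical myfs prefix): such dentries are never hashed, so a name is synthesised only when something like /proc/pid/fd asks for one.

static char *myfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
        return dynamic_dname(dentry, buffer, buflen, "myfs:[%lu]",
                             dentry->d_inode->i_ino);
}

static const struct dentry_operations myfs_dentry_operations = {
        .d_dname        = myfs_dname,
};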
+
+/*
+ * Write full pathname from the root of the filesystem into the buffer.
+ */
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+       char *end = buf + buflen;
+       char *retval;
+
+       spin_lock(&dcache_lock);
+       prepend(&end, &buflen, "\0", 1);
+       if (d_unlinked(dentry) &&
+               (prepend(&end, &buflen, "//deleted", 9) != 0))
+                       goto Elong;
+       if (buflen < 1)
+               goto Elong;
+       /* Get '/' right */
+       retval = end-1;
+       *retval = '/';
+
+       while (!IS_ROOT(dentry)) {
+               struct dentry *parent = dentry->d_parent;
+
+               prefetch(parent);
+               if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
+                   (prepend(&end, &buflen, "/", 1) != 0))
+                       goto Elong;
+
+               retval = end;
+               dentry = parent;
+       }
+       spin_unlock(&dcache_lock);
+       return retval;
+Elong:
+       spin_unlock(&dcache_lock);
+       return ERR_PTR(-ENAMETOOLONG);
+}
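Unlike d_path(), dentry_path() never crosses mount points and takes no vfsmount, so it names the object relative to its own filesystem's root. A hedged sketch of a diagnostic caller (hypothetical helper name):

static void myfs_warn_dentry(struct dentry *dentry, const char *why)
{
        char *buf = (char *)__get_free_page(GFP_KERNEL);
        char *p;

        if (!buf)
                return;
        p = dentry_path(dentry, buf, PAGE_SIZE);
        if (!IS_ERR(p))
                printk(KERN_WARNING "myfs: %s: %s\n", why, p);
        free_page((unsigned long)buf);
}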
+
+/*
  * NOTE! The user-level library version returns a
  * character pointer. The kernel system call just
  * returns the length of the buffer filled (which
@@ -1510,31 +2082,30 @@ char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
  *             return NULL;
  *     }
  */
-asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
+SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 {
        int error;
-       struct vfsmount *pwdmnt, *rootmnt;
-       struct dentry *pwd, *root;
+       struct path pwd, root;
        char *page = (char *) __get_free_page(GFP_USER);
 
        if (!page)
                return -ENOMEM;
 
        read_lock(&current->fs->lock);
-       pwdmnt = mntget(current->fs->pwdmnt);
-       pwd = dget(current->fs->pwd);
-       rootmnt = mntget(current->fs->rootmnt);
-       root = dget(current->fs->root);
+       pwd = current->fs->pwd;
+       path_get(&pwd);
+       root = current->fs->root;
+       path_get(&root);
        read_unlock(&current->fs->lock);
 
        error = -ENOENT;
-       /* Has the current directory has been unlinked? */
        spin_lock(&dcache_lock);
-       if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
+       if (!d_unlinked(pwd.dentry)) {
                unsigned long len;
+               struct path tmp = root;
                char * cwd;
 
-               cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
+               cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE);
                spin_unlock(&dcache_lock);
 
                error = PTR_ERR(cwd);
@@ -1552,10 +2123,8 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
                spin_unlock(&dcache_lock);
 
 out:
-       dput(pwd);
-       mntput(pwdmnt);
-       dput(root);
-       mntput(rootmnt);
+       path_put(&pwd);
+       path_put(&root);
        free_page((unsigned long) page);
        return error;
 }
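An illustration of the contract described in the comment above (userspace, not part of the patch): the raw syscall returns the number of bytes written including the trailing NUL, while the C library wrapper turns that into a pointer.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        char buf[4096];
        long n = syscall(SYS_getcwd, buf, sizeof(buf));

        if (n < 0) {
                perror("getcwd");
                return 1;
        }
        printf("cwd = %s (%ld bytes)\n", buf, n);
        return 0;
}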
@@ -1576,32 +2145,26 @@ out:
  * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
  */
   
-int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
+int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 {
        int result;
-       struct dentry * saved = new_dentry;
        unsigned long seq;
 
-       /* need rcu_readlock to protect against the d_parent trashing due to
-        * d_move
+       if (new_dentry == old_dentry)
+               return 1;
+
+       /*
+        * Need rcu_read_lock() to protect against d_parent changing
+        * under us due to d_move()
         */
        rcu_read_lock();
-        do {
+       do {
                /* for restarting inner loop in case of seq retry */
-               new_dentry = saved;
-               result = 0;
                seq = read_seqbegin(&rename_lock);
-               for (;;) {
-                       if (new_dentry != old_dentry) {
-                               struct dentry * parent = new_dentry->d_parent;
-                               if (parent == new_dentry)
-                                       break;
-                               new_dentry = parent;
-                               continue;
-                       }
+               if (d_ancestor(old_dentry, new_dentry))
                        result = 1;
-                       break;
-               }
+               else
+                       result = 0;
        } while (read_seqretry(&rename_lock, seq));
        rcu_read_unlock();
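A hedged example of the intended use (hypothetical myfs_check_within(); both dentries pinned by the caller, as the comment above requires): refuse an operation unless victim sits at or below boundary in the dcache tree.

static int myfs_check_within(struct dentry *victim, struct dentry *boundary)
{
        if (!is_subdir(victim, boundary))
                return -EXDEV;          /* outside the allowed subtree */
        return 0;
}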
 
@@ -1700,7 +2263,7 @@ static void __init dcache_init_early(void)
                INIT_HLIST_HEAD(&dentry_hashtable[loop]);
 }
 
-static void __init dcache_init(unsigned long mempages)
+static void __init dcache_init(void)
 {
        int loop;
 
@@ -1709,14 +2272,10 @@ static void __init dcache_init(unsigned long mempages)
         * but it is probably not worth it because of the cache nature
         * of the dcache. 
         */
-       dentry_cache = kmem_cache_create("dentry_cache",
-                                        sizeof(struct dentry),
-                                        0,
-                                        (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
-                                        SLAB_MEM_SPREAD),
-                                        NULL, NULL);
+       dentry_cache = KMEM_CACHE(dentry,
+               SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
        
-       set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
+       register_shrinker(&dcache_shrinker);
 
        /* Hash may have been set up in dcache_init_early */
        if (!hashdist)
@@ -1737,16 +2296,10 @@ static void __init dcache_init(unsigned long mempages)
 }
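For context, the dcache_shrinker registered in dcache_init() above is defined earlier in this patch (not visible in this hunk); it is expected to look roughly like the following, wiring the existing shrink_dcache_memory() callback into the generic shrinker API of this era.

static struct shrinker dcache_shrinker = {
        .shrink = shrink_dcache_memory,
        .seeks = DEFAULT_SEEKS,
};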
 
 /* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep __read_mostly;
-
-/* SLAB cache for file structures */
-kmem_cache_t *filp_cachep __read_mostly;
+struct kmem_cache *names_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
-extern void bdev_cache_init(void);
-extern void chrdev_init(void);
-
 void __init vfs_caches_init_early(void)
 {
        dcache_init_early();
@@ -1764,21 +2317,17 @@ void __init vfs_caches_init(unsigned long mempages)
        mempages -= reserve;
 
        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
-
-       filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 
-       dcache_init(mempages);
-       inode_init(mempages);
+       dcache_init();
+       inode_init();
        files_init(mempages);
-       mnt_init(mempages);
+       mnt_init();
        bdev_cache_init();
        chrdev_init();
 }
 
 EXPORT_SYMBOL(d_alloc);
-EXPORT_SYMBOL(d_alloc_anon);
 EXPORT_SYMBOL(d_alloc_root);
 EXPORT_SYMBOL(d_delete);
 EXPORT_SYMBOL(d_find_alias);
@@ -1786,10 +2335,12 @@ EXPORT_SYMBOL(d_instantiate);
 EXPORT_SYMBOL(d_invalidate);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
 EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
 EXPORT_SYMBOL(d_splice_alias);
+EXPORT_SYMBOL(d_add_ci);
 EXPORT_SYMBOL(d_validate);
 EXPORT_SYMBOL(dget_locked);
 EXPORT_SYMBOL(dput);