* the dcache entry is deleted or garbage collected.
*/
-#include <linux/config.h>
#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/init.h>
-#include <linux/smp_lock.h>
#include <linux/hash.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
+#include "internal.h"
-/* #define DCACHE_DEBUG 1 */
-int sysctl_vfs_cache_pressure = 100;
+int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
EXPORT_SYMBOL(dcache_lock);
-static kmem_cache_t *dentry_cache;
+static struct kmem_cache *dentry_cache __read_mostly;
#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
#define D_HASHBITS d_hash_shift
#define D_HASHMASK d_hash_mask
-static unsigned int d_hash_mask;
-static unsigned int d_hash_shift;
-static struct hlist_head *dentry_hashtable;
+static unsigned int d_hash_mask __read_mostly;
+static unsigned int d_hash_shift __read_mostly;
+static struct hlist_head *dentry_hashtable __read_mostly;
static LIST_HEAD(dentry_unused);
/* Statistics gathering. */
.age_limit = 45,
};
-static void d_callback(struct rcu_head *head)
+static void __d_free(struct dentry *dentry)
{
- struct dentry * dentry = container_of(head, struct dentry, d_rcu);
-
if (dname_external(dentry))
kfree(dentry->d_name.name);
kmem_cache_free(dentry_cache, dentry);
}
+static void d_callback(struct rcu_head *head) /* RCU callback handed to call_rcu() when a hashed dentry is freed */
+{
+ struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); /* recover the dentry embedding this rcu_head */
+ __d_free(dentry); /* frees an external name, if any, and then the dentry itself */
+}
+
/*
* no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry
* inside dcache_lock.
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- call_rcu(&dentry->d_rcu, d_callback);
+ /* if dentry was never inserted into hash, immediate free is OK */
+ if (hlist_unhashed(&dentry->d_hash))
+ __d_free(dentry);
+ else
+ call_rcu(&dentry->d_u.d_rcu, d_callback);
+}
+
+static void dentry_lru_remove(struct dentry *dentry) /* take @dentry off its d_lru list if it is on one; NOTE(review): visible callers hold dcache_lock - confirm for all */
+{
+ if (!list_empty(&dentry->d_lru)) {
+ list_del_init(&dentry->d_lru); /* re-initialised so a later list_empty() check sees it as off-list */
+ dentry_stat.nr_unused--; /* keep the unused-dentry statistic in step with the list */
+ }
}
/*
* d_iput() operation if defined.
* Called with dcache_lock and per dentry lock held, drops both.
*/
-static inline void dentry_iput(struct dentry * dentry)
+static void dentry_iput(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
if (inode) {
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+ if (!inode->i_nlink)
+ fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
dentry->d_op->d_iput(dentry, inode);
else
}
}
+/**
+ * d_kill - kill dentry and return parent
+ * @dentry: dentry to kill
+ *
+ * Called with dcache_lock and d_lock, releases both. The dentry must
+ * already be unhashed and removed from the LRU.
+ *
+ * If this is the root of the dentry tree, return NULL.
+ *
+ * d_kill() itself does not drop the reference the dentry held on its
+ * parent; the callers in this file do that on the returned parent.
+ */
+static struct dentry *d_kill(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ list_del(&dentry->d_u.d_child); /* unlink from the parent's d_subdirs list */
+ dentry_stat.nr_dentry--; /* For d_free, below */
+ /*drops the locks, at that point nobody can reach this dentry */
+ dentry_iput(dentry);
+ parent = dentry->d_parent; /* read before d_free() releases the dentry */
+ d_free(dentry);
+ return dentry == parent ? NULL : parent; /* a root dentry is its own parent */
+}
+
/*
* This is dput
*
unhash_it:
__d_drop(dentry);
-
-kill_it: {
- struct dentry *parent;
-
- /* If dentry was on d_lru list
- * delete it from there
- */
- if (!list_empty(&dentry->d_lru)) {
- list_del(&dentry->d_lru);
- dentry_stat.nr_unused--;
- }
- list_del(&dentry->d_child);
- dentry_stat.nr_dentry--; /* For d_free, below */
- /*drops the locks, at that point nobody can reach this dentry */
- dentry_iput(dentry);
- parent = dentry->d_parent;
- d_free(dentry);
- if (dentry == parent)
- return;
- dentry = parent;
+kill_it:
+ dentry_lru_remove(dentry);
+ dentry = d_kill(dentry);
+ if (dentry)
goto repeat;
- }
}
/**
static inline struct dentry * __dget_locked(struct dentry *dentry)
{
atomic_inc(&dentry->d_count);
- if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&dentry->d_lru);
- }
+ dentry_lru_remove(dentry);
return dentry;
}
* it can be unhashed only if it has no children, or if it is the root
* of a filesystem.
*
- * If the inode has a DCACHE_DISCONNECTED alias, then prefer
+ * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
* any other hashed alias over that one unless @want_discon is set,
- * in which case only return a DCACHE_DISCONNECTED alias.
+ * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
*/
static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
prefetch(next);
alias = list_entry(tmp, struct dentry, d_alias);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
- if (alias->d_flags & DCACHE_DISCONNECTED)
+ if (IS_ROOT(alias) &&
+ (alias->d_flags & DCACHE_DISCONNECTED))
discon_alias = alias;
else if (!want_discon) {
__dget_locked(alias);
struct dentry * d_find_alias(struct inode *inode)
{
- struct dentry *de;
- spin_lock(&dcache_lock);
- de = __d_find_alias(inode, 0);
- spin_unlock(&dcache_lock);
+ struct dentry *de = NULL;
+
+ if (!list_empty(&inode->i_dentry)) {
+ spin_lock(&dcache_lock);
+ de = __d_find_alias(inode, 0);
+ spin_unlock(&dcache_lock);
+ }
return de;
}
*/
void d_prune_aliases(struct inode *inode)
{
- struct list_head *tmp, *head = &inode->i_dentry;
+ struct dentry *dentry;
restart:
spin_lock(&dcache_lock);
- tmp = head;
- while ((tmp = tmp->next) != head) {
- struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
__dget_locked(dentry);
}
/*
- * Throw away a dentry - free the inode, dput the parent.
- * This requires that the LRU list has already been
- * removed.
+ * Throw away a dentry - free the inode, dput the parent. This requires that
+ * the dentry has already been removed from the LRU list.
+ *
+ * Try to prune ancestors as well. This is necessary to prevent
+ * quadratic behavior of shrink_dcache_parent(), but is also expected
+ * to be beneficial in reducing dentry cache fragmentation.
+ *
* Called with dcache_lock, drops it and then regains.
+ * Called with dentry->d_lock held, drops it.
*/
-static inline void prune_one_dentry(struct dentry * dentry)
+static void prune_one_dentry(struct dentry * dentry)
{
- struct dentry * parent;
-
__d_drop(dentry);
- list_del(&dentry->d_child);
- dentry_stat.nr_dentry--; /* For d_free, below */
- dentry_iput(dentry);
- parent = dentry->d_parent;
- d_free(dentry);
- if (parent != dentry)
- dput(parent);
+ dentry = d_kill(dentry);
+
+ /*
+ * Prune ancestors. Locking is simpler than in dput(),
+ * because dcache_lock needs to be taken anyway.
+ */
spin_lock(&dcache_lock);
+ while (dentry) {
+ if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+ return;
+
+ if (dentry->d_op && dentry->d_op->d_delete)
+ dentry->d_op->d_delete(dentry);
+ dentry_lru_remove(dentry);
+ __d_drop(dentry);
+ dentry = d_kill(dentry);
+ spin_lock(&dcache_lock);
+ }
}
/**
* prune_dcache - shrink the dcache
* @count: number of entries to try and free
+ * @sb: if given, ignore dentries for other superblocks
+ * which are being unmounted.
*
* Shrink the dcache. This is done when we need
* more memory, or simply when we need to unmount
* all the dentries are in use.
*/
-static void prune_dcache(int count)
+static void prune_dcache(int count, struct super_block *sb)
{
spin_lock(&dcache_lock);
for (; count ; count--) {
struct dentry *dentry;
struct list_head *tmp;
+ struct rw_semaphore *s_umount;
cond_resched_lock(&dcache_lock);
tmp = dentry_unused.prev;
+ if (sb) {
+ /* Try to find a dentry for this sb, but don't try
+ * too hard, if they aren't near the tail they will
+ * be moved down again soon
+ */
+ int skip = count;
+ while (skip && tmp != &dentry_unused &&
+ list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
+ skip--;
+ tmp = tmp->prev;
+ }
+ }
if (tmp == &dentry_unused)
break;
list_del_init(tmp);
spin_unlock(&dentry->d_lock);
continue;
}
- prune_one_dentry(dentry);
+ /*
+ * If the dentry is not DCACHE_REFERENCED, it is time
+ * to remove it from the dcache, provided the super block is
+ * NULL (which means we are trying to reclaim memory)
+ * or this dentry belongs to the same super block that
+ * we want to shrink.
+ */
+ /*
+ * If this dentry is for "my" filesystem, then I can prune it
+ * without taking the s_umount lock (I already hold it).
+ */
+ if (sb && dentry->d_sb == sb) {
+ prune_one_dentry(dentry);
+ continue;
+ }
+ /*
+ * ...otherwise we need to be sure this filesystem isn't being
+ * unmounted, otherwise we could race with
+ * generic_shutdown_super(), and end up holding a reference to
+ * an inode while the filesystem is unmounted.
+ * So we try to get s_umount, and make sure s_root isn't NULL.
+ * (Take a local copy of s_umount to avoid a use-after-free of
+ * `dentry').
+ */
+ s_umount = &dentry->d_sb->s_umount;
+ if (down_read_trylock(s_umount)) {
+ if (dentry->d_sb->s_root != NULL) {
+ prune_one_dentry(dentry);
+ up_read(s_umount);
+ continue;
+ }
+ up_read(s_umount);
+ }
+ spin_unlock(&dentry->d_lock);
+ /*
+ * Insert dentry at the head of the list as inserting at the
+ * tail leads to a cycle.
+ */
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
}
spin_unlock(&dcache_lock);
}
* superblock to the most recent end of the unused list.
*/
spin_lock(&dcache_lock);
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_prev_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
- list_del(tmp);
- list_add(tmp, &dentry_unused);
+ list_move_tail(tmp, &dentry_unused);
}
/*
* Pass two ... free the dentries for this superblock.
*/
repeat:
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_prev_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
continue;
}
prune_one_dentry(dentry);
+ cond_resched_lock(&dcache_lock);
goto repeat;
}
spin_unlock(&dcache_lock);
}
/*
+ * destroy a single subtree of dentries for unmount
+ * - see the comments on shrink_dcache_for_umount() for a description of the
+ * locking
+ * - @dentry must be the root of its tree (IS_ROOT); this is enforced with
+ * BUG_ON()
+ * - the walk is iterative: descend to a leaf, free upwards until a dentry
+ * that still has children is found, then descend again
+ * - BUG()s if any dentry in the subtree still has a non-zero d_count
+ */
+static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
+{
+ struct dentry *parent;
+ unsigned detached = 0; /* dentries freed so far; subtracted from nr_dentry at "out" */
+
+ BUG_ON(!IS_ROOT(dentry));
+
+ /* detach this root from the system */
+ spin_lock(&dcache_lock);
+ dentry_lru_remove(dentry);
+ __d_drop(dentry);
+ spin_unlock(&dcache_lock);
+
+ for (;;) {
+ /* descend to the first leaf in the current subtree */
+ while (!list_empty(&dentry->d_subdirs)) {
+ struct dentry *loop;
+
+ /* this is a branch with children - detach all of them
+ * from the system in one go */
+ spin_lock(&dcache_lock);
+ list_for_each_entry(loop, &dentry->d_subdirs,
+ d_u.d_child) {
+ dentry_lru_remove(loop);
+ __d_drop(loop);
+ cond_resched_lock(&dcache_lock); /* offer to reschedule between children */
+ }
+ spin_unlock(&dcache_lock);
+
+ /* move to the first child */
+ dentry = list_entry(dentry->d_subdirs.next,
+ struct dentry, d_u.d_child);
+ }
+
+ /* consume the dentries from this leaf up through its parents
+ * until we find one with children or run out altogether */
+ do {
+ struct inode *inode;
+
+ if (atomic_read(&dentry->d_count) != 0) { /* a leftover reference at unmount is fatal */
+ printk(KERN_ERR
+ "BUG: Dentry %p{i=%lx,n=%s}"
+ " still in use (%d)"
+ " [unmount of %s %s]\n",
+ dentry,
+ dentry->d_inode ?
+ dentry->d_inode->i_ino : 0UL,
+ dentry->d_name.name,
+ atomic_read(&dentry->d_count),
+ dentry->d_sb->s_type->name,
+ dentry->d_sb->s_id);
+ BUG();
+ }
+
+ parent = dentry->d_parent;
+ if (parent == dentry)
+ parent = NULL; /* reached the root of this subtree */
+ else
+ atomic_dec(&parent->d_count); /* drop the reference this child held on its parent */
+
+ list_del(&dentry->d_u.d_child); /* unlink from the parent's d_subdirs list */
+ detached++;
+
+ inode = dentry->d_inode;
+ if (inode) {
+ dentry->d_inode = NULL;
+ list_del_init(&dentry->d_alias); /* remove from the inode's alias list */
+ if (dentry->d_op && dentry->d_op->d_iput)
+ dentry->d_op->d_iput(dentry, inode); /* filesystem-supplied release of the inode */
+ else
+ iput(inode);
+ }
+
+ d_free(dentry);
+
+ /* finished when we fall off the top of the tree,
+ * otherwise we ascend to the parent and move to the
+ * next sibling if there is one */
+ if (!parent)
+ goto out;
+
+ dentry = parent;
+
+ } while (list_empty(&dentry->d_subdirs));
+
+ dentry = list_entry(dentry->d_subdirs.next,
+ struct dentry, d_u.d_child);
+ }
+out:
+ /* several dentries were freed, need to correct nr_dentry */
+ spin_lock(&dcache_lock);
+ dentry_stat.nr_dentry -= detached;
+ spin_unlock(&dcache_lock);
+}
+
+/*
+ * destroy the dentries attached to a superblock on unmounting
+ * - we don't need to use dentry->d_lock, and only need dcache_lock when
+ * removing the dentry from the system lists and hashes because:
+ * - the superblock is detached from all mountings and open files, so the
+ * dentry trees will not be rearranged by the VFS
+ * - s_umount is write-locked, so the memory pressure shrinker will ignore
+ * any dentries belonging to this superblock that it comes across
+ * - the filesystem itself is no longer permitted to rearrange the dentries
+ * in this superblock
+ * - sb->s_root is set to NULL here; the tree under the old root is destroyed
+ * first, followed by every tree still hashed on sb->s_anon
+ */
+void shrink_dcache_for_umount(struct super_block *sb)
+{
+ struct dentry *dentry;
+
+ if (down_read_trylock(&sb->s_umount)) /* must fail: s_umount is required to be write-locked already */
+ BUG();
+
+ dentry = sb->s_root;
+ sb->s_root = NULL; /* prune_dcache() only prunes other superblocks' dentries while s_root is non-NULL */
+ atomic_dec(&dentry->d_count); /* presumably the reference held via sb->s_root; the subtree walk BUG()s on any non-zero count */
+ shrink_dcache_for_umount_subtree(dentry);
+
+ while (!hlist_empty(&sb->s_anon)) { /* each pass unhashes the first entry via __d_drop(), so the list shrinks */
+ dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+ shrink_dcache_for_umount_subtree(dentry);
+ }
+}
+
+/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
/* Have we found a mount point ? */
if (d_mountpoint(dentry))
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
this_parent = this_parent->d_parent;
goto resume;
}
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
- if (!list_empty(&dentry->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&dentry->d_lru);
- }
+ dentry_lru_remove(dentry);
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
if (!atomic_read(&dentry->d_count)) {
- list_add(&dentry->d_lru, dentry_unused.prev);
+ list_add_tail(&dentry->d_lru, &dentry_unused);
dentry_stat.nr_unused++;
found++;
}
*/
if (!list_empty(&dentry->d_subdirs)) {
this_parent = dentry;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name, found);
-#endif
goto repeat;
}
}
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
this_parent = this_parent->d_parent;
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
-this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
-#endif
goto resume;
}
out:
int found;
while ((found = select_parent(parent)) != 0)
- prune_dcache(found);
-}
-
-/**
- * shrink_dcache_anon - further prune the cache
- * @head: head of d_hash list of dentries to prune
- *
- * Prune the dentries that are anonymous
- *
- * parsing d_hash list does not hlist_for_each_rcu() as it
- * done under dcache_lock.
- *
- */
-void shrink_dcache_anon(struct hlist_head *head)
-{
- struct hlist_node *lp;
- int found;
- do {
- found = 0;
- spin_lock(&dcache_lock);
- hlist_for_each(lp, head) {
- struct dentry *this = hlist_entry(lp, struct dentry, d_hash);
- if (!list_empty(&this->d_lru)) {
- dentry_stat.nr_unused--;
- list_del_init(&this->d_lru);
- }
-
- /*
- * move only zero ref count dentries to the end
- * of the unused list for prune_dcache
- */
- if (!atomic_read(&this->d_count)) {
- list_add_tail(&this->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
- found++;
- }
- }
- spin_unlock(&dcache_lock);
- prune_dcache(found);
- } while(found);
+ prune_dcache(found, parent->d_sb);
}
/*
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
- prune_dcache(nr);
+ prune_dcache(nr, NULL);
}
return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
+static struct shrinker dcache_shrinker = { /* shrinker descriptor for the dentry cache */
+ .shrink = shrink_dcache_memory, /* prunes up to nr unused dentries, returns nr_unused scaled by sysctl_vfs_cache_pressure, or -1 without __GFP_FS */
+ .seeks = DEFAULT_SEEKS,
+};
+
/**
* d_alloc - allocate a dcache entry
* @parent: parent of entry to allocate
struct dentry *dentry;
char *dname;
- dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
+ dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
if (!dentry)
return NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
dentry->d_mounted = 0;
+#ifdef CONFIG_PROFILING
dentry->d_cookie = NULL;
+#endif
INIT_HLIST_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
dentry->d_parent = dget(parent);
dentry->d_sb = parent->d_sb;
} else {
- INIT_LIST_HEAD(&dentry->d_child);
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
}
spin_lock(&dcache_lock);
if (parent)
- list_add(&dentry->d_child, &parent->d_subdirs);
+ list_add(&dentry->d_u.d_child, &parent->d_subdirs);
dentry_stat.nr_dentry++;
spin_unlock(&dcache_lock);
void d_instantiate(struct dentry *entry, struct inode * inode)
{
- if (!list_empty(&entry->d_alias)) BUG();
+ BUG_ON(!list_empty(&entry->d_alias));
spin_lock(&dcache_lock);
if (inode)
list_add(&entry->d_alias, &inode->i_dentry);
entry->d_inode = inode;
+ fsnotify_d_instantiate(entry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
}
*
* Fill in inode information in the entry. On success, it returns NULL.
* If an unhashed alias of "entry" already exists, then we return the
- * aliased dentry instead.
+ * aliased dentry instead and drop one reference to inode.
*
* Note that in order to avoid conflicts with rename() etc, the caller
* had better be holding the parent directory semaphore.
+ *
+ * This also assumes that the inode count has been incremented
+ * (or otherwise set) by the caller to indicate that it is now
+ * in use by the dcache.
*/
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+ struct inode *inode)
{
struct dentry *alias;
int len = entry->d_name.len;
const char *name = entry->d_name.name;
unsigned int hash = entry->d_name.hash;
- BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_lock);
- if (!inode)
- goto do_negative;
+ if (!inode) {
+ entry->d_inode = NULL;
+ return NULL;
+ }
+
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
if (memcmp(qstr->name, name, len))
continue;
dget_locked(alias);
- spin_unlock(&dcache_lock);
- BUG_ON(!d_unhashed(alias));
return alias;
}
+
list_add(&entry->d_alias, &inode->i_dentry);
-do_negative:
entry->d_inode = inode;
- spin_unlock(&dcache_lock);
- security_d_instantiate(entry, inode);
+ fsnotify_d_instantiate(entry, inode);
return NULL;
}
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) /* wrapper that runs __d_instantiate_unique() under dcache_lock */
+{
+ struct dentry *result;
+
+ BUG_ON(!list_empty(&entry->d_alias)); /* @entry must not already be on an inode's alias list */
+
+ spin_lock(&dcache_lock);
+ result = __d_instantiate_unique(entry, inode);
+ spin_unlock(&dcache_lock);
+
+ if (!result) { /* no matching alias was found: @entry itself was instantiated */
+ security_d_instantiate(entry, inode);
+ return NULL;
+ }
+
+ BUG_ON(!d_unhashed(result)); /* a returned alias is required to be unhashed */
+ iput(inode); /* the alias is returned instead, so drop one reference to @inode */
+ return result;
+}
+
EXPORT_SYMBOL(d_instantiate_unique);
/**
{
struct dentry *new = NULL;
- if (inode) {
+ if (inode && S_ISDIR(inode->i_mode)) {
spin_lock(&dcache_lock);
new = __d_find_alias(inode, 1);
if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
+ fsnotify_d_instantiate(new, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(new, inode);
d_rehash(dentry);
/* d_instantiate takes dcache_lock, so we do it by hand */
list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;
+ fsnotify_d_instantiate(dentry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
struct hlist_head *head = d_hash(parent,hash);
struct dentry *found = NULL;
struct hlist_node *node;
+ struct dentry *dentry;
rcu_read_lock();
- hlist_for_each_rcu(node, head) {
- struct dentry *dentry;
+ hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
struct qstr *qstr;
- dentry = hlist_entry(node, struct dentry, d_hash);
-
if (dentry->d_name.hash != hash)
continue;
if (dentry->d_parent != parent)
}
/**
+ * d_hash_and_lookup - hash the qstr then search for a dentry
+ * @dir: Directory to search in
+ * @name: qstr of name we wish to find
+ *
+ * On hash failure or on lookup failure NULL is returned.
+ * Otherwise the result of d_lookup() is returned.
+ *
+ * Note that @name->hash is overwritten with the computed hash.
+ */
+struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
+{
+ struct dentry *dentry = NULL;
+
+ /*
+ * Check for a fs-specific hash function. Note that we must
+ * calculate the standard hash first, as the d_op->d_hash()
+ * routine may choose to leave the hash value unchanged.
+ */
+ name->hash = full_name_hash(name->name, name->len);
+ if (dir->d_op && dir->d_op->d_hash) {
+ if (dir->d_op->d_hash(dir, name) < 0) /* a negative return is treated as hash failure */
+ goto out;
+ }
+ dentry = d_lookup(dir, name);
+out:
+ return dentry;
+}
+
+/**
* d_validate - verify dentry provided from insecure source
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
spin_lock(&dcache_lock);
base = d_hash(dparent, dentry->d_name.hash);
hlist_for_each(lhp,base) {
- /* hlist_for_each_rcu() not required for d_hash list
+ /* hlist_for_each_entry_rcu() not required for d_hash list
* as it is parsed under dcache_lock
*/
if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
void d_delete(struct dentry * dentry)
{
+ int isdir = 0;
/*
* Are we the only user?
*/
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
+ isdir = S_ISDIR(dentry->d_inode->i_mode);
if (atomic_read(&dentry->d_count) == 1) {
dentry_iput(dentry);
+ fsnotify_nameremove(dentry, isdir);
return;
}
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+
+ fsnotify_nameremove(dentry, isdir);
}
static void __d_rehash(struct dentry * entry, struct hlist_head *list)
hlist_add_head_rcu(&entry->d_hash, list);
}
+static void _d_rehash(struct dentry * entry) /* hash @entry into the bucket chosen by its parent and name hash; callers in this file hold dcache_lock and entry->d_lock */
+{
+ __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
+
/**
* d_rehash - add an entry back to the hash
* @entry: dentry to add to the hash
void d_rehash(struct dentry * entry)
{
- struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
- __d_rehash(entry, list);
+ _d_rehash(entry);
spin_unlock(&entry->d_lock);
spin_unlock(&dcache_lock);
}
* dentry:internal, target:external. Steal target's
* storage and make target internal.
*/
+ memcpy(target->d_iname, dentry->d_name.name,
+ dentry->d_name.len + 1);
dentry->d_name.name = target->d_name.name;
target->d_name.name = target->d_iname;
}
* This forceful removal will result in ugly /proc output if
* somebody holds a file open that got deleted due to a rename.
* We could be nicer about the deleted file, and let it show
- * up under the name it got deleted rather than the name that
- * deleted it.
+ * up under the name it had before it was deleted rather than
+ * under the original name of the file that was moved on top of it.
*/
-/**
- * d_move - move a dentry
+/*
+ * d_move_locked - move a dentry
* @dentry: entry to move
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
* dcache entries should not be moved in this way.
*/
-
-void d_move(struct dentry * dentry, struct dentry * target)
+static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
struct hlist_head *list;
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
- spin_lock(&dcache_lock);
write_seqlock(&rename_lock);
/*
* XXXX: do we really need to take target->d_lock?
*/
if (target < dentry) {
spin_lock(&target->d_lock);
- spin_lock(&dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
} else {
spin_lock(&dentry->d_lock);
- spin_lock(&target->d_lock);
+ spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
}
/* Move the dentry to the target hash queue, if on different bucket */
- if (dentry->d_flags & DCACHE_UNHASHED)
+ if (d_unhashed(dentry))
goto already_unhashed;
hlist_del_rcu(&dentry->d_hash);
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
- list_del(&dentry->d_child);
- list_del(&target->d_child);
+ list_del(&dentry->d_u.d_child);
+ list_del(&target->d_u.d_child);
/* Switch the names.. */
switch_names(dentry, target);
if (IS_ROOT(dentry)) {
dentry->d_parent = target->d_parent;
target->d_parent = target;
- INIT_LIST_HEAD(&target->d_child);
+ INIT_LIST_HEAD(&target->d_u.d_child);
} else {
do_switch(dentry->d_parent, target->d_parent);
/* And add them back to the (new) parent lists */
- list_add(&target->d_child, &target->d_parent->d_subdirs);
+ list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
}
- list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+ list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
spin_unlock(&target->d_lock);
+ fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
+}
+
+/**
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ *
+ * Takes dcache_lock around d_move_locked(), so the caller must not
+ * already hold it.
+ */
+
+void d_move(struct dentry * dentry, struct dentry * target)
+{
+ spin_lock(&dcache_lock);
+ d_move_locked(dentry, target); /* does the actual move under rename_lock and both d_locks */
spin_unlock(&dcache_lock);
}
+/*
+ * Helper that returns 1 if p1 is an ancestor (direct or indirect parent)
+ * of p2, else 0. The walk follows d_parent from p2 until it reaches a
+ * dentry that is its own parent.
+ */
+static int d_isparent(struct dentry *p1, struct dentry *p2)
+{
+ struct dentry *p;
+
+ for (p = p2; p->d_parent != p; p = p->d_parent) { /* stop at the root, which is its own parent */
+ if (p->d_parent == p1)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * This helper attempts to cope with remotely renamed directories
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ *
+ * Note: If ever the locking in lock_rename() changes, then please
+ * remember to update this too...
+ *
+ * On return, dcache_lock will have been unlocked.
+ *
+ * Returns @alias after moving it onto @dentry, ERR_PTR(-ELOOP) if @alias
+ * is an ancestor of @dentry, or ERR_PTR(-EBUSY) if either mutex could not
+ * be taken with a trylock.
+ */
+static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+{
+ struct mutex *m1 = NULL, *m2 = NULL; /* mutexes actually taken here; released before returning */
+ struct dentry *ret;
+
+ /* If alias and dentry share a parent, then no extra locks required */
+ if (alias->d_parent == dentry->d_parent)
+ goto out_unalias;
+
+ /* Check for loops */
+ ret = ERR_PTR(-ELOOP);
+ if (d_isparent(alias, dentry))
+ goto out_err; /* the move would place alias beneath one of its own descendants */
+
+ /* See lock_rename() */
+ ret = ERR_PTR(-EBUSY);
+ if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) /* trylock only: dcache_lock is held, so sleeping is not an option */
+ goto out_err;
+ m1 = &dentry->d_sb->s_vfs_rename_mutex;
+ if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+ goto out_err;
+ m2 = &alias->d_parent->d_inode->i_mutex;
+out_unalias:
+ d_move_locked(alias, dentry); /* alias is the entry moved, dentry is the target */
+ ret = alias;
+out_err:
+ spin_unlock(&dcache_lock); /* released on both the success and the error paths */
+ if (m2)
+ mutex_unlock(m2);
+ if (m1)
+ mutex_unlock(m1);
+ return ret;
+}
+
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced.
+ *
+ * The two dentries exchange names (with length and hash) and positions in
+ * the tree; a dentry that takes over a root position becomes its own parent.
+ * No locks are taken here.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+ struct dentry *dparent, *aparent;
+
+ switch_names(dentry, anon);
+ do_switch(dentry->d_name.len, anon->d_name.len); /* keep the length in step with the swapped name */
+ do_switch(dentry->d_name.hash, anon->d_name.hash); /* likewise the hash */
+
+ dparent = dentry->d_parent; /* remember both parents before either is overwritten */
+ aparent = anon->d_parent;
+
+ dentry->d_parent = (aparent == anon) ? dentry : aparent; /* dentry becomes a root if anon was one */
+ list_del(&dentry->d_u.d_child);
+ if (!IS_ROOT(dentry))
+ list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ else
+ INIT_LIST_HEAD(&dentry->d_u.d_child); /* a root is on nobody's d_subdirs list */
+
+ anon->d_parent = (dparent == dentry) ? anon : dparent; /* anon takes over dentry's old parent */
+ list_del(&anon->d_u.d_child);
+ if (!IS_ROOT(anon))
+ list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+ else
+ INIT_LIST_HEAD(&anon->d_u.d_child);
+
+ anon->d_flags &= ~DCACHE_DISCONNECTED; /* anon now occupies dentry's old place, so clear the marker */
+}
+
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces a dentry into the tree, substituting an extant disconnected
+ * root directory alias in its place if there is one.
+ *
+ * Returns NULL if @dentry itself was used. Otherwise a reference to @inode
+ * is dropped and the dentry used in its place is returned, or the ERR_PTR
+ * value produced by __d_unalias() if a directory alias could not be moved.
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+ struct dentry *actual;
+
+ BUG_ON(!d_unhashed(dentry)); /* the candidate must not be hashed yet */
+
+ spin_lock(&dcache_lock);
+
+ if (!inode) { /* negative dentry: just hash @dentry itself */
+ actual = dentry;
+ dentry->d_inode = NULL;
+ goto found_lock;
+ }
+
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *alias;
+
+ /* Does an aliased dentry already exist? */
+ alias = __d_find_alias(inode, 0);
+ if (alias) {
+ actual = alias;
+ /* Is this an anonymous mountpoint that we could splice
+ * into our tree? */
+ if (IS_ROOT(alias)) {
+ spin_lock(&alias->d_lock); /* released at "found" via actual->d_lock */
+ __d_materialise_dentry(dentry, alias);
+ __d_drop(alias); /* unhash; it is rehashed under its new parent and name at "found" */
+ goto found;
+ }
+ /* Nope, but we must(!) avoid directory aliasing */
+ actual = __d_unalias(dentry, alias); /* drops dcache_lock on every path */
+ if (IS_ERR(actual))
+ dput(alias); /* release the reference taken by __d_find_alias() */
+ goto out_nolock;
+ }
+ }
+
+ /* Add a unique reference */
+ actual = __d_instantiate_unique(dentry, inode);
+ if (!actual)
+ actual = dentry;
+ else if (unlikely(!d_unhashed(actual)))
+ goto shouldnt_be_hashed; /* an alias returned here is required to be unhashed */
+
+found_lock:
+ spin_lock(&actual->d_lock);
+found:
+ _d_rehash(actual);
+ spin_unlock(&actual->d_lock);
+ spin_unlock(&dcache_lock);
+out_nolock:
+ if (actual == dentry) {
+ security_d_instantiate(dentry, inode);
+ return NULL;
+ }
+
+ iput(inode); /* @dentry was not used (or an error occurred), so drop one reference to @inode */
+ return actual;
+
+shouldnt_be_hashed:
+ spin_unlock(&dcache_lock);
+ BUG();
+ goto shouldnt_be_hashed; /* NOTE(review): appears to exist only so no path falls off the end after BUG() */
+}
+
/**
* d_path - return the path of a dentry
* @dentry: dentry to report
*
* "buflen" should be positive. Caller holds the dcache_lock.
*/
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
- struct dentry *root, struct vfsmount *rootmnt,
- char *buffer, int buflen)
+static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+ struct path *root, char *buffer, int buflen)
{
char * end = buffer+buflen;
char * retval;
for (;;) {
struct dentry * parent;
- if (dentry == root && vfsmnt == rootmnt)
+ if (dentry == root->dentry && vfsmnt == root->mnt)
break;
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
/* Global root? */
return ERR_PTR(-ENAMETOOLONG);
}
-/* write full pathname into buffer and return start of pathname */
-char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
- char *buf, int buflen)
+/**
+ * d_path - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * the string " (deleted)" is appended. Note that this is ambiguous.
+ *
+ * Returns the buffer or an error code if the path was too long.
+ *
+ * "buflen" should be positive. d_path() acquires the dcache_lock itself,
+ * so the caller must not already hold it.
+ */
+char *d_path(struct path *path, char *buf, int buflen)
{
char *res;
- struct vfsmount *rootmnt;
- struct dentry *root;
+ struct path root;
+
+ /*
+ * We have various synthetic filesystems that never get mounted. On
+ * these filesystems dentries are never used for lookup purposes, and
+ * thus don't need to be hashed. They also don't need a name until a
+ * user wants to identify the object in /proc/pid/fd/. The little hack
+ * below allows us to generate a name for these objects on demand:
+ */
+ if (path->dentry->d_op && path->dentry->d_op->d_dname)
+ return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
read_lock(¤t->fs->lock);
- rootmnt = mntget(current->fs->rootmnt);
- root = dget(current->fs->root);
+ root = current->fs->root;
+ path_get(¤t->fs->root);
read_unlock(¤t->fs->lock);
spin_lock(&dcache_lock);
- res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
+ res = __d_path(path->dentry, path->mnt, &root, buf, buflen);
spin_unlock(&dcache_lock);
- dput(root);
- mntput(rootmnt);
+ path_put(&root);
return res;
}
/*
+ * Helper function for dentry_operations.d_dname() members
+ *
+ * Formats @fmt and its arguments into a 64-byte on-stack scratch buffer and
+ * copies the result, including the terminating NUL, to the tail end of
+ * @buffer (which is @buflen bytes long).
+ *
+ * Returns a pointer to the start of the copied string inside @buffer, or
+ * ERR_PTR(-ENAMETOOLONG) if the formatted name plus its NUL does not fit
+ * in the scratch buffer or in @buflen bytes.
+ *
+ * @dentry is not used by this helper.
+ */
+char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
+ const char *fmt, ...)
+{
+ va_list args;
+ char temp[64];
+ int sz;
+
+ va_start(args, fmt);
+ sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
+ va_end(args);
+
+ if (sz > sizeof(temp) || sz > buflen)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ buffer += buflen - sz;
+ return memcpy(buffer, temp, sz);
+}
+
+/*
* NOTE! The user-level library version returns a
* character pointer. The kernel system call just
* returns the length of the buffer filled (which
asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
{
int error;
- struct vfsmount *pwdmnt, *rootmnt;
- struct dentry *pwd, *root;
+ struct path pwd, root;
char *page = (char *) __get_free_page(GFP_USER);
if (!page)
return -ENOMEM;
read_lock(&current->fs->lock);
- pwdmnt = mntget(current->fs->pwdmnt);
- pwd = dget(current->fs->pwd);
- rootmnt = mntget(current->fs->rootmnt);
- root = dget(current->fs->root);
+ /* Copy pwd and root and take a reference on each while fs->lock is held. */
+ pwd = current->fs->pwd;
+ path_get(&current->fs->pwd);
+ root = current->fs->root;
+ path_get(&current->fs->root);
read_unlock(&current->fs->lock);
error = -ENOENT;
/* Has the current directory has been unlinked? */
spin_lock(&dcache_lock);
- if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
+ if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) {
unsigned long len;
char * cwd;
- cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
+ cwd = __d_path(pwd.dentry, pwd.mnt, &root, page, PAGE_SIZE);
spin_unlock(&dcache_lock);
error = PTR_ERR(cwd);
spin_unlock(&dcache_lock);
out:
- dput(pwd);
- mntput(pwdmnt);
- dput(root);
- mntput(rootmnt);
+ path_put(&pwd);
+ path_put(&root);
free_page((unsigned long) page);
return error;
}
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
if (d_unhashed(dentry)||!dentry->d_inode)
continue;
atomic_dec(&dentry->d_count);
}
if (this_parent != root) {
- next = this_parent->d_child.next;
+ next = this_parent->d_u.d_child.next;
atomic_dec(&this_parent->d_count);
this_parent = this_parent->d_parent;
goto resume;
struct dentry * dentry;
ino_t ino = 0;
- /*
- * Check for a fs-specific hash function. Note that we must
- * calculate the standard hash first, as the d_op->d_hash()
- * routine may choose to leave the hash value unchanged.
- */
- name->hash = full_name_hash(name->name, name->len);
- if (dir->d_op && dir->d_op->d_hash)
- {
- if (dir->d_op->d_hash(dir, name) != 0)
- goto out;
- }
-
- dentry = d_lookup(dir, name);
- if (dentry)
- {
+ dentry = d_hash_and_lookup(dir, name);
+ if (dentry) {
if (dentry->d_inode)
ino = dentry->d_inode->i_ino;
dput(dentry);
}
-out:
return ino;
}
INIT_HLIST_HEAD(&dentry_hashtable[loop]);
}
-static void __init dcache_init(unsigned long mempages)
+static void __init dcache_init(void)
{
int loop;
* but it is probably not worth it because of the cache nature
* of the dcache.
*/
- dentry_cache = kmem_cache_create("dentry_cache",
- sizeof(struct dentry),
- 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_PANIC,
- NULL, NULL);
+ dentry_cache = KMEM_CACHE(dentry,
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
- set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory);
+ register_shrinker(&dcache_shrinker);
/* Hash may have been set up in dcache_init_early */
if (!hashdist)
}
/* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep;
+struct kmem_cache *names_cachep __read_mostly;
/* SLAB cache for file structures */
-kmem_cache_t *filp_cachep;
+struct kmem_cache *filp_cachep __read_mostly;
EXPORT_SYMBOL(d_genocide);
-extern void bdev_cache_init(void);
-extern void chrdev_init(void);
-
void __init vfs_caches_init_early(void)
{
dcache_init_early();
mempages -= reserve;
names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- dcache_init(mempages);
- inode_init(mempages);
+ dcache_init();
+ inode_init();
files_init(mempages);
- mnt_init(mempages);
+ mnt_init();
bdev_cache_init();
chrdev_init();
}
EXPORT_SYMBOL(d_invalidate);
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
EXPORT_SYMBOL(d_path);
EXPORT_SYMBOL(d_prune_aliases);
EXPORT_SYMBOL(d_rehash);