X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Finotify.c;h=7bbed1b89825f8b37f4fa753201290d0002bf618;hb=6358d6cb328a772eb6f02c870da61709d539480f;hp=7457501b95656087cfe27339b94d3bace7585782;hpb=b5e618181a927210f8be1d3d2249d31904ba358d;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/inotify.c b/fs/inotify.c index 7457501..7bbed1b 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(get_inotify_watch); +int pin_inotify_watch(struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + spin_lock(&sb_lock); + if (sb->s_count >= S_BIAS) { + atomic_inc(&sb->s_active); + spin_unlock(&sb_lock); + atomic_inc(&watch->count); + return 1; + } + spin_unlock(&sb_lock); + return 0; +} + /** * put_inotify_watch - decrements the ref count on a given watch. cleans up * watch references if the count reaches zero. inotify_watch is freed by @@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(put_inotify_watch); +void unpin_inotify_watch(struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + put_inotify_watch(watch); + deactivate_super(sb); +} + /* * inotify_handle_get_wd - returns the next WD for use by the given handle * @@ -168,20 +189,14 @@ static void set_dentry_child_flags(struct inode *inode, int watched) struct dentry *child; list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { - if (!child->d_inode) { - WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); + if (!child->d_inode) continue; - } + spin_lock(&child->d_lock); - if (watched) { - WARN_ON(child->d_flags & - DCACHE_INOTIFY_PARENT_WATCHED); + if (watched) child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; - } else { - WARN_ON(!(child->d_flags & - DCACHE_INOTIFY_PARENT_WATCHED)); - child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED; - } + else + child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } } @@ -253,7 +268,6 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode) if (!inode) return; - WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); spin_lock(&entry->d_lock); parent = entry->d_parent; if (parent->d_inode && inotify_inode_watched(parent->d_inode)) @@ -486,6 +500,112 @@ void inotify_init_watch(struct inotify_watch *watch) } EXPORT_SYMBOL_GPL(inotify_init_watch); +/* + * Watch removals suck violently. To kick the watch out we need (in this + * order) inode->inotify_mutex and ih->mutex. That's fine if we have + * a hold on inode; however, for all other cases we need to make damn sure + * we don't race with umount. We can *NOT* just grab a reference to a + * watch - inotify_unmount_inodes() will happily sail past it and we'll end + * with reference to inode potentially outliving its superblock. Ideally + * we just want to grab an active reference to superblock if we can; that + * will make sure we won't go into inotify_umount_inodes() until we are + * done. Cleanup is just deactivate_super(). However, that leaves a messy + * case - what if we *are* racing with umount() and active references to + * superblock can't be acquired anymore? We can bump ->s_count, grab + * ->s_umount, which will almost certainly wait until the superblock is shut + * down and the watch in question is pining for fjords. That's fine, but + * there is a problem - we might have hit the window between ->s_active + * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock + * is past the point of no return and is heading for shutdown) and the + * moment when deactivate_super() acquires ->s_umount. We could just do + * drop_super() yield() and retry, but that's rather antisocial and this + * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having + * found that we'd got there first (i.e. that ->s_root is non-NULL) we know + * that we won't race with inotify_umount_inodes(). So we could grab a + * reference to watch and do the rest as above, just with drop_super() instead + * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we + * could grab ->s_umount. So the watch could've been gone already. + * + * That still can be dealt with - we need to save watch->wd, do idr_find() + * and compare its result with our pointer. If they match, we either have + * the damn thing still alive or we'd lost not one but two races at once, + * the watch had been killed and a new one got created with the same ->wd + * at the same address. That couldn't have happened in inotify_destroy(), + * but inotify_rm_wd() could run into that. Still, "new one got created" + * is not a problem - we have every right to kill it or leave it alone, + * whatever's more convenient. + * + * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as + * "grab it and kill it" check. If it's been our original watch, we are + * fine, if it's a newcomer - nevermind, just pretend that we'd won the + * race and kill the fscker anyway; we are safe since we know that its + * superblock won't be going away. + * + * And yes, this is far beyond mere "not very pretty"; so's the entire + * concept of inotify to start with. + */ + +/** + * pin_to_kill - pin the watch down for removal + * @ih: inotify handle + * @watch: watch to kill + * + * Called with ih->mutex held, drops it. Possible return values: + * 0 - nothing to do, it has died + * 1 - remove it, drop the reference and deactivate_super() + * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid + * that variant, since it involved a lot of PITA, but that's the best that + * could've been done. + */ +static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) +{ + struct super_block *sb = watch->inode->i_sb; + s32 wd = watch->wd; + + spin_lock(&sb_lock); + if (sb->s_count >= S_BIAS) { + atomic_inc(&sb->s_active); + spin_unlock(&sb_lock); + get_inotify_watch(watch); + mutex_unlock(&ih->mutex); + return 1; /* the best outcome */ + } + sb->s_count++; + spin_unlock(&sb_lock); + mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ + down_read(&sb->s_umount); + if (likely(!sb->s_root)) { + /* fs is already shut down; the watch is dead */ + drop_super(sb); + return 0; + } + /* raced with the final deactivate_super() */ + mutex_lock(&ih->mutex); + if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { + /* the watch is dead */ + mutex_unlock(&ih->mutex); + drop_super(sb); + return 0; + } + /* still alive or freed and reused with the same sb and wd; kill */ + get_inotify_watch(watch); + mutex_unlock(&ih->mutex); + return 2; +} + +static void unpin_and_kill(struct inotify_watch *watch, int how) +{ + struct super_block *sb = watch->inode->i_sb; + put_inotify_watch(watch); + switch (how) { + case 1: + deactivate_super(sb); + break; + case 2: + drop_super(sb); + } +} + /** * inotify_destroy - clean up and destroy an inotify instance * @ih: inotify handle @@ -497,11 +617,15 @@ void inotify_destroy(struct inotify_handle *ih) * pretty. We cannot do a simple iteration over the list, because we * do not know the inode until we iterate to the watch. But we need to * hold inode->inotify_mutex before ih->mutex. The following works. + * + * AV: it had to become even uglier to start working ;-/ */ while (1) { struct inotify_watch *watch; struct list_head *watches; + struct super_block *sb; struct inode *inode; + int how; mutex_lock(&ih->mutex); watches = &ih->watches; @@ -510,8 +634,10 @@ void inotify_destroy(struct inotify_handle *ih) break; } watch = list_first_entry(watches, struct inotify_watch, h_list); - get_inotify_watch(watch); - mutex_unlock(&ih->mutex); + sb = watch->inode->i_sb; + how = pin_to_kill(ih, watch); + if (!how) + continue; inode = watch->inode; mutex_lock(&inode->inotify_mutex); @@ -525,7 +651,7 @@ void inotify_destroy(struct inotify_handle *ih) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(watch); + unpin_and_kill(watch, how); } /* free this handle: the put matching the get in inotify_init() */ @@ -627,6 +753,7 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, struct inode *inode, u32 mask) { int ret = 0; + int newly_watched; /* don't allow invalid bits: we don't want flags set */ mask &= IN_ALL_EVENTS | IN_ONESHOT; @@ -653,12 +780,18 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, */ watch->inode = igrab(inode); - if (!inotify_inode_watched(inode)) - set_dentry_child_flags(inode, 1); - /* Add the watch to the handle's and the inode's list */ + newly_watched = !inotify_inode_watched(inode); list_add(&watch->h_list, &ih->watches); list_add(&watch->i_list, &inode->inotify_watches); + /* + * Set child flags _after_ adding the watch, so there is no race + * windows where newly instantiated children could miss their parent's + * watched flag. + */ + if (newly_watched) + set_dentry_child_flags(inode, 1); + out: mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); @@ -667,6 +800,49 @@ out: EXPORT_SYMBOL_GPL(inotify_add_watch); /** + * inotify_clone_watch - put the watch next to existing one + * @old: already installed watch + * @new: new watch + * + * Caller must hold the inotify_mutex of inode we are dealing with; + * it is expected to remove the old watch before unlocking the inode. + */ +s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new) +{ + struct inotify_handle *ih = old->ih; + int ret = 0; + + new->mask = old->mask; + new->ih = ih; + + mutex_lock(&ih->mutex); + + /* Initialize a new watch */ + ret = inotify_handle_get_wd(ih, new); + if (unlikely(ret)) + goto out; + ret = new->wd; + + get_inotify_handle(ih); + + new->inode = igrab(old->inode); + + list_add(&new->h_list, &ih->watches); + list_add(&new->i_list, &old->inode->inotify_watches); +out: + mutex_unlock(&ih->mutex); + return ret; +} + +void inotify_evict_watch(struct inotify_watch *watch) +{ + get_inotify_watch(watch); + mutex_lock(&watch->ih->mutex); + inotify_remove_watch_locked(watch->ih, watch); + mutex_unlock(&watch->ih->mutex); +} + +/** * inotify_rm_wd - remove a watch from an inotify instance * @ih: inotify handle * @wd: watch descriptor to remove @@ -676,7 +852,9 @@ EXPORT_SYMBOL_GPL(inotify_add_watch); int inotify_rm_wd(struct inotify_handle *ih, u32 wd) { struct inotify_watch *watch; + struct super_block *sb; struct inode *inode; + int how; mutex_lock(&ih->mutex); watch = idr_find(&ih->idr, wd); @@ -684,9 +862,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) mutex_unlock(&ih->mutex); return -EINVAL; } - get_inotify_watch(watch); + sb = watch->inode->i_sb; + how = pin_to_kill(ih, watch); + if (!how) + return 0; + inode = watch->inode; - mutex_unlock(&ih->mutex); mutex_lock(&inode->inotify_mutex); mutex_lock(&ih->mutex); @@ -697,7 +878,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(watch); + unpin_and_kill(watch, how); return 0; }