X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=fs%2Fgfs2%2Fglock.c;h=8b674b1f3a554d4300e7965328f750c13b34bbe1;hb=746cd1e7e4a555ddaee53b19a46e05c9c61eaf09;hp=d636b3e80f5d26dda0a0f799011da225c5ef24ba;hpb=58e9fee13e579df44922172dbe3c9e3ba3edf7a3;p=safe%2Fjmp%2Flinux-2.6 diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d636b3e..8b674b1 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -18,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -40,32 +38,35 @@ #include "quota.h" #include "super.h" #include "util.h" +#include "bmap.h" +#define CREATE_TRACE_POINTS +#include "trace_gfs2.h" struct gfs2_gl_hash_bucket { struct hlist_head hb_list; }; -struct glock_iter { - int hash; /* hash bucket index */ - struct gfs2_sbd *sdp; /* incore superblock */ - struct gfs2_glock *gl; /* current glock struct */ - struct seq_file *seq; /* sequence file for debugfs */ - char string[512]; /* scratch space */ +struct gfs2_glock_iter { + int hash; /* hash bucket index */ + struct gfs2_sbd *sdp; /* incore superblock */ + struct gfs2_glock *gl; /* current glock struct */ + char string[512]; /* scratch space */ }; typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); -static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); -static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); -static void gfs2_glock_drop_th(struct gfs2_glock *gl); -static void run_queue(struct gfs2_glock *gl); +static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); +#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static DECLARE_RWSEM(gfs2_umount_flush_sem); static struct dentry *gfs2_root; -static struct task_struct *scand_process; -static unsigned int scand_secs = 5; static struct workqueue_struct *glock_workqueue; +struct workqueue_struct *gfs2_delete_workqueue; +static LIST_HEAD(lru_list); +static atomic_t lru_count = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) @@ -123,33 +124,6 @@ static inline rwlock_t *gl_lock_addr(unsigned int x) #endif /** - * relaxed_state_ok - is a requested lock compatible with the current lock mode? - * @actual: the current state of the lock - * @requested: the lock state that was requested by the caller - * @flags: the modifier flags passed in by the caller - * - * Returns: 1 if the locks are compatible, 0 otherwise - */ - -static inline int relaxed_state_ok(unsigned int actual, unsigned requested, - int flags) -{ - if (actual == requested) - return 1; - - if (flags & GL_EXACT) - return 0; - - if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED) - return 1; - - if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY)) - return 1; - - return 0; -} - -/** * gl_hash() - Turn glock number into hash bucket number * @lock: The glock number * @@ -182,13 +156,10 @@ static void glock_free(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct inode *aspace = gl->gl_aspace; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); - if (aspace) gfs2_aspace_put(aspace); - - kmem_cache_free(gfs2_glock_cachep, gl); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); } /** @@ -197,12 +168,69 @@ static void glock_free(struct gfs2_glock *gl) * */ -static void gfs2_glock_hold(struct gfs2_glock *gl) +void gfs2_glock_hold(struct gfs2_glock *gl) { + GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); atomic_inc(&gl->gl_ref); } /** + * demote_ok - Check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: 1 if it's ok + */ + +static int demote_ok(const struct gfs2_glock *gl) +{ + const struct gfs2_glock_operations *glops = gl->gl_ops; + + if (gl->gl_state == LM_ST_UNLOCKED) + return 0; + if (!list_empty(&gl->gl_holders)) + return 0; + if (glops->go_demote_ok) + return glops->go_demote_ok(gl); + return 1; +} + +/** + * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list + * @gl: the glock + * + */ + +static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) +{ + int may_reclaim; + may_reclaim = (demote_ok(gl) && + (atomic_read(&gl->gl_ref) == 1 || + (gl->gl_name.ln_type == LM_TYPE_INODE && + atomic_read(&gl->gl_ref) <= 2))); + spin_lock(&lru_lock); + if (list_empty(&gl->gl_lru) && may_reclaim) { + list_add_tail(&gl->gl_lru, &lru_list); + atomic_inc(&lru_count); + } + spin_unlock(&lru_lock); +} + +/** + * gfs2_glock_put_nolock() - Decrement reference count on glock + * @gl: The glock to put + * + * This function should only be used if the caller has its own reference + * to the glock, in addition to the one it is dropping. + */ + +void gfs2_glock_put_nolock(struct gfs2_glock *gl) +{ + if (atomic_dec_and_test(&gl->gl_ref)) + GLOCK_BUG_ON(gl, 1); + gfs2_glock_schedule_for_reclaim(gl); +} + +/** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put * @@ -211,21 +239,25 @@ static void gfs2_glock_hold(struct gfs2_glock *gl) int gfs2_glock_put(struct gfs2_glock *gl) { int rv = 0; - struct gfs2_sbd *sdp = gl->gl_sbd; write_lock(gl_lock_addr(gl->gl_hash)); if (atomic_dec_and_test(&gl->gl_ref)) { hlist_del(&gl->gl_list); write_unlock(gl_lock_addr(gl->gl_hash)); - gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); - gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); - gfs2_assert(sdp, list_empty(&gl->gl_holders)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); + spin_lock(&lru_lock); + if (!list_empty(&gl->gl_lru)) { + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); + } + spin_unlock(&lru_lock); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); glock_free(gl); rv = 1; goto out; } + spin_lock(&gl->gl_spin); + gfs2_glock_schedule_for_reclaim(gl); + spin_unlock(&gl->gl_spin); write_unlock(gl_lock_addr(gl->gl_hash)); out: return rv; @@ -261,842 +293,667 @@ static struct gfs2_glock *search_bucket(unsigned int hash, } /** - * gfs2_glock_find() - Find glock by lock number - * @sdp: The GFS2 superblock - * @name: The lock name + * may_grant - check if its ok to grant a new lock + * @gl: The glock + * @gh: The lock request which we wish to grant * - * Returns: NULL, or the struct gfs2_glock with the requested number + * Returns: true if its ok to grant the lock */ -static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, - const struct lm_lockname *name) -{ - unsigned int hash = gl_hash(sdp, name); - struct gfs2_glock *gl; - - read_lock(gl_lock_addr(hash)); - gl = search_bucket(hash, sdp, name); - read_unlock(gl_lock_addr(hash)); - - return gl; -} - -static void glock_work_func(struct work_struct *work) +static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) { - struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); - - spin_lock(&gl->gl_spin); - if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) - set_bit(GLF_DEMOTE, &gl->gl_flags); - run_queue(gl); - spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); + const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list); + if ((gh->gh_state == LM_ST_EXCLUSIVE || + gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) + return 0; + if (gl->gl_state == gh->gh_state) + return 1; + if (gh->gh_flags & GL_EXACT) + return 0; + if (gl->gl_state == LM_ST_EXCLUSIVE) { + if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED) + return 1; + if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED) + return 1; + } + if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY)) + return 1; + return 0; } -static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, - void **lockp) +static void gfs2_holder_wake(struct gfs2_holder *gh) { - int error = -EIO; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - error = sdp->sd_lockstruct.ls_ops->lm_get_lock( - sdp->sd_lockstruct.ls_lockspace, name, lockp); - return error; + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb__after_clear_bit(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); } /** - * gfs2_glock_get() - Get a glock, or create one if one doesn't exist - * @sdp: The GFS2 superblock - * @number: the lock number - * @glops: The glock_operations to use - * @create: If 0, don't create the glock if it doesn't exist - * @glp: the glock is returned here - * - * This does not lock a glock, just finds/creates structures for one. - * - * Returns: errno + * do_promote - promote as many requests as possible on the current queue + * @gl: The glock + * + * Returns: 1 if there is a blocked holder at the head of the list, or 2 + * if a type specific operation is underway. */ -int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, int create, - struct gfs2_glock **glp) +static int do_promote(struct gfs2_glock *gl) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) { - struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; - struct gfs2_glock *gl, *tmp; - unsigned int hash = gl_hash(sdp, &name); - int error; - - read_lock(gl_lock_addr(hash)); - gl = search_bucket(hash, sdp, &name); - read_unlock(gl_lock_addr(hash)); - - if (gl || !create) { - *glp = gl; - return 0; - } - - gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); - if (!gl) - return -ENOMEM; - - gl->gl_flags = 0; - gl->gl_name = name; - atomic_set(&gl->gl_ref, 1); - gl->gl_state = LM_ST_UNLOCKED; - gl->gl_demote_state = LM_ST_EXCLUSIVE; - gl->gl_hash = hash; - gl->gl_owner_pid = NULL; - gl->gl_ip = 0; - gl->gl_ops = glops; - gl->gl_req_gh = NULL; - gl->gl_stamp = jiffies; - gl->gl_tchange = jiffies; - gl->gl_object = NULL; - gl->gl_sbd = sdp; - gl->gl_aspace = NULL; - INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_holder *gh, *tmp; + int ret; - /* If this glock protects actual on-disk data or metadata blocks, - create a VFS inode to manage the pages/buffers holding them. */ - if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) { - gl->gl_aspace = gfs2_aspace_get(sdp); - if (!gl->gl_aspace) { - error = -ENOMEM; - goto fail; +restart: + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (may_grant(gl, gh)) { + if (gh->gh_list.prev == &gl->gl_holders && + glops->go_lock) { + spin_unlock(&gl->gl_spin); + /* FIXME: eliminate this eventually */ + ret = glops->go_lock(gh); + spin_lock(&gl->gl_spin); + if (ret) { + if (ret == 1) + return 2; + gh->gh_error = ret; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + goto restart; + } + set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 1); + gfs2_holder_wake(gh); + goto restart; + } + set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh, 0); + gfs2_holder_wake(gh); + continue; } + if (gh->gh_list.prev == &gl->gl_holders) + return 1; + break; } - - error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock); - if (error) - goto fail_aspace; - - write_lock(gl_lock_addr(hash)); - tmp = search_bucket(hash, sdp, &name); - if (tmp) { - write_unlock(gl_lock_addr(hash)); - glock_free(gl); - gl = tmp; - } else { - hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); - write_unlock(gl_lock_addr(hash)); - } - - *glp = gl; - return 0; - -fail_aspace: - if (gl->gl_aspace) - gfs2_aspace_put(gl->gl_aspace); -fail: - kmem_cache_free(gfs2_glock_cachep, gl); - return error; } /** - * gfs2_holder_init - initialize a struct gfs2_holder in the default way - * @gl: the glock - * @state: the state we're requesting - * @flags: the modifier flags - * @gh: the holder structure + * do_error - Something unexpected has happened during a lock request * */ -void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, - struct gfs2_holder *gh) +static inline void do_error(struct gfs2_glock *gl, const int ret) { - INIT_LIST_HEAD(&gh->gh_list); - gh->gh_gl = gl; - gh->gh_ip = (unsigned long)__builtin_return_address(0); - gh->gh_owner_pid = get_pid(task_pid(current)); - gh->gh_state = state; - gh->gh_flags = flags; - gh->gh_error = 0; - gh->gh_iflags = 0; - gfs2_glock_hold(gl); + struct gfs2_holder *gh, *tmp; + + list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + continue; + if (ret & LM_OUT_ERROR) + gh->gh_error = -EIO; + else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + gh->gh_error = GLR_TRYFAILED; + else + continue; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + } } /** - * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it - * @state: the state we're requesting - * @flags: the modifier flags - * @gh: the holder structure - * - * Don't mess with the glock. - * + * find_first_waiter - find the first gh that's waiting for the glock + * @gl: the glock */ -void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh) +static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl) { - gh->gh_state = state; - gh->gh_flags = flags; - gh->gh_iflags = 0; - gh->gh_ip = (unsigned long)__builtin_return_address(0); + struct gfs2_holder *gh; + + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) + return gh; + } + return NULL; } /** - * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference) - * @gh: the holder structure + * state_change - record that the glock is now in a different state + * @gl: the glock + * @new_state the new state * */ -void gfs2_holder_uninit(struct gfs2_holder *gh) +static void state_change(struct gfs2_glock *gl, unsigned int new_state) { - put_pid(gh->gh_owner_pid); - gfs2_glock_put(gh->gh_gl); - gh->gh_gl = NULL; - gh->gh_ip = 0; -} + int held1, held2; -static void gfs2_holder_wake(struct gfs2_holder *gh) -{ - clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb__after_clear_bit(); - wake_up_bit(&gh->gh_iflags, HIF_WAIT); -} + held1 = (gl->gl_state != LM_ST_UNLOCKED); + held2 = (new_state != LM_ST_UNLOCKED); -static int just_schedule(void *word) -{ - schedule(); - return 0; -} + if (held1 != held2) { + if (held2) + gfs2_glock_hold(gl); + else + gfs2_glock_put_nolock(gl); + } -static void wait_on_holder(struct gfs2_holder *gh) -{ - might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); + gl->gl_state = new_state; + gl->gl_tchange = jiffies; } static void gfs2_demote_wake(struct gfs2_glock *gl) { gl->gl_demote_state = LM_ST_EXCLUSIVE; - clear_bit(GLF_DEMOTE, &gl->gl_flags); - smp_mb__after_clear_bit(); - wake_up_bit(&gl->gl_flags, GLF_DEMOTE); -} - -static void wait_on_demote(struct gfs2_glock *gl) -{ - might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); + clear_bit(GLF_DEMOTE, &gl->gl_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&gl->gl_flags, GLF_DEMOTE); } /** - * rq_mutex - process a mutex request in the queue - * @gh: the glock holder + * finish_xmote - The DLM has replied to one of our lock requests + * @gl: The glock + * @ret: The status from the DLM * - * Returns: 1 if the queue is blocked */ -static int rq_mutex(struct gfs2_holder *gh) +static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) { - struct gfs2_glock *gl = gh->gh_gl; + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_holder *gh; + unsigned state = ret & LM_OUT_ST_MASK; + int rv; - list_del_init(&gh->gh_list); - /* gh->gh_error never examined. */ - set_bit(GLF_LOCK, &gl->gl_flags); - clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb(); - wake_up_bit(&gh->gh_iflags, HIF_WAIT); - - return 1; -} - -/** - * rq_promote - process a promote request in the queue - * @gh: the glock holder - * - * Acquire a new inter-node lock, or change a lock state to more restrictive. - * - * Returns: 1 if the queue is blocked - */ - -static int rq_promote(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; + spin_lock(&gl->gl_spin); + trace_gfs2_glock_state_change(gl, state); + state_change(gl, state); + gh = find_first_waiter(gl); + + /* Demote to UN request arrived during demote to SH or DF */ + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && + state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED) + gl->gl_target = LM_ST_UNLOCKED; + + /* Check for state != intended state */ + if (unlikely(state != gl->gl_target)) { + if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { + /* move to back of queue and try next entry */ + if (ret & LM_OUT_CANCELED) { + if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0) + list_move_tail(&gh->gh_list, &gl->gl_holders); + gh = find_first_waiter(gl); + gl->gl_target = gh->gh_state; + goto retry; + } + /* Some error or failed "try lock" - report it */ + if ((ret & LM_OUT_ERROR) || + (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { + gl->gl_target = gl->gl_state; + do_error(gl, ret); + goto out; + } + } + switch(state) { + /* Unlocked due to conversion deadlock, try again */ + case LM_ST_UNLOCKED: +retry: + do_xmote(gl, gh, gl->gl_target); + break; + /* Conversion fails, unlock and try again */ + case LM_ST_SHARED: + case LM_ST_DEFERRED: + do_xmote(gl, gh, LM_ST_UNLOCKED); + break; + default: /* Everything else */ + printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); + GLOCK_BUG_ON(gl, 1); + } + spin_unlock(&gl->gl_spin); + gfs2_glock_put(gl); + return; + } - if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - if (list_empty(&gl->gl_holders)) { - gl->gl_req_gh = gh; - set_bit(GLF_LOCK, &gl->gl_flags); + /* Fast path - we got what we asked for */ + if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) + gfs2_demote_wake(gl); + if (state != LM_ST_UNLOCKED) { + if (glops->go_xmote_bh) { spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gh->gh_gl, gh); + rv = glops->go_xmote_bh(gl, gh); + if (rv == -EAGAIN) + return; spin_lock(&gl->gl_spin); + if (rv) { + do_error(gl, rv); + goto out; + } } - return 1; - } - - if (list_empty(&gl->gl_holders)) { - set_bit(HIF_FIRST, &gh->gh_iflags); - set_bit(GLF_LOCK, &gl->gl_flags); - } else { - struct gfs2_holder *next_gh; - if (gh->gh_state == LM_ST_EXCLUSIVE) - return 1; - next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, - gh_list); - if (next_gh->gh_state == LM_ST_EXCLUSIVE) - return 1; + rv = do_promote(gl); + if (rv == 2) + goto out_locked; } +out: + clear_bit(GLF_LOCK, &gl->gl_flags); +out_locked: + spin_unlock(&gl->gl_spin); + gfs2_glock_put(gl); +} - list_move_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); +static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, + unsigned int req_state, + unsigned int flags) +{ + int ret = LM_OUT_ERROR; - gfs2_holder_wake(gh); + if (!sdp->sd_lockstruct.ls_ops->lm_lock) + return req_state == LM_ST_UNLOCKED ? 0 : req_state; - return 0; + if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, + req_state, flags); + return ret; } /** - * rq_demote - process a demote request in the queue - * @gh: the glock holder + * do_xmote - Calls the DLM to change the state of a lock + * @gl: The lock state + * @gh: The holder (only for promotes) + * @target: The target lock state * - * Returns: 1 if the queue is blocked */ -static int rq_demote(struct gfs2_glock *gl) +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) { - if (!list_empty(&gl->gl_holders)) - return 1; + const struct gfs2_glock_operations *glops = gl->gl_ops; + struct gfs2_sbd *sdp = gl->gl_sbd; + unsigned int lck_flags = gh ? gh->gh_flags : 0; + int ret; - if (gl->gl_state == gl->gl_demote_state || - gl->gl_state == LM_ST_UNLOCKED) { - gfs2_demote_wake(gl); - return 0; + lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | + LM_FLAG_PRIORITY); + BUG_ON(gl->gl_state == target); + BUG_ON(gl->gl_state == gl->gl_target); + if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && + glops->go_inval) { + set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + do_error(gl, 0); /* Fail queued try locks */ } + spin_unlock(&gl->gl_spin); + if (glops->go_xmote_th) + glops->go_xmote_th(gl); + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); + clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); - set_bit(GLF_LOCK, &gl->gl_flags); - set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - - if (gl->gl_demote_state == LM_ST_UNLOCKED || - gl->gl_state != LM_ST_EXCLUSIVE) { - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); + gfs2_glock_hold(gl); + if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED || + gl->gl_state == LM_ST_DEFERRED) && + !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) + lck_flags |= LM_FLAG_TRY_1CB; + ret = gfs2_lm_lock(sdp, gl, target, lck_flags); + + if (!(ret & LM_OUT_ASYNC)) { + finish_xmote(gl, ret); + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } else { - spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gl, NULL); + GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC); } - spin_lock(&gl->gl_spin); - clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - - return 0; } /** - * run_queue - process holder structures on a glock + * find_first_holder - find the first "holder" gh * @gl: the glock - * */ -static void run_queue(struct gfs2_glock *gl) + +static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) { struct gfs2_holder *gh; - int blocked = 1; - - for (;;) { - if (test_bit(GLF_LOCK, &gl->gl_flags)) - break; - - if (!list_empty(&gl->gl_waiters1)) { - gh = list_entry(gl->gl_waiters1.next, - struct gfs2_holder, gh_list); - blocked = rq_mutex(gh); - } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { - blocked = rq_demote(gl); - if (test_bit(GLF_WAITERS2, &gl->gl_flags) && - !blocked) { - set_bit(GLF_DEMOTE, &gl->gl_flags); - gl->gl_demote_state = LM_ST_UNLOCKED; - } - clear_bit(GLF_WAITERS2, &gl->gl_flags); - } else if (!list_empty(&gl->gl_waiters3)) { - gh = list_entry(gl->gl_waiters3.next, - struct gfs2_holder, gh_list); - blocked = rq_promote(gh); - } else - break; - if (blocked) - break; + if (!list_empty(&gl->gl_holders)) { + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + return gh; } + return NULL; } /** - * gfs2_glmutex_lock - acquire a local lock on a glock - * @gl: the glock + * run_queue - do all outstanding tasks related to a glock + * @gl: The glock in question + * @nonblock: True if we must not block in run_queue * - * Gives caller exclusive access to manipulate a glock structure. */ -static void gfs2_glmutex_lock(struct gfs2_glock *gl) +static void run_queue(struct gfs2_glock *gl, const int nonblock) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) { - spin_lock(&gl->gl_spin); - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - struct gfs2_holder gh; + struct gfs2_holder *gh = NULL; + int ret; - gfs2_holder_init(gl, 0, 0, &gh); - set_bit(HIF_WAIT, &gh.gh_iflags); - list_add_tail(&gh.gh_list, &gl->gl_waiters1); - spin_unlock(&gl->gl_spin); - wait_on_holder(&gh); - gfs2_holder_uninit(&gh); + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) + return; + + GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); + + if (test_bit(GLF_DEMOTE, &gl->gl_flags) && + gl->gl_demote_state != gl->gl_state) { + if (find_first_holder(gl)) + goto out_unlock; + if (nonblock) + goto out_sched; + set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); + GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); + gl->gl_target = gl->gl_demote_state; } else { - gl->gl_owner_pid = get_pid(task_pid(current)); - gl->gl_ip = (unsigned long)__builtin_return_address(0); - spin_unlock(&gl->gl_spin); + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) + gfs2_demote_wake(gl); + ret = do_promote(gl); + if (ret == 0) + goto out_unlock; + if (ret == 2) + goto out; + gh = find_first_waiter(gl); + gl->gl_target = gh->gh_state; + if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) + do_error(gl, 0); /* Fail queued try locks */ } -} + do_xmote(gl, gh, gl->gl_target); +out: + return; -/** - * gfs2_glmutex_trylock - try to acquire a local lock on a glock - * @gl: the glock - * - * Returns: 1 if the glock is acquired - */ +out_sched: + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put_nolock(gl); +out_unlock: + clear_bit(GLF_LOCK, &gl->gl_flags); + goto out; +} -static int gfs2_glmutex_trylock(struct gfs2_glock *gl) +static void delete_work_func(struct work_struct *work) { - int acquired = 1; + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_inode *ip = NULL; + struct inode *inode; + u64 no_addr = 0; spin_lock(&gl->gl_spin); - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - acquired = 0; - } else { - gl->gl_owner_pid = get_pid(task_pid(current)); - gl->gl_ip = (unsigned long)__builtin_return_address(0); - } + ip = (struct gfs2_inode *)gl->gl_object; + if (ip) + no_addr = ip->i_no_addr; spin_unlock(&gl->gl_spin); - - return acquired; + if (ip) { + inode = gfs2_ilookup(sdp->sd_vfs, no_addr); + if (inode) { + d_prune_aliases(inode); + iput(inode); + } + } + gfs2_glock_put(gl); } -/** - * gfs2_glmutex_unlock - release a local lock on a glock - * @gl: the glock - * - */ - -static void gfs2_glmutex_unlock(struct gfs2_glock *gl) +static void glock_work_func(struct work_struct *work) { - struct pid *pid; + unsigned long delay = 0; + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + finish_xmote(gl, gl->gl_reply); + down_read(&gfs2_umount_flush_sem); spin_lock(&gl->gl_spin); - clear_bit(GLF_LOCK, &gl->gl_flags); - pid = gl->gl_owner_pid; - gl->gl_owner_pid = NULL; - gl->gl_ip = 0; - run_queue(gl); + if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + gl->gl_state != LM_ST_UNLOCKED && + gl->gl_demote_state != LM_ST_EXCLUSIVE) { + unsigned long holdtime, now = jiffies; + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + if (time_before(now, holdtime)) + delay = holdtime - now; + set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); + } + run_queue(gl, 0); spin_unlock(&gl->gl_spin); - - put_pid(pid); + up_read(&gfs2_umount_flush_sem); + if (!delay || + queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); } /** - * handle_callback - process a demote request - * @gl: the glock - * @state: the state the caller wants us to change to + * gfs2_glock_get() - Get a glock, or create one if one doesn't exist + * @sdp: The GFS2 superblock + * @number: the lock number + * @glops: The glock_operations to use + * @create: If 0, don't create the glock if it doesn't exist + * @glp: the glock is returned here * - * There are only two requests that we are going to see in actual - * practise: LM_ST_SHARED and LM_ST_UNLOCKED + * This does not lock a glock, just finds/creates structures for one. + * + * Returns: errno */ -static void handle_callback(struct gfs2_glock *gl, unsigned int state, - int remote, unsigned long delay) +int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, int create, + struct gfs2_glock **glp) { - int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; + struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; + struct gfs2_glock *gl, *tmp; + unsigned int hash = gl_hash(sdp, &name); + int error; - spin_lock(&gl->gl_spin); - set_bit(bit, &gl->gl_flags); - if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { - gl->gl_demote_state = state; - gl->gl_demote_time = jiffies; - if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && - gl->gl_object) { - gfs2_glock_schedule_for_reclaim(gl); - spin_unlock(&gl->gl_spin); - return; + read_lock(gl_lock_addr(hash)); + gl = search_bucket(hash, sdp, &name); + read_unlock(gl_lock_addr(hash)); + + *glp = gl; + if (gl) + return 0; + if (!create) + return -ENOENT; + + gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); + if (!gl) + return -ENOMEM; + + gl->gl_flags = 0; + gl->gl_name = name; + atomic_set(&gl->gl_ref, 1); + gl->gl_state = LM_ST_UNLOCKED; + gl->gl_target = LM_ST_UNLOCKED; + gl->gl_demote_state = LM_ST_EXCLUSIVE; + gl->gl_hash = hash; + gl->gl_ops = glops; + snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number); + memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); + gl->gl_lksb.sb_lvbptr = gl->gl_lvb; + gl->gl_tchange = jiffies; + gl->gl_object = NULL; + gl->gl_sbd = sdp; + gl->gl_aspace = NULL; + INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); + INIT_WORK(&gl->gl_delete, delete_work_func); + + /* If this glock protects actual on-disk data or metadata blocks, + create a VFS inode to manage the pages/buffers holding them. */ + if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) { + gl->gl_aspace = gfs2_aspace_get(sdp); + if (!gl->gl_aspace) { + error = -ENOMEM; + goto fail; } - } else if (gl->gl_demote_state != LM_ST_UNLOCKED && - gl->gl_demote_state != state) { - if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) - set_bit(GLF_WAITERS2, &gl->gl_flags); - else - gl->gl_demote_state = LM_ST_UNLOCKED; } - spin_unlock(&gl->gl_spin); + + write_lock(gl_lock_addr(hash)); + tmp = search_bucket(hash, sdp, &name); + if (tmp) { + write_unlock(gl_lock_addr(hash)); + glock_free(gl); + gl = tmp; + } else { + hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); + write_unlock(gl_lock_addr(hash)); + } + + *glp = gl; + + return 0; + +fail: + kmem_cache_free(gfs2_glock_cachep, gl); + return error; } /** - * state_change - record that the glock is now in a different state + * gfs2_holder_init - initialize a struct gfs2_holder in the default way * @gl: the glock - * @new_state the new state + * @state: the state we're requesting + * @flags: the modifier flags + * @gh: the holder structure * */ -static void state_change(struct gfs2_glock *gl, unsigned int new_state) +void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, + struct gfs2_holder *gh) { - int held1, held2; - - held1 = (gl->gl_state != LM_ST_UNLOCKED); - held2 = (new_state != LM_ST_UNLOCKED); - - if (held1 != held2) { - if (held2) - gfs2_glock_hold(gl); - else - gfs2_glock_put(gl); - } - - gl->gl_state = new_state; - gl->gl_tchange = jiffies; + INIT_LIST_HEAD(&gh->gh_list); + gh->gh_gl = gl; + gh->gh_ip = (unsigned long)__builtin_return_address(0); + gh->gh_owner_pid = get_pid(task_pid(current)); + gh->gh_state = state; + gh->gh_flags = flags; + gh->gh_error = 0; + gh->gh_iflags = 0; + gfs2_glock_hold(gl); } /** - * drop_bh - Called after a lock module unlock completes - * @gl: the glock - * @ret: the return status + * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it + * @state: the state we're requesting + * @flags: the modifier flags + * @gh: the holder structure * - * Doesn't wake up the process waiting on the struct gfs2_holder (if any) - * Doesn't drop the reference on the glock the top half took out + * Don't mess with the glock. * */ -static void drop_bh(struct gfs2_glock *gl, unsigned int ret) +void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh) { - struct gfs2_sbd *sdp = gl->gl_sbd; - struct gfs2_holder *gh = gl->gl_req_gh; - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, !ret); - - state_change(gl, LM_ST_UNLOCKED); - - if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) { - spin_lock(&gl->gl_spin); - gh->gh_error = 0; - spin_unlock(&gl->gl_spin); - gfs2_glock_xmote_th(gl, gl->gl_req_gh); - gfs2_glock_put(gl); - return; - } - - spin_lock(&gl->gl_spin); - gfs2_demote_wake(gl); - clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); + gh->gh_state = state; + gh->gh_flags = flags; + gh->gh_iflags = 0; + gh->gh_ip = (unsigned long)__builtin_return_address(0); } /** - * xmote_bh - Called after the lock module is done acquiring a lock - * @gl: The glock in question - * @ret: the int returned from the lock module + * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference) + * @gh: the holder structure * */ -static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh = gl->gl_req_gh; - int op_done = 1; - - if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) { - drop_bh(gl, ret); - return; - } - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); - - state_change(gl, ret & LM_OUT_ST_MASK); - - /* Deal with each possible exit condition */ - - if (!gh) { - gl->gl_stamp = jiffies; - if (ret & LM_OUT_CANCELED) { - op_done = 0; - } else { - spin_lock(&gl->gl_spin); - if (gl->gl_state != gl->gl_demote_state) { - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); - gfs2_glock_put(gl); - return; - } - gfs2_demote_wake(gl); - spin_unlock(&gl->gl_spin); - } - } else { - spin_lock(&gl->gl_spin); - if (ret & LM_OUT_CONV_DEADLK) { - gh->gh_error = 0; - set_bit(GLF_CONV_DEADLK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_glock_drop_th(gl); - gfs2_glock_put(gl); - return; - } - list_del_init(&gh->gh_list); - gh->gh_error = -EIO; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - goto out; - gh->gh_error = GLR_CANCELED; - if (ret & LM_OUT_CANCELED) - goto out; - if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - list_add_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - set_bit(HIF_FIRST, &gh->gh_iflags); - op_done = 0; - goto out; - } - gh->gh_error = GLR_TRYFAILED; - if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) - goto out; - gh->gh_error = -EINVAL; - if (gfs2_assert_withdraw(sdp, 0) == -1) - fs_err(sdp, "ret = 0x%.8X\n", ret); -out: - spin_unlock(&gl->gl_spin); - } - - if (glops->go_xmote_bh) - glops->go_xmote_bh(gl); - - if (op_done) { - spin_lock(&gl->gl_spin); - gl->gl_req_gh = NULL; - clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - } - - gfs2_glock_put(gl); - - if (gh) - gfs2_holder_wake(gh); -} - -static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, - unsigned int cur_state, unsigned int req_state, - unsigned int flags) +void gfs2_holder_uninit(struct gfs2_holder *gh) { - int ret = 0; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, - req_state, flags); - return ret; + put_pid(gh->gh_owner_pid); + gfs2_glock_put(gh->gh_gl); + gh->gh_gl = NULL; + gh->gh_ip = 0; } /** - * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock - * @gl: The glock in question - * @state: the requested state - * @flags: modifier flags to the lock call + * gfs2_glock_holder_wait + * @word: unused * + * This function and gfs2_glock_demote_wait both show up in the WCHAN + * field. Thus I've separated these otherwise identical functions in + * order to be more informative to the user. */ -static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) +static int gfs2_glock_holder_wait(void *word) { - struct gfs2_sbd *sdp = gl->gl_sbd; - int flags = gh ? gh->gh_flags : 0; - unsigned state = gh ? gh->gh_state : gl->gl_demote_state; - const struct gfs2_glock_operations *glops = gl->gl_ops; - int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | - LM_FLAG_NOEXP | LM_FLAG_ANY | - LM_FLAG_PRIORITY); - unsigned int lck_ret; - - if (glops->go_xmote_th) - glops->go_xmote_th(gl); - if (state == LM_ST_DEFERRED && glops->go_inval) - glops->go_inval(gl, DIO_METADATA); - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); - gfs2_assert_warn(sdp, state != gl->gl_state); - - gfs2_glock_hold(gl); - - lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); - - if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR))) - return; - - if (lck_ret & LM_OUT_ASYNC) - gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC); - else - xmote_bh(gl, lck_ret); + schedule(); + return 0; } -static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, - unsigned int cur_state) +static int gfs2_glock_demote_wait(void *word) { - int ret = 0; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); - return ret; + schedule(); + return 0; } -/** - * gfs2_glock_drop_th - call into the lock module to unlock a lock - * @gl: the glock - * - */ - -static void gfs2_glock_drop_th(struct gfs2_glock *gl) +static void wait_on_holder(struct gfs2_holder *gh) { - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - unsigned int ret; - - if (glops->go_xmote_th) - glops->go_xmote_th(gl); - if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA); - - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); - gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); - - gfs2_glock_hold(gl); - - ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); - - if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR))) - return; + might_sleep(); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); +} - if (!ret) - drop_bh(gl, ret); - else - gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC); +static void wait_on_demote(struct gfs2_glock *gl) +{ + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** - * do_cancels - cancel requests for locks stuck waiting on an expire flag - * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock + * handle_callback - process a demote request + * @gl: the glock + * @state: the state the caller wants us to change to * - * Don't cancel GL_NOCANCEL requests. + * There are only two requests that we are going to see in actual + * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ -static void do_cancels(struct gfs2_holder *gh) +static void handle_callback(struct gfs2_glock *gl, unsigned int state, + unsigned long delay) { - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; - - spin_lock(&gl->gl_spin); + int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; - while (gl->gl_req_gh != gh && - !test_bit(HIF_HOLDER, &gh->gh_iflags) && - !list_empty(&gh->gh_list)) { - if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { - spin_unlock(&gl->gl_spin); - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); - msleep(100); - spin_lock(&gl->gl_spin); - } else { - spin_unlock(&gl->gl_spin); - msleep(100); - spin_lock(&gl->gl_spin); - } + set_bit(bit, &gl->gl_flags); + if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { + gl->gl_demote_state = state; + gl->gl_demote_time = jiffies; + } else if (gl->gl_demote_state != LM_ST_UNLOCKED && + gl->gl_demote_state != state) { + gl->gl_demote_state = LM_ST_UNLOCKED; } - - spin_unlock(&gl->gl_spin); + if (gl->gl_ops->go_callback) + gl->gl_ops->go_callback(gl); + trace_gfs2_demote_rq(gl); } /** - * glock_wait_internal - wait on a glock acquisition + * gfs2_glock_wait - wait on a glock acquisition * @gh: the glock holder * * Returns: 0 on success */ -static int glock_wait_internal(struct gfs2_holder *gh) +int gfs2_glock_wait(struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; - - if (test_bit(HIF_ABORTED, &gh->gh_iflags)) - return -EIO; - - if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { - spin_lock(&gl->gl_spin); - if (gl->gl_req_gh != gh && - !test_bit(HIF_HOLDER, &gh->gh_iflags) && - !list_empty(&gh->gh_list)) { - list_del_init(&gh->gh_list); - gh->gh_error = GLR_TRYFAILED; - run_queue(gl); - spin_unlock(&gl->gl_spin); - return gh->gh_error; - } - spin_unlock(&gl->gl_spin); - } - - if (gh->gh_flags & LM_FLAG_PRIORITY) - do_cancels(gh); - wait_on_holder(gh); - if (gh->gh_error) - return gh->gh_error; - - gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags)); - gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state, gh->gh_state, - gh->gh_flags)); - - if (test_bit(HIF_FIRST, &gh->gh_iflags)) { - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - - if (glops->go_lock) { - gh->gh_error = glops->go_lock(gh); - if (gh->gh_error) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - spin_unlock(&gl->gl_spin); - } - } - - spin_lock(&gl->gl_spin); - gl->gl_req_gh = NULL; - clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); - spin_unlock(&gl->gl_spin); - } - return gh->gh_error; } -static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, struct pid *pid) -{ - struct gfs2_holder *gh; - - list_for_each_entry(gh, head, gh_list) { - if (gh->gh_owner_pid == pid) - return gh; - } - - return NULL; -} - -static void print_dbg(struct glock_iter *gi, const char *fmt, ...) +void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { va_list args; va_start(args, fmt); - if (gi) { + if (seq) { + struct gfs2_glock_iter *gi = seq->private; vsprintf(gi->string, fmt, args); - seq_printf(gi->seq, gi->string); - } - else + seq_printf(seq, gi->string); + } else { + printk(KERN_ERR " "); vprintk(fmt, args); + } va_end(args); } @@ -1104,50 +961,79 @@ static void print_dbg(struct glock_iter *gi, const char *fmt, ...) * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add * + * Eventually we should move the recursive locking trap to a + * debugging option or something like that. This is the fast + * path and needs to have the minimum number of distractions. + * */ -static void add_to_queue(struct gfs2_holder *gh) +static inline void add_to_queue(struct gfs2_holder *gh) +__releases(&gl->gl_spin) +__acquires(&gl->gl_spin) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_holder *existing; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct list_head *insert_pt = NULL; + struct gfs2_holder *gh2; + int try_lock = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); - if (!(gh->gh_flags & GL_FLOCK)) { - existing = find_holder_by_owner(&gl->gl_holders, - gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", - existing->gh_ip); - printk(KERN_INFO "pid : %d\n", - pid_nr(existing->gh_owner_pid)); - printk(KERN_INFO "lock type : %d lock state : %d\n", - existing->gh_gl->gl_name.ln_type, - existing->gh_gl->gl_state); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", - pid_nr(gh->gh_owner_pid)); - printk(KERN_INFO "lock type : %d lock state : %d\n", - gl->gl_name.ln_type, gl->gl_state); - BUG(); - } - - existing = find_holder_by_owner(&gl->gl_waiters3, - gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", - existing->gh_ip); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - BUG(); + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { + if (test_bit(GLF_LOCK, &gl->gl_flags)) + try_lock = 1; + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + goto fail; + } + + list_for_each_entry(gh2, &gl->gl_holders, gh_list) { + if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && + (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) + goto trap_recursive; + if (try_lock && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && + !may_grant(gl, gh)) { +fail: + gh->gh_error = GLR_TRYFAILED; + gfs2_holder_wake(gh); + return; } + if (test_bit(HIF_HOLDER, &gh2->gh_iflags)) + continue; + if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) + insert_pt = &gh2->gh_list; + } + if (likely(insert_pt == NULL)) { + list_add_tail(&gh->gh_list, &gl->gl_holders); + if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) + goto do_cancel; + return; } + trace_gfs2_glock_queue(gh, 1); + list_add_tail(&gh->gh_list, insert_pt); +do_cancel: + gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { + spin_unlock(&gl->gl_spin); + if (sdp->sd_lockstruct.ls_ops->lm_cancel) + sdp->sd_lockstruct.ls_ops->lm_cancel(gl); + spin_lock(&gl->gl_spin); + } + return; - if (gh->gh_flags & LM_FLAG_PRIORITY) - list_add(&gh->gh_list, &gl->gl_waiters3); - else - list_add_tail(&gh->gh_list, &gl->gl_waiters3); +trap_recursive: + print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip); + printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); + printk(KERN_ERR "lock type: %d req lock state : %d\n", + gh2->gh_gl->gl_name.ln_type, gh2->gh_state); + print_symbol(KERN_ERR "new: %s\n", gh->gh_ip); + printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); + printk(KERN_ERR "lock type: %d req lock state : %d\n", + gh->gh_gl->gl_name.ln_type, gh->gh_state); + __dump_glock(NULL, gl); + BUG(); } /** @@ -1165,24 +1051,16 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_sbd *sdp = gl->gl_sbd; int error = 0; -restart: - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { - set_bit(HIF_ABORTED, &gh->gh_iflags); + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return -EIO; - } spin_lock(&gl->gl_spin); add_to_queue(gh); - run_queue(gl); + run_queue(gl, 1); spin_unlock(&gl->gl_spin); - if (!(gh->gh_flags & GL_ASYNC)) { - error = glock_wait_internal(gh); - if (error == GLR_CANCELED) { - msleep(100); - goto restart; - } - } + if (!(gh->gh_flags & GL_ASYNC)) + error = gfs2_glock_wait(gh); return error; } @@ -1196,48 +1074,7 @@ restart: int gfs2_glock_poll(struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; - int ready = 0; - - spin_lock(&gl->gl_spin); - - if (test_bit(HIF_HOLDER, &gh->gh_iflags)) - ready = 1; - else if (list_empty(&gh->gh_list)) { - if (gh->gh_error == GLR_CANCELED) { - spin_unlock(&gl->gl_spin); - msleep(100); - if (gfs2_glock_nq(gh)) - return 1; - return 0; - } else - ready = 1; - } - - spin_unlock(&gl->gl_spin); - - return ready; -} - -/** - * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC - * @gh: the holder structure - * - * Returns: 0, GLR_TRYFAILED, or errno on failure - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - int error; - - error = glock_wait_internal(gh); - if (error == GLR_CANCELED) { - msleep(100); - gh->gh_flags &= ~GL_ASYNC; - error = gfs2_glock_nq(gh); - } - - return error; + return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; } /** @@ -1251,26 +1088,30 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned delay = 0; + int fast_path = 0; + spin_lock(&gl->gl_spin); if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); + handle_callback(gl, LM_ST_UNLOCKED, 0); - gfs2_glmutex_lock(gl); - - spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); - - if (list_empty(&gl->gl_holders)) { + if (find_first_holder(gl) == NULL) { if (glops->go_unlock) { + GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); spin_unlock(&gl->gl_spin); glops->go_unlock(gh); spin_lock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); } - gl->gl_stamp = jiffies; + if (list_empty(&gl->gl_holders) && + !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + fast_path = 1; } - - clear_bit(GLF_LOCK, &gl->gl_flags); + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); + if (likely(fast_path)) + return; gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && @@ -1451,225 +1292,111 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) gfs2_glock_dq_uninit(&ghs[x]); } -static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) -{ - int error = -EIO; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); - return error; -} - -/** - * gfs2_lvb_hold - attach a LVB from a glock - * @gl: The glock in question - * - */ - -int gfs2_lvb_hold(struct gfs2_glock *gl) -{ - int error; - - gfs2_glmutex_lock(gl); - - if (!atomic_read(&gl->gl_lvb_count)) { - error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb); - if (error) { - gfs2_glmutex_unlock(gl); - return error; - } - gfs2_glock_hold(gl); - } - atomic_inc(&gl->gl_lvb_count); - - gfs2_glmutex_unlock(gl); - - return 0; -} - -/** - * gfs2_lvb_unhold - detach a LVB from a glock - * @gl: The glock in question - * - */ - -void gfs2_lvb_unhold(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - - gfs2_glock_hold(gl); - gfs2_glmutex_lock(gl); - - gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); - if (atomic_dec_and_test(&gl->gl_lvb_count)) { - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); - gl->gl_lvb = NULL; - gfs2_glock_put(gl); - } - - gfs2_glmutex_unlock(gl); - gfs2_glock_put(gl); -} - -static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, - unsigned int state) +void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) { - struct gfs2_glock *gl; unsigned long delay = 0; unsigned long holdtime; unsigned long now = jiffies; - gl = gfs2_glock_find(sdp, name); - if (!gl) - return; - + gfs2_glock_hold(gl); holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; + if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + delay = gl->gl_ops->go_min_hold_time; - handle_callback(gl, state, 1, delay); + spin_lock(&gl->gl_spin); + handle_callback(gl, state, delay); + spin_unlock(&gl->gl_spin); if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } /** - * gfs2_glock_cb - Callback used by locking module - * @sdp: Pointer to the superblock - * @type: Type of callback - * @data: Type dependent data pointer + * gfs2_glock_complete - Callback used by locking + * @gl: Pointer to the glock + * @ret: The return value from the dlm * - * Called by the locking module when it wants to tell us something. - * Either we need to drop a lock, one of our ASYNC requests completed, or - * a journal from another client needs to be recovered. */ -void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) +void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { - struct gfs2_sbd *sdp = cb_data; - - switch (type) { - case LM_CB_NEED_E: - blocking_cb(sdp, data, LM_ST_UNLOCKED); - return; - - case LM_CB_NEED_D: - blocking_cb(sdp, data, LM_ST_DEFERRED); - return; - - case LM_CB_NEED_S: - blocking_cb(sdp, data, LM_ST_SHARED); - return; - - case LM_CB_ASYNC: { - struct lm_async_cb *async = data; - struct gfs2_glock *gl; - - down_read(&gfs2_umount_flush_sem); - gl = gfs2_glock_find(sdp, &async->lc_name); - if (gfs2_assert_warn(sdp, gl)) + struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + gl->gl_reply = ret; + if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { + struct gfs2_holder *gh; + spin_lock(&gl->gl_spin); + gh = find_first_waiter(gl); + if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) && + (gl->gl_target != LM_ST_UNLOCKED)) || + ((ret & ~LM_OUT_ST_MASK) != 0)) + set_bit(GLF_FROZEN, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + if (test_bit(GLF_FROZEN, &gl->gl_flags)) return; - xmote_bh(gl, async->lc_ret); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put(gl); - up_read(&gfs2_umount_flush_sem); - return; } - - case LM_CB_NEED_RECOVERY: - gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data); - if (sdp->sd_recoverd_process) - wake_up_process(sdp->sd_recoverd_process); - return; - - case LM_CB_DROPLOCKS: - gfs2_gl_hash_clear(sdp, NO_WAIT); - gfs2_quota_scan(sdp); - return; - - default: - gfs2_assert_warn(sdp, 0); - return; - } -} - -/** - * demote_ok - Check to see if it's ok to unlock a glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int demote_ok(struct gfs2_glock *gl) -{ - const struct gfs2_glock_operations *glops = gl->gl_ops; - int demote = 1; - - if (test_bit(GLF_STICKY, &gl->gl_flags)) - demote = 0; - else if (glops->go_demote_ok) - demote = glops->go_demote_ok(gl); - - return demote; + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } -/** - * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - */ -void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) +static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) { - struct gfs2_sbd *sdp = gl->gl_sbd; - - spin_lock(&sdp->sd_reclaim_lock); - if (list_empty(&gl->gl_reclaim)) { - gfs2_glock_hold(gl); - list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list); - atomic_inc(&sdp->sd_reclaim_count); - spin_unlock(&sdp->sd_reclaim_lock); - wake_up(&sdp->sd_reclaim_wq); - } else - spin_unlock(&sdp->sd_reclaim_lock); -} + struct gfs2_glock *gl; + int may_demote; + int nr_skipped = 0; + LIST_HEAD(skipped); -/** - * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list - * @sdp: the filesystem - * - * Called from gfs2_glockd() glock reclaim daemon, or when promoting a - * different glock and we notice that there are a lot of glocks in the - * reclaim list. - * - */ + if (nr == 0) + goto out; -void gfs2_reclaim_glock(struct gfs2_sbd *sdp) -{ - struct gfs2_glock *gl; + if (!(gfp_mask & __GFP_FS)) + return -1; - spin_lock(&sdp->sd_reclaim_lock); - if (list_empty(&sdp->sd_reclaim_list)) { - spin_unlock(&sdp->sd_reclaim_lock); - return; - } - gl = list_entry(sdp->sd_reclaim_list.next, - struct gfs2_glock, gl_reclaim); - list_del_init(&gl->gl_reclaim); - spin_unlock(&sdp->sd_reclaim_lock); + spin_lock(&lru_lock); + while(nr && !list_empty(&lru_list)) { + gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); - atomic_dec(&sdp->sd_reclaim_count); - atomic_inc(&sdp->sd_reclaimed); + /* Check if glock is about to be freed */ + if (atomic_read(&gl->gl_ref) == 0) + continue; - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); - gfs2_glmutex_unlock(gl); + /* Test for being demotable */ + if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + gfs2_glock_hold(gl); + spin_unlock(&lru_lock); + spin_lock(&gl->gl_spin); + may_demote = demote_ok(gl); + if (may_demote) { + handle_callback(gl, LM_ST_UNLOCKED, 0); + nr--; + } + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put_nolock(gl); + spin_unlock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); + spin_lock(&lru_lock); + continue; + } + nr_skipped++; + list_add(&gl->gl_lru, &skipped); } - - gfs2_glock_put(gl); + list_splice(&skipped, &lru_list); + atomic_add(nr_skipped, &lru_count); + spin_unlock(&lru_lock); +out: + return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; } +static struct shrinker glock_shrinker = { + .shrink = gfs2_shrink_glock_memory, + .seeks = DEFAULT_SEEKS, +}; + /** * examine_bucket - Call a function for glock in a hash bucket * @examiner: the function @@ -1714,28 +1441,23 @@ out: return has_entries; } + /** - * scan_glock - look at a glock and see if we can reclaim it - * @gl: the glock to look at + * thaw_glock - thaw out a glock which has an unprocessed reply waiting + * @gl: The glock to thaw * + * N.B. When we freeze a glock, we leave a ref to the glock outstanding, + * so this has to result in the ref count being dropped by one. */ -static void scan_glock(struct gfs2_glock *gl) +static void thaw_glock(struct gfs2_glock *gl) { - if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) + if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) return; - - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - goto out_schedule; - gfs2_glmutex_unlock(gl); - } - return; - -out_schedule: - gfs2_glmutex_unlock(gl); - gfs2_glock_schedule_for_reclaim(gl); + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); } /** @@ -1746,26 +1468,34 @@ out_schedule: static void clear_glock(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; - int released; - - spin_lock(&sdp->sd_reclaim_lock); - if (!list_empty(&gl->gl_reclaim)) { - list_del_init(&gl->gl_reclaim); - atomic_dec(&sdp->sd_reclaim_count); - spin_unlock(&sdp->sd_reclaim_lock); - released = gfs2_glock_put(gl); - gfs2_assert(sdp, !released); - } else { - spin_unlock(&sdp->sd_reclaim_lock); + spin_lock(&lru_lock); + if (!list_empty(&gl->gl_lru)) { + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); } + spin_unlock(&lru_lock); - if (gfs2_glmutex_trylock(gl)) { - if (list_empty(&gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, 0); - gfs2_glmutex_unlock(gl); - } + spin_lock(&gl->gl_spin); + if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) + handle_callback(gl, LM_ST_UNLOCKED, 0); + spin_unlock(&gl->gl_spin); + gfs2_glock_hold(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); +} + +/** + * gfs2_glock_thaw - Thaw any frozen glocks + * @sdp: The super block + * + */ + +void gfs2_glock_thaw(struct gfs2_sbd *sdp) +{ + unsigned x; + + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + examine_bucket(thaw_glock, sdp, x); } /** @@ -1773,11 +1503,10 @@ static void clear_glock(struct gfs2_glock *gl) * @sdp: the filesystem * @wait: wait until it's all gone * - * Called when unmounting the filesystem, or when inter-node lock manager - * requests DROPLOCKS because it is running out of capacity. + * Called when unmounting the filesystem. */ -void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) +void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { unsigned long t; unsigned int x; @@ -1792,7 +1521,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) cont = 1; } - if (!wait || !cont) + if (!cont) break; if (time_after_eq(jiffies, @@ -1810,180 +1539,177 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) } } -/* - * Diagnostic routines to help debug distributed deadlock - */ - -static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, - unsigned long address) +void gfs2_glock_finish_truncate(struct gfs2_inode *ip) { - char buffer[KSYM_SYMBOL_LEN]; + struct gfs2_glock *gl = ip->i_gl; + int ret; + + ret = gfs2_truncatei_resume(ip); + gfs2_assert_withdraw(gl->gl_sbd, ret == 0); + + spin_lock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); + run_queue(gl, 1); + spin_unlock(&gl->gl_spin); +} - sprint_symbol(buffer, address); - print_dbg(gi, fmt, buffer); +static const char *state2str(unsigned state) +{ + switch(state) { + case LM_ST_UNLOCKED: + return "UN"; + case LM_ST_SHARED: + return "SH"; + case LM_ST_DEFERRED: + return "DF"; + case LM_ST_EXCLUSIVE: + return "EX"; + } + return "??"; +} + +static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) +{ + char *p = buf; + if (flags & LM_FLAG_TRY) + *p++ = 't'; + if (flags & LM_FLAG_TRY_1CB) + *p++ = 'T'; + if (flags & LM_FLAG_NOEXP) + *p++ = 'e'; + if (flags & LM_FLAG_ANY) + *p++ = 'A'; + if (flags & LM_FLAG_PRIORITY) + *p++ = 'p'; + if (flags & GL_ASYNC) + *p++ = 'a'; + if (flags & GL_EXACT) + *p++ = 'E'; + if (flags & GL_NOCACHE) + *p++ = 'c'; + if (test_bit(HIF_HOLDER, &iflags)) + *p++ = 'H'; + if (test_bit(HIF_WAIT, &iflags)) + *p++ = 'W'; + if (test_bit(HIF_FIRST, &iflags)) + *p++ = 'F'; + *p = 0; + return buf; } /** * dump_holder - print information about a glock holder - * @str: a string naming the type of holder + * @seq: the seq_file struct * @gh: the glock holder * * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(struct glock_iter *gi, char *str, - struct gfs2_holder *gh) +static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) { - unsigned int x; - struct task_struct *gh_owner; + struct task_struct *gh_owner = NULL; + char buffer[KSYM_SYMBOL_LEN]; + char flags_buf[32]; - print_dbg(gi, " %s\n", str); - if (gh->gh_owner_pid) { - print_dbg(gi, " owner = %ld ", - (long)pid_nr(gh->gh_owner_pid)); + sprint_symbol(buffer, gh->gh_ip); + if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); - if (gh_owner) - print_dbg(gi, "(%s)\n", gh_owner->comm); - else - print_dbg(gi, "(ended)\n"); - } else - print_dbg(gi, " owner = -1\n"); - print_dbg(gi, " gh_state = %u\n", gh->gh_state); - print_dbg(gi, " gh_flags ="); - for (x = 0; x < 32; x++) - if (gh->gh_flags & (1 << x)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - print_dbg(gi, " error = %d\n", gh->gh_error); - print_dbg(gi, " gh_iflags ="); - for (x = 0; x < 32; x++) - if (test_bit(x, &gh->gh_iflags)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); - + gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", + state2str(gh->gh_state), + hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), + gh->gh_error, + gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, + gh_owner ? gh_owner->comm : "(ended)", buffer); return 0; } -/** - * dump_inode - print information about an inode - * @ip: the inode - * - * Returns: 0 on success, -ENOBUFS when we run out of space - */ - -static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) -{ - unsigned int x; - - print_dbg(gi, " Inode:\n"); - print_dbg(gi, " num = %llu/%llu\n", - (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr); - print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); - print_dbg(gi, " i_flags ="); - for (x = 0; x < 32; x++) - if (test_bit(x, &ip->i_flags)) - print_dbg(gi, " %u", x); - print_dbg(gi, " \n"); - return 0; +static const char *gflags2str(char *buf, const unsigned long *gflags) +{ + char *p = buf; + if (test_bit(GLF_LOCK, gflags)) + *p++ = 'l'; + if (test_bit(GLF_DEMOTE, gflags)) + *p++ = 'D'; + if (test_bit(GLF_PENDING_DEMOTE, gflags)) + *p++ = 'd'; + if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags)) + *p++ = 'p'; + if (test_bit(GLF_DIRTY, gflags)) + *p++ = 'y'; + if (test_bit(GLF_LFLUSH, gflags)) + *p++ = 'f'; + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) + *p++ = 'i'; + if (test_bit(GLF_REPLY_PENDING, gflags)) + *p++ = 'r'; + if (test_bit(GLF_INITIAL, gflags)) + *p++ = 'I'; + if (test_bit(GLF_FROZEN, gflags)) + *p++ = 'F'; + *p = 0; + return buf; } /** - * dump_glock - print information about a glock + * __dump_glock - print information about a glock + * @seq: The seq_file struct * @gl: the glock - * @count: where we are in the buffer + * + * The file format is as follows: + * One line per object, capital letters are used to indicate objects + * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented, + * other objects are indented by a single space and follow the glock to + * which they are related. Fields are indicated by lower case letters + * followed by a colon and the field value, except for strings which are in + * [] so that its possible to see if they are composed of spaces for + * example. The field's are n = number (id of the object), f = flags, + * t = type, s = state, r = refcount, e = error, p = pid. * * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) +static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) { - struct gfs2_holder *gh; - unsigned int x; - int error = -ENOBUFS; - struct task_struct *gl_owner; + const struct gfs2_glock_operations *glops = gl->gl_ops; + unsigned long long dtime; + const struct gfs2_holder *gh; + char gflags_buf[32]; + int error = 0; - spin_lock(&gl->gl_spin); + dtime = jiffies - gl->gl_demote_time; + dtime *= 1000000/HZ; /* demote time in uSec */ + if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) + dtime = 0; + gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu a:%d r:%d\n", + state2str(gl->gl_state), + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number, + gflags2str(gflags_buf, &gl->gl_flags), + state2str(gl->gl_target), + state2str(gl->gl_demote_state), dtime, + atomic_read(&gl->gl_ail_count), + atomic_read(&gl->gl_ref)); - print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); - print_dbg(gi, " gl_flags ="); - for (x = 0; x < 32; x++) { - if (test_bit(x, &gl->gl_flags)) - print_dbg(gi, " %u", x); - } - if (!test_bit(GLF_LOCK, &gl->gl_flags)) - print_dbg(gi, " (unlocked)"); - print_dbg(gi, " \n"); - print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); - print_dbg(gi, " gl_state = %u\n", gl->gl_state); - if (gl->gl_owner_pid) { - gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID); - if (gl_owner) - print_dbg(gi, " gl_owner = pid %d (%s)\n", - pid_nr(gl->gl_owner_pid), gl_owner->comm); - else - print_dbg(gi, " gl_owner = %d (ended)\n", - pid_nr(gl->gl_owner_pid)); - } else - print_dbg(gi, " gl_owner = -1\n"); - print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); - print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); - print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); - print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); - print_dbg(gi, " reclaim = %s\n", - (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); - if (gl->gl_aspace) - print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, - gl->gl_aspace->i_mapping->nrpages); - else - print_dbg(gi, " aspace = no\n"); - print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); - if (gl->gl_req_gh) { - error = dump_holder(gi, "Request", gl->gl_req_gh); - if (error) - goto out; - } list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder(gi, "Holder", gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { - error = dump_holder(gi, "Waiter1", gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { - error = dump_holder(gi, "Waiter3", gh); + error = dump_holder(seq, gh); if (error) goto out; } - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { - print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", - gl->gl_demote_state, (unsigned long long) - (jiffies - gl->gl_demote_time)*(1000000/HZ)); - } - if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { - if (!test_bit(GLF_LOCK, &gl->gl_flags) && - list_empty(&gl->gl_holders)) { - error = dump_inode(gi, gl->gl_object); - if (error) - goto out; - } else { - error = -ENOBUFS; - print_dbg(gi, " Inode: busy\n"); - } - } - - error = 0; - + if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) + error = glops->go_dump(seq, gl); out: - spin_unlock(&gl->gl_spin); return error; } +static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +{ + int ret; + spin_lock(&gl->gl_spin); + ret = __dump_glock(seq, gl); + spin_unlock(&gl->gl_spin); + return ret; +} + /** * gfs2_dump_lockstate - print out the current lockstate * @sdp: the filesystem @@ -2023,34 +1749,6 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) return error; } -/** - * gfs2_scand - Look for cached glocks and inodes to toss from memory - * @sdp: Pointer to GFS2 superblock - * - * One of these daemons runs, finding candidates to add to sd_reclaim_list. - * See gfs2_glockd() - */ - -static int gfs2_scand(void *data) -{ - unsigned x; - unsigned delay; - - while (!kthread_should_stop()) { - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(scan_glock, NULL, x); - if (freezing(current)) - refrigerator(); - delay = scand_secs; - if (delay < 1) - delay = 1; - schedule_timeout_interruptible(delay * HZ); - } - - return 0; -} - - int __init gfs2_glock_init(void) { @@ -2064,29 +1762,28 @@ int __init gfs2_glock_init(void) } #endif - scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand"); - if (IS_ERR(scand_process)) - return PTR_ERR(scand_process); - glock_workqueue = create_workqueue("glock_workqueue"); - if (IS_ERR(glock_workqueue)) { - kthread_stop(scand_process); + if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); + gfs2_delete_workqueue = create_workqueue("delete_workqueue"); + if (IS_ERR(gfs2_delete_workqueue)) { + destroy_workqueue(glock_workqueue); + return PTR_ERR(gfs2_delete_workqueue); } + register_shrinker(&glock_shrinker); + return 0; } void gfs2_glock_exit(void) { + unregister_shrinker(&glock_shrinker); destroy_workqueue(glock_workqueue); - kthread_stop(scand_process); + destroy_workqueue(gfs2_delete_workqueue); } -module_param(scand_secs, uint, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); - -static int gfs2_glock_iter_next(struct glock_iter *gi) +static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) { struct gfs2_glock *gl; @@ -2096,15 +1793,17 @@ restart: if (gl) { gi->gl = hlist_entry(gl->gl_list.next, struct gfs2_glock, gl_list); - if (gi->gl) - gfs2_glock_hold(gi->gl); + } else { + gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, + struct gfs2_glock, gl_list); } + if (gi->gl) + gfs2_glock_hold(gi->gl); read_unlock(gl_lock_addr(gi->hash)); if (gl) gfs2_glock_put(gl); - if (gl && gi->gl == NULL) + while (gi->gl == NULL) { gi->hash++; - while(gi->gl == NULL) { if (gi->hash >= GFS2_GL_HASH_SIZE) return 1; read_lock(gl_lock_addr(gi->hash)); @@ -2113,7 +1812,6 @@ restart: if (gi->gl) gfs2_glock_hold(gi->gl); read_unlock(gl_lock_addr(gi->hash)); - gi->hash++; } if (gi->sdp != gi->gl->gl_sbd) @@ -2122,58 +1820,34 @@ restart: return 0; } -static void gfs2_glock_iter_free(struct glock_iter *gi) +static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi) { if (gi->gl) gfs2_glock_put(gi->gl); - kfree(gi); -} - -static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) -{ - struct glock_iter *gi; - - gi = kmalloc(sizeof (*gi), GFP_KERNEL); - if (!gi) - return NULL; - - gi->sdp = sdp; - gi->hash = 0; - gi->seq = NULL; gi->gl = NULL; - memset(gi->string, 0, sizeof(gi->string)); - - if (gfs2_glock_iter_next(gi)) { - gfs2_glock_iter_free(gi); - return NULL; - } - - return gi; } -static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) +static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) { - struct glock_iter *gi; + struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; - gi = gfs2_glock_iter_init(file->private); - if (!gi) - return NULL; + gi->hash = 0; - while(n--) { + do { if (gfs2_glock_iter_next(gi)) { gfs2_glock_iter_free(gi); return NULL; } - } + } while (n--); - return gi; + return gi->gl; } -static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, +static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos) { - struct glock_iter *gi = iter_ptr; + struct gfs2_glock_iter *gi = seq->private; (*pos)++; @@ -2182,24 +1856,18 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, return NULL; } - return gi; + return gi->gl; } -static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) +static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) { - struct glock_iter *gi = iter_ptr; - if (gi) - gfs2_glock_iter_free(gi); + struct gfs2_glock_iter *gi = seq->private; + gfs2_glock_iter_free(gi); } -static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) +static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { - struct glock_iter *gi = iter_ptr; - - gi->seq = file; - dump_glock(gi, gi->gl); - - return 0; + return dump_glock(seq, iter_ptr); } static const struct seq_operations gfs2_glock_seq_ops = { @@ -2211,17 +1879,14 @@ static const struct seq_operations gfs2_glock_seq_ops = { static int gfs2_debugfs_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int ret; - - ret = seq_open(file, &gfs2_glock_seq_ops); - if (ret) - return ret; - - seq = file->private_data; - seq->private = inode->i_private; - - return 0; + int ret = seq_open_private(file, &gfs2_glock_seq_ops, + sizeof(struct gfs2_glock_iter)); + if (ret == 0) { + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; + } + return ret; } static const struct file_operations gfs2_debug_fops = { @@ -2229,7 +1894,7 @@ static const struct file_operations gfs2_debug_fops = { .open = gfs2_debugfs_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release + .release = seq_release_private, }; int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)