diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b348053..80e09c5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -15,13 +15,22 @@
 #include
 #include
 #include
-#include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
 
 #include "gfs2.h"
-#include "lm_interface.h"
 #include "incore.h"
 #include "glock.h"
 #include "glops.h"
@@ -33,31 +42,38 @@
 #include "super.h"
 #include "util.h"
 
-/* Must be kept in sync with the beginning of struct gfs2_glock */
-struct glock_plug {
- struct list_head gl_list;
- unsigned long gl_flags;
-};
-
-struct greedy {
- struct gfs2_holder gr_gh;
- struct work_struct gr_work;
+struct gfs2_gl_hash_bucket {
+ struct hlist_head hb_list;
 };
 
-struct gfs2_gl_hash_bucket {
- struct list_head hb_list;
+struct glock_iter {
+ int hash; /* hash bucket index */
+ struct gfs2_sbd *sdp; /* incore superblock */
+ struct gfs2_glock *gl; /* current glock struct */
+ struct seq_file *seq; /* sequence file for debugfs */
+ char string[512]; /* scratch space */
 };
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
 static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
-static int dump_glock(struct gfs2_glock *gl);
-
-#define GFS2_GL_HASH_SHIFT 13
+static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
+static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
+static void gfs2_glock_drop_th(struct gfs2_glock *gl);
+static void run_queue(struct gfs2_glock *gl);
+
+static DECLARE_RWSEM(gfs2_umount_flush_sem);
+static struct dentry *gfs2_root;
+static struct task_struct *scand_process;
+static unsigned int scand_secs = 5;
+static struct workqueue_struct *glock_workqueue;
+
+#define GFS2_GL_HASH_SHIFT 15
 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
 #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
 
 static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
+static struct dentry *gfs2_root;
 
 /*
  * Despite what you might think, the numbers below are not arbitrary :-)
@@ -101,7 +117,7 @@ static inline rwlock_t *gl_lock_addr(unsigned int x)
 	return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
 }
 #else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(x)
+static inline rwlock_t *gl_lock_addr(unsigned int x)
 {
 	return NULL;
 }
@@ -183,23 +199,7 @@ static void glock_free(struct gfs2_glock *gl)
 
 void gfs2_glock_hold(struct gfs2_glock *gl)
 {
-	kref_get(&gl->gl_ref);
-}
-
-/* All work is done after the return from kref_put() so we
-   can release the write_lock before the free.
*/ - -static void kill_glock(struct kref *kref) -{ - struct gfs2_glock *gl = container_of(kref, struct gfs2_glock, gl_ref); - struct gfs2_sbd *sdp = gl->gl_sbd; - - gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); - gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); - gfs2_assert(sdp, list_empty(&gl->gl_holders)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters2)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); + atomic_inc(&gl->gl_ref); } /** @@ -211,12 +211,17 @@ static void kill_glock(struct kref *kref) int gfs2_glock_put(struct gfs2_glock *gl) { int rv = 0; + struct gfs2_sbd *sdp = gl->gl_sbd; write_lock(gl_lock_addr(gl->gl_hash)); - if (kref_put(&gl->gl_ref, kill_glock)) { - list_del_init(&gl_hash_table[gl->gl_hash].hb_list); + if (atomic_dec_and_test(&gl->gl_ref)) { + hlist_del(&gl->gl_list); write_unlock(gl_lock_addr(gl->gl_hash)); - BUG_ON(spin_is_locked(&gl->gl_spin)); + gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); + gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); + gfs2_assert(sdp, list_empty(&gl->gl_holders)); + gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); + gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); glock_free(gl); rv = 1; goto out; @@ -227,30 +232,6 @@ out: } /** - * queue_empty - check to see if a glock's queue is empty - * @gl: the glock - * @head: the head of the queue to check - * - * This function protects the list in the event that a process already - * has a holder on the list and is adding a second holder for itself. - * The glmutex lock is what generally prevents processes from working - * on the same glock at once, but the special case of adding a second - * holder for yourself ("recursive" locking) doesn't involve locking - * glmutex, making the spin lock necessary. 
- * - * Returns: 1 if the queue is empty - */ - -static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head) -{ - int empty; - spin_lock(&gl->gl_spin); - empty = list_empty(head); - spin_unlock(&gl->gl_spin); - return empty; -} - -/** * search_bucket() - Find struct gfs2_glock by lock number * @bucket: the bucket to search * @name: The lock name @@ -263,16 +244,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash, const struct lm_lockname *name) { struct gfs2_glock *gl; + struct hlist_node *h; - list_for_each_entry(gl, &gl_hash_table[hash].hb_list, gl_list) { - if (test_bit(GLF_PLUG, &gl->gl_flags)) - continue; + hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { if (!lm_name_equal(&gl->gl_name, name)) continue; if (gl->gl_sbd != sdp) continue; - kref_get(&gl->gl_ref); + atomic_inc(&gl->gl_ref); return gl; } @@ -301,6 +281,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, return gl; } +static void glock_work_func(struct work_struct *work) +{ + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + + spin_lock(&gl->gl_spin); + if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) + set_bit(GLF_DEMOTE, &gl->gl_flags); + run_queue(gl); + spin_unlock(&gl->gl_spin); + gfs2_glock_put(gl); +} + /** * gfs2_glock_get() - Get a glock, or create one if one doesn't exist * @sdp: The GFS2 superblock @@ -338,20 +330,22 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_flags = 0; gl->gl_name = name; - kref_init(&gl->gl_ref); + atomic_set(&gl->gl_ref, 1); gl->gl_state = LM_ST_UNLOCKED; + gl->gl_demote_state = LM_ST_EXCLUSIVE; gl->gl_hash = hash; - gl->gl_owner = NULL; + gl->gl_owner_pid = 0; gl->gl_ip = 0; gl->gl_ops = glops; gl->gl_req_gh = NULL; gl->gl_req_bh = NULL; gl->gl_vn = 0; gl->gl_stamp = jiffies; + gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; gl->gl_aspace = NULL; - lops_init_le(&gl->gl_le, &gfs2_glock_lops); + INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); /* If this glock protects actual on-disk data or metadata blocks, create a VFS inode to manage the pages/buffers holding them. 
*/ @@ -374,7 +368,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, glock_free(gl); gl = tmp; } else { - list_add_tail(&gl->gl_list, &gl_hash_table[hash].hb_list); + hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); write_unlock(gl_lock_addr(hash)); } @@ -386,7 +380,7 @@ fail_aspace: if (gl->gl_aspace) gfs2_aspace_put(gl->gl_aspace); fail: - kmem_cache_free(gfs2_glock_cachep, gl); + kmem_cache_free(gfs2_glock_cachep, gl); return error; } @@ -405,16 +399,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, INIT_LIST_HEAD(&gh->gh_list); gh->gh_gl = gl; gh->gh_ip = (unsigned long)__builtin_return_address(0); - gh->gh_owner = current; + gh->gh_owner_pid = current->pid; gh->gh_state = state; gh->gh_flags = flags; gh->gh_error = 0; gh->gh_iflags = 0; - init_completion(&gh->gh_wait); - - if (gh->gh_state == LM_ST_EXCLUSIVE) - gh->gh_flags |= GL_LOCAL_EXCL; - gfs2_glock_hold(gl); } @@ -432,10 +421,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * { gh->gh_state = state; gh->gh_flags = flags; - if (gh->gh_state == LM_ST_EXCLUSIVE) - gh->gh_flags |= GL_LOCAL_EXCL; - - gh->gh_iflags &= 1 << HIF_ALLOCED; + gh->gh_iflags = 0; gh->gh_ip = (unsigned long)__builtin_return_address(0); } @@ -452,46 +438,37 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -/** - * gfs2_holder_get - get a struct gfs2_holder structure - * @gl: the glock - * @state: the state we're requesting - * @flags: the modifier flags - * @gfp_flags: - * - * Figure out how big an impact this function has. Either: - * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd - * 2) Leave it like it is - * - * Returns: the holder structure, NULL on ENOMEM - */ - -static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, - unsigned int state, - int flags, gfp_t gfp_flags) +static void gfs2_holder_wake(struct gfs2_holder *gh) { - struct gfs2_holder *gh; + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb__after_clear_bit(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); +} - gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags); - if (!gh) - return NULL; +static int just_schedule(void *word) +{ + schedule(); + return 0; +} - gfs2_holder_init(gl, state, flags, gh); - set_bit(HIF_ALLOCED, &gh->gh_iflags); - gh->gh_ip = (unsigned long)__builtin_return_address(0); - return gh; +static void wait_on_holder(struct gfs2_holder *gh) +{ + might_sleep(); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); } -/** - * gfs2_holder_put - get rid of a struct gfs2_holder structure - * @gh: the holder structure - * - */ +static void gfs2_demote_wake(struct gfs2_glock *gl) +{ + gl->gl_demote_state = LM_ST_EXCLUSIVE; + clear_bit(GLF_DEMOTE, &gl->gl_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&gl->gl_flags, GLF_DEMOTE); +} -static void gfs2_holder_put(struct gfs2_holder *gh) +static void wait_on_demote(struct gfs2_glock *gl) { - gfs2_holder_uninit(gh); - kfree(gh); + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); } /** @@ -508,7 +485,9 @@ static int rq_mutex(struct gfs2_holder *gh) list_del_init(&gh->gh_list); /* gh->gh_error never examined. 
*/ set_bit(GLF_LOCK, &gl->gl_flags); - complete(&gh->gh_wait); + clear_bit(HIF_WAIT, &gh->gh_iflags); + smp_mb(); + wake_up_bit(&gh->gh_iflags, HIF_WAIT); return 1; } @@ -525,23 +504,13 @@ static int rq_mutex(struct gfs2_holder *gh) static int rq_promote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; - const struct gfs2_glock_operations *glops = gl->gl_ops; if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { if (list_empty(&gl->gl_holders)) { gl->gl_req_gh = gh; set_bit(GLF_LOCK, &gl->gl_flags); spin_unlock(&gl->gl_spin); - - if (atomic_read(&sdp->sd_reclaim_count) > - gfs2_tune_get(sdp, gt_reclaim_limit) && - !(gh->gh_flags & LM_FLAG_PRIORITY)) { - gfs2_reclaim_glock(sdp); - gfs2_reclaim_glock(sdp); - } - - glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); + gfs2_glock_xmote_th(gh->gh_gl, gh); spin_lock(&gl->gl_spin); } return 1; @@ -552,11 +521,11 @@ static int rq_promote(struct gfs2_holder *gh) set_bit(GLF_LOCK, &gl->gl_flags); } else { struct gfs2_holder *next_gh; - if (gh->gh_flags & GL_LOCAL_EXCL) + if (gh->gh_state == LM_ST_EXCLUSIVE) return 1; next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); - if (next_gh->gh_flags & GL_LOCAL_EXCL) + if (next_gh->gh_state == LM_ST_EXCLUSIVE) return 1; } @@ -564,7 +533,7 @@ static int rq_promote(struct gfs2_holder *gh) gh->gh_error = 0; set_bit(HIF_HOLDER, &gh->gh_iflags); - complete(&gh->gh_wait); + gfs2_holder_wake(gh); return 0; } @@ -576,60 +545,31 @@ static int rq_promote(struct gfs2_holder *gh) * Returns: 1 if the queue is blocked */ -static int rq_demote(struct gfs2_holder *gh) +static int rq_demote(struct gfs2_glock *gl) { - struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; - if (!list_empty(&gl->gl_holders)) return 1; - if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) { - list_del_init(&gh->gh_list); - gh->gh_error = 0; + if (gl->gl_state == gl->gl_demote_state || + gl->gl_state == LM_ST_UNLOCKED) { + gfs2_demote_wake(gl); + return 0; + } + + set_bit(GLF_LOCK, &gl->gl_flags); + set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); + + if (gl->gl_demote_state == LM_ST_UNLOCKED || + gl->gl_state != LM_ST_EXCLUSIVE) { spin_unlock(&gl->gl_spin); - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); - spin_lock(&gl->gl_spin); + gfs2_glock_drop_th(gl); } else { - gl->gl_req_gh = gh; - set_bit(GLF_LOCK, &gl->gl_flags); spin_unlock(&gl->gl_spin); - - if (gh->gh_state == LM_ST_UNLOCKED || - gl->gl_state != LM_ST_EXCLUSIVE) - glops->go_drop_th(gl); - else - glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags); - - spin_lock(&gl->gl_spin); + gfs2_glock_xmote_th(gl, NULL); } - return 0; -} - -/** - * rq_greedy - process a queued request to drop greedy status - * @gh: the glock holder - * - * Returns: 1 if the queue is blocked - */ - -static int rq_greedy(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - list_del_init(&gh->gh_list); - /* gh->gh_error never examined. 
*/ - clear_bit(GLF_GREEDY, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - gfs2_holder_uninit(gh); - kfree(container_of(gh, struct greedy, gr_gh)); - - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_spin); + clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); return 0; } @@ -651,33 +591,18 @@ static void run_queue(struct gfs2_glock *gl) if (!list_empty(&gl->gl_waiters1)) { gh = list_entry(gl->gl_waiters1.next, struct gfs2_holder, gh_list); - - if (test_bit(HIF_MUTEX, &gh->gh_iflags)) - blocked = rq_mutex(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - - } else if (!list_empty(&gl->gl_waiters2) && - !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) { - gh = list_entry(gl->gl_waiters2.next, - struct gfs2_holder, gh_list); - - if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) - blocked = rq_demote(gh); - else if (test_bit(HIF_GREEDY, &gh->gh_iflags)) - blocked = rq_greedy(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + blocked = rq_mutex(gh); + } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + blocked = rq_demote(gl); + if (gl->gl_waiters2 && !blocked) { + set_bit(GLF_DEMOTE, &gl->gl_flags); + gl->gl_demote_state = LM_ST_UNLOCKED; + } + gl->gl_waiters2 = 0; } else if (!list_empty(&gl->gl_waiters3)) { gh = list_entry(gl->gl_waiters3.next, struct gfs2_holder, gh_list); - - if (test_bit(HIF_PROMOTE, &gh->gh_iflags)) - blocked = rq_promote(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + blocked = rq_promote(gh); } else break; @@ -695,23 +620,21 @@ static void run_queue(struct gfs2_glock *gl) static void gfs2_glmutex_lock(struct gfs2_glock *gl) { - struct gfs2_holder gh; - - gfs2_holder_init(gl, 0, 0, &gh); - set_bit(HIF_MUTEX, &gh.gh_iflags); - spin_lock(&gl->gl_spin); if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + struct gfs2_holder gh; + + gfs2_holder_init(gl, 0, 0, &gh); + set_bit(HIF_WAIT, &gh.gh_iflags); list_add_tail(&gh.gh_list, &gl->gl_waiters1); + spin_unlock(&gl->gl_spin); + wait_on_holder(&gh); + gfs2_holder_uninit(&gh); } else { - gl->gl_owner = current; + gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); - complete(&gh.gh_wait); + spin_unlock(&gl->gl_spin); } - spin_unlock(&gl->gl_spin); - - wait_for_completion(&gh.gh_wait); - gfs2_holder_uninit(&gh); } /** @@ -729,7 +652,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { acquired = 0; } else { - gl->gl_owner = current; + gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); } spin_unlock(&gl->gl_spin); @@ -747,72 +670,45 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) { spin_lock(&gl->gl_spin); clear_bit(GLF_LOCK, &gl->gl_flags); - gl->gl_owner = NULL; + gl->gl_owner_pid = 0; gl->gl_ip = 0; run_queue(gl); - BUG_ON(!spin_is_locked(&gl->gl_spin)); spin_unlock(&gl->gl_spin); } /** - * handle_callback - add a demote request to a lock's queue + * handle_callback - process a demote request * @gl: the glock * @state: the state the caller wants us to change to * - * Note: This may fail sliently if we are out of memory. + * There are only two requests that we are going to see in actual + * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ -static void handle_callback(struct gfs2_glock *gl, unsigned int state) +static void handle_callback(struct gfs2_glock *gl, unsigned int state, + int remote, unsigned long delay) { - struct gfs2_holder *gh, *new_gh = NULL; + int bit = delay ? 
GLF_PENDING_DEMOTE : GLF_DEMOTE; -restart: spin_lock(&gl->gl_spin); - - list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { - if (test_bit(HIF_DEMOTE, &gh->gh_iflags) && - gl->gl_req_gh != gh) { - if (gh->gh_state != state) - gh->gh_state = LM_ST_UNLOCKED; - goto out; - } - } - - if (new_gh) { - list_add_tail(&new_gh->gh_list, &gl->gl_waiters2); - new_gh = NULL; - } else { - spin_unlock(&gl->gl_spin); - - new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL); - if (!new_gh) + set_bit(bit, &gl->gl_flags); + if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { + gl->gl_demote_state = state; + gl->gl_demote_time = jiffies; + if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && + gl->gl_object) { + gfs2_glock_schedule_for_reclaim(gl); + spin_unlock(&gl->gl_spin); return; - set_bit(HIF_DEMOTE, &new_gh->gh_iflags); - set_bit(HIF_DEALLOC, &new_gh->gh_iflags); - - goto restart; + } + } else if (gl->gl_demote_state != LM_ST_UNLOCKED && + gl->gl_demote_state != state) { + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) + gl->gl_waiters2 = 1; + else + gl->gl_demote_state = LM_ST_UNLOCKED; } - -out: - spin_unlock(&gl->gl_spin); - - if (new_gh) - gfs2_holder_put(new_gh); -} - -void gfs2_glock_inode_squish(struct inode *inode) -{ - struct gfs2_holder gh; - struct gfs2_glock *gl = GFS2_I(inode)->i_gl; - gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh); - set_bit(HIF_DEMOTE, &gh.gh_iflags); - spin_lock(&gl->gl_spin); - gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders)); - list_add_tail(&gh.gh_list, &gl->gl_waiters2); - run_queue(gl); spin_unlock(&gl->gl_spin); - wait_for_completion(&gh.gh_wait); - gfs2_holder_uninit(&gh); } /** @@ -837,6 +733,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) } gl->gl_state = new_state; + gl->gl_tchange = jiffies; } /** @@ -855,73 +752,64 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) int op_done = 1; gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); state_change(gl, ret & LM_OUT_ST_MASK); if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) { if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA | DIO_DATA); + glops->go_inval(gl, DIO_METADATA); } else if (gl->gl_state == LM_ST_DEFERRED) { /* We might not want to do this here. Look at moving to the inode glops. 
*/ if (glops->go_inval) - glops->go_inval(gl, DIO_DATA); + glops->go_inval(gl, 0); } /* Deal with each possible exit condition */ - if (!gh) + if (!gh) { gl->gl_stamp = jiffies; - else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - gh->gh_error = -EIO; - spin_unlock(&gl->gl_spin); - } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - if (gl->gl_state == gh->gh_state || - gl->gl_state == LM_ST_UNLOCKED) { - gh->gh_error = 0; + if (ret & LM_OUT_CANCELED) { + op_done = 0; } else { - if (gfs2_assert_warn(sdp, gh->gh_flags & - (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1) - fs_warn(sdp, "ret = 0x%.8X\n", ret); - gh->gh_error = GLR_TRYFAILED; + spin_lock(&gl->gl_spin); + if (gl->gl_state != gl->gl_demote_state) { + gl->gl_req_bh = NULL; + spin_unlock(&gl->gl_spin); + gfs2_glock_drop_th(gl); + gfs2_glock_put(gl); + return; + } + gfs2_demote_wake(gl); + spin_unlock(&gl->gl_spin); } - spin_unlock(&gl->gl_spin); - - if (ret & LM_OUT_CANCELED) - handle_callback(gl, LM_ST_UNLOCKED); - - } else if (ret & LM_OUT_CANCELED) { + } else { spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); + gh->gh_error = -EIO; + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + goto out; gh->gh_error = GLR_CANCELED; - spin_unlock(&gl->gl_spin); - - } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - spin_lock(&gl->gl_spin); - list_move_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - spin_unlock(&gl->gl_spin); - - set_bit(HIF_FIRST, &gh->gh_iflags); - - op_done = 0; - - } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); + if (ret & LM_OUT_CANCELED) + goto out; + if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { + list_add_tail(&gh->gh_list, &gl->gl_holders); + gh->gh_error = 0; + set_bit(HIF_HOLDER, &gh->gh_iflags); + set_bit(HIF_FIRST, &gh->gh_iflags); + op_done = 0; + goto out; + } gh->gh_error = GLR_TRYFAILED; - spin_unlock(&gl->gl_spin); - - } else { + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + goto out; + gh->gh_error = -EINVAL; if (gfs2_assert_withdraw(sdp, 0) == -1) fs_err(sdp, "ret = 0x%.8X\n", ret); +out: + spin_unlock(&gl->gl_spin); } if (glops->go_xmote_bh) @@ -932,18 +820,13 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) gl->gl_req_gh = NULL; gl->gl_req_bh = NULL; clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); spin_unlock(&gl->gl_spin); } gfs2_glock_put(gl); - if (gh) { - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); - } + if (gh) + gfs2_holder_wake(gh); } /** @@ -954,23 +837,25 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags) +static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) { struct gfs2_sbd *sdp = gl->gl_sbd; + int flags = gh ? gh->gh_flags : 0; + unsigned state = gh ? 
gh->gh_state : gl->gl_demote_state; const struct gfs2_glock_operations *glops = gl->gl_ops; int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_ANY | LM_FLAG_PRIORITY); unsigned int lck_ret; + if (glops->go_xmote_th) + glops->go_xmote_th(gl); + gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED); gfs2_assert_warn(sdp, state != gl->gl_state); - if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE); - gfs2_glock_hold(gl); gl->gl_req_bh = xmote_bh; @@ -1001,16 +886,14 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh = gl->gl_req_gh; - clear_bit(GLF_PREFETCH, &gl->gl_flags); - gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); if (glops->go_inval) - glops->go_inval(gl, DIO_METADATA | DIO_DATA); + glops->go_inval(gl, DIO_METADATA); if (gh) { spin_lock(&gl->gl_spin); @@ -1019,24 +902,17 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) spin_unlock(&gl->gl_spin); } - if (glops->go_drop_bh) - glops->go_drop_bh(gl); - spin_lock(&gl->gl_spin); + gfs2_demote_wake(gl); gl->gl_req_gh = NULL; gl->gl_req_bh = NULL; clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); spin_unlock(&gl->gl_spin); gfs2_glock_put(gl); - if (gh) { - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) - gfs2_holder_put(gh); - else - complete(&gh->gh_wait); - } + if (gh) + gfs2_holder_wake(gh); } /** @@ -1045,19 +921,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_drop_th(struct gfs2_glock *gl) +static void gfs2_glock_drop_th(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned int ret; + if (glops->go_xmote_th) + glops->go_xmote_th(gl); + gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); - gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders)); + gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); - if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync) - glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE); - gfs2_glock_hold(gl); gl->gl_req_bh = drop_bh; @@ -1137,8 +1013,7 @@ static int glock_wait_internal(struct gfs2_holder *gh) if (gh->gh_flags & LM_FLAG_PRIORITY) do_cancels(gh); - wait_for_completion(&gh->gh_wait); - + wait_on_holder(gh); if (gh->gh_error) return gh->gh_error; @@ -1170,18 +1045,32 @@ static int glock_wait_internal(struct gfs2_holder *gh) } static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, struct task_struct *owner) +find_holder_by_owner(struct list_head *head, pid_t pid) { struct gfs2_holder *gh; list_for_each_entry(gh, head, gh_list) { - if (gh->gh_owner == owner) + if (gh->gh_owner_pid == pid) return gh; } return NULL; } +static void print_dbg(struct glock_iter *gi, const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + if (gi) { + vsprintf(gi->string, fmt, args); + seq_printf(gi->seq, gi->string); + } + else + vprintk(fmt, args); + va_end(args); +} + /** * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add @@ -1193,32 +1082,41 @@ static void add_to_queue(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; struct gfs2_holder *existing; - BUG_ON(!gh->gh_owner); - - existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid); - printk(KERN_INFO "lock type : %d lock state : %d\n", - existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid); - printk(KERN_INFO "lock type : %d lock state : %d\n", - gl->gl_name.ln_type, gl->gl_state); + BUG_ON(!gh->gh_owner_pid); + if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); - } - existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - BUG(); + if (!(gh->gh_flags & GL_FLOCK)) { + existing = find_holder_by_owner(&gl->gl_holders, + gh->gh_owner_pid); + if (existing) { + print_symbol(KERN_WARNING "original: %s\n", + existing->gh_ip); + printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); + printk(KERN_INFO "lock type : %d lock state : %d\n", + existing->gh_gl->gl_name.ln_type, + existing->gh_gl->gl_state); + print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); + printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); + printk(KERN_INFO "lock type : %d lock state : %d\n", + gl->gl_name.ln_type, gl->gl_state); + BUG(); + } + + existing = find_holder_by_owner(&gl->gl_waiters3, + gh->gh_owner_pid); + if (existing) { + print_symbol(KERN_WARNING "original: %s\n", + existing->gh_ip); + print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); + BUG(); + } } if (gh->gh_flags & LM_FLAG_PRIORITY) list_add(&gh->gh_list, &gl->gl_waiters3); else - list_add_tail(&gh->gh_list, &gl->gl_waiters3); + list_add_tail(&gh->gh_list, &gl->gl_waiters3); } /** @@ -1242,8 +1140,6 @@ restart: return -EIO; } - set_bit(HIF_PROMOTE, &gh->gh_iflags); - spin_lock(&gl->gl_spin); add_to_queue(gh); run_queue(gl); @@ -1257,11 +1153,6 @@ restart: } } - clear_bit(GLF_PREFETCH, &gl->gl_flags); - - if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP)) - dump_glock(gl); - return error; } @@ -1328,9 +1219,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; const struct gfs2_glock_operations *glops = gl->gl_ops; + unsigned delay = 0; if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); gfs2_glmutex_lock(gl); @@ -1338,111 +1230,30 @@ void gfs2_glock_dq(struct gfs2_holder *gh) list_del_init(&gh->gh_list); if (list_empty(&gl->gl_holders)) { - spin_unlock(&gl->gl_spin); - - if (glops->go_unlock) + if (glops->go_unlock) { + spin_unlock(&gl->gl_spin); glops->go_unlock(gh); - + spin_lock(&gl->gl_spin); + } gl->gl_stamp = jiffies; - - spin_lock(&gl->gl_spin); } clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); spin_unlock(&gl->gl_spin); -} - -/** - * gfs2_glock_prefetch - Try to prefetch a glock - * @gl: the glock - * @state: the state to prefetch in - * @flags: flags passed to go_xmote_th() - * - */ - -static void 
gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, - int flags) -{ - const struct gfs2_glock_operations *glops = gl->gl_ops; - - spin_lock(&gl->gl_spin); - - if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) || - !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) || - !list_empty(&gl->gl_waiters3) || - relaxed_state_ok(gl->gl_state, state, flags)) { - spin_unlock(&gl->gl_spin); - return; - } - set_bit(GLF_PREFETCH, &gl->gl_flags); - set_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - glops->go_xmote_th(gl, state, flags); -} - -static void greedy_work(void *data) -{ - struct greedy *gr = data; - struct gfs2_holder *gh = &gr->gr_gh; - struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; - - clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); - - if (glops->go_greedy) - glops->go_greedy(gl); - - spin_lock(&gl->gl_spin); - - if (list_empty(&gl->gl_waiters2)) { - clear_bit(GLF_GREEDY, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - gfs2_holder_uninit(gh); - kfree(gr); - } else { - gfs2_glock_hold(gl); - list_add_tail(&gh->gh_list, &gl->gl_waiters2); - run_queue(gl); - spin_unlock(&gl->gl_spin); + gfs2_glock_hold(gl); + if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + delay = gl->gl_ops->go_min_hold_time; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); - } } -/** - * gfs2_glock_be_greedy - - * @gl: - * @time: - * - * Returns: 0 if go_greedy will be called, 1 otherwise - */ - -int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time) +void gfs2_glock_dq_wait(struct gfs2_holder *gh) { - struct greedy *gr; - struct gfs2_holder *gh; - - if (!time || gl->gl_sbd->sd_args.ar_localcaching || - test_and_set_bit(GLF_GREEDY, &gl->gl_flags)) - return 1; - - gr = kmalloc(sizeof(struct greedy), GFP_KERNEL); - if (!gr) { - clear_bit(GLF_GREEDY, &gl->gl_flags); - return 1; - } - gh = &gr->gr_gh; - - gfs2_holder_init(gl, 0, 0, gh); - set_bit(HIF_GREEDY, &gh->gh_iflags); - INIT_WORK(&gr->gr_work, greedy_work, gr); - - set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags); - schedule_delayed_work(&gr->gr_work, time); - - return 0; + struct gfs2_glock *gl = gh->gh_gl; + gfs2_glock_dq(gh); + wait_on_demote(gl); } /** @@ -1503,10 +1314,7 @@ static int glock_compare(const void *arg_a, const void *arg_b) return 1; if (a->ln_number < b->ln_number) return -1; - if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE) - return 1; - if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL)) - return 1; + BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type); return 0; } @@ -1549,10 +1357,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, * @num_gh: the number of structures * @ghs: an array of struct gfs2_holder structures * - * Figure out how big an impact this function has. 
Either: - * 1) Replace this code with code that calls gfs2_glock_prefetch() - * 2) Forget async stuff and just call nq_m_sync() - * 3) Leave it like it is * * Returns: 0 on success (all glocks acquired), * errno on failure (no glocks acquired) @@ -1560,62 +1364,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) { - int *e; - unsigned int x; - int borked = 0, serious = 0; + struct gfs2_holder *tmp[4]; + struct gfs2_holder **pph = tmp; int error = 0; - if (!num_gh) + switch(num_gh) { + case 0: return 0; - - if (num_gh == 1) { + case 1: ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); return gfs2_glock_nq(ghs); - } - - e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL); - if (!e) - return -ENOMEM; - - for (x = 0; x < num_gh; x++) { - ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC; - error = gfs2_glock_nq(&ghs[x]); - if (error) { - borked = 1; - serious = error; - num_gh = x; + default: + if (num_gh <= 4) break; - } + pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS); + if (!pph) + return -ENOMEM; } - for (x = 0; x < num_gh; x++) { - error = e[x] = glock_wait_internal(&ghs[x]); - if (error) { - borked = 1; - if (error != GLR_TRYFAILED && error != GLR_CANCELED) - serious = error; - } - } + error = nq_m_sync(num_gh, ghs, pph); - if (!borked) { - kfree(e); - return 0; - } - - for (x = 0; x < num_gh; x++) - if (!e[x]) - gfs2_glock_dq(&ghs[x]); - - if (serious) - error = serious; - else { - for (x = 0; x < num_gh; x++) - gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags, - &ghs[x]); - error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e); - } - - kfree(e); + if (pph != tmp) + kfree(pph); return error; } @@ -1651,34 +1421,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) } /** - * gfs2_glock_prefetch_num - prefetch a glock based on lock number - * @sdp: the filesystem - * @number: the lock number - * @glops: the glock operations for the type of glock - * @state: the state to acquire the glock in - * @flags: modifier flags for the aquisition - * - * Returns: errno - */ - -void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number, - const struct gfs2_glock_operations *glops, - unsigned int state, int flags) -{ - struct gfs2_glock *gl; - int error; - - if (atomic_read(&sdp->sd_reclaim_count) < - gfs2_tune_get(sdp, gt_reclaim_limit)) { - error = gfs2_glock_get(sdp, number, glops, CREATE, &gl); - if (!error) { - gfs2_glock_prefetch(gl, state, flags); - gfs2_glock_put(gl); - } - } -} - -/** * gfs2_lvb_hold - attach a LVB from a glock * @gl: The glock in question * @@ -1731,20 +1473,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, unsigned int state) { struct gfs2_glock *gl; + unsigned long delay = 0; + unsigned long holdtime; + unsigned long now = jiffies; gl = gfs2_glock_find(sdp, name); if (!gl) return; - if (gl->gl_ops->go_callback) - gl->gl_ops->go_callback(gl, state); - handle_callback(gl, state); - - spin_lock(&gl->gl_spin); - run_queue(gl); - spin_unlock(&gl->gl_spin); + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + if (time_before(now, holdtime)) + delay = holdtime - now; - gfs2_glock_put(gl); + handle_callback(gl, state, 1, delay); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); } /** @@ -1779,12 +1522,15 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) struct lm_async_cb *async = data; struct gfs2_glock *gl; + down_read(&gfs2_umount_flush_sem); gl = 
gfs2_glock_find(sdp, &async->lc_name); if (gfs2_assert_warn(sdp, gl)) return; if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) gl->gl_req_bh(gl, async->lc_ret); - gfs2_glock_put(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); + up_read(&gfs2_umount_flush_sem); return; } @@ -1814,15 +1560,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) static int demote_ok(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_sbd; const struct gfs2_glock_operations *glops = gl->gl_ops; int demote = 1; if (test_bit(GLF_STICKY, &gl->gl_flags)) demote = 0; - else if (test_bit(GLF_PREFETCH, &gl->gl_flags)) - demote = time_after_eq(jiffies, gl->gl_stamp + - gfs2_tune_get(sdp, gt_prefetch_secs) * HZ); else if (glops->go_demote_ok) demote = glops->go_demote_ok(gl); @@ -1878,9 +1620,9 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) atomic_inc(&sdp->sd_reclaimed); if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); gfs2_glmutex_unlock(gl); } @@ -1899,51 +1641,36 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, unsigned int hash) { - struct glock_plug plug; - struct list_head *tmp; - struct gfs2_glock *gl; - int entries; - - /* Add "plug" to end of bucket list, work back up list from there */ - memset(&plug.gl_flags, 0, sizeof(unsigned long)); - set_bit(GLF_PLUG, &plug.gl_flags); - - write_lock(gl_lock_addr(hash)); - list_add(&plug.gl_list, &gl_hash_table[hash].hb_list); - write_unlock(gl_lock_addr(hash)); - - for (;;) { - write_lock(gl_lock_addr(hash)); + struct gfs2_glock *gl, *prev = NULL; + int has_entries = 0; + struct hlist_head *head = &gl_hash_table[hash].hb_list; - for (;;) { - tmp = plug.gl_list.next; - - if (tmp == &gl_hash_table[hash].hb_list) { - list_del(&plug.gl_list); - entries = !list_empty(&gl_hash_table[hash].hb_list); - write_unlock(gl_lock_addr(hash)); - return entries; - } - gl = list_entry(tmp, struct gfs2_glock, gl_list); - - /* Move plug up list */ - list_move(&plug.gl_list, &gl->gl_list); - - if (test_bit(GLF_PLUG, &gl->gl_flags)) - continue; - if (gl->gl_sbd != sdp) - continue; - - /* examiner() must glock_put() */ + read_lock(gl_lock_addr(hash)); + /* Can't use hlist_for_each_entry - don't want prefetch here */ + if (hlist_empty(head)) + goto out; + gl = list_entry(head->first, struct gfs2_glock, gl_list); + while(1) { + if (!sdp || gl->gl_sbd == sdp) { gfs2_glock_hold(gl); - - break; + read_unlock(gl_lock_addr(hash)); + if (prev) + gfs2_glock_put(prev); + prev = gl; + examiner(gl); + has_entries = 1; + read_lock(gl_lock_addr(hash)); } - - write_unlock(gl_lock_addr(hash)); - - examiner(gl); + if (gl->gl_list.next == NULL) + break; + gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list); } +out: + read_unlock(gl_lock_addr(hash)); + if (prev) + gfs2_glock_put(prev); + cond_resched(); + return has_entries; } /** @@ -1954,38 +1681,20 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, static void scan_glock(struct gfs2_glock *gl) { - if (gl->gl_ops == &gfs2_inode_glops) - goto out; + if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) + return; if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && - gl->gl_state != LM_ST_UNLOCKED && - demote_ok(gl)) + if (list_empty(&gl->gl_holders) && + gl->gl_state != 
LM_ST_UNLOCKED && demote_ok(gl)) goto out_schedule; gfs2_glmutex_unlock(gl); } -out: - gfs2_glock_put(gl); return; out_schedule: gfs2_glmutex_unlock(gl); gfs2_glock_schedule_for_reclaim(gl); - gfs2_glock_put(gl); -} - -/** - * gfs2_scand_internal - Look for glocks and inodes to toss from memory - * @sdp: the filesystem - * - */ - -void gfs2_scand_internal(struct gfs2_sbd *sdp) -{ - unsigned int x; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(scan_glock, sdp, x); } /** @@ -2011,14 +1720,11 @@ static void clear_glock(struct gfs2_glock *gl) } if (gfs2_glmutex_trylock(gl)) { - if (queue_empty(gl, &gl->gl_holders) && + if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED); - + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); gfs2_glmutex_unlock(gl); } - - gfs2_glock_put(gl); } /** @@ -2040,10 +1746,10 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) for (;;) { cont = 0; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { if (examine_bucket(clear_glock, sdp, x)) cont = 1; + } if (!wait || !cont) break; @@ -2056,7 +1762,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) t = jiffies; } + down_write(&gfs2_umount_flush_sem); invalidate_inodes(sdp->sd_vfs); + up_write(&gfs2_umount_flush_sem); msleep(10); } } @@ -2065,6 +1773,15 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) * Diagnostic routines to help debug distributed deadlock */ +static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, + unsigned long address) +{ + char buffer[KSYM_SYMBOL_LEN]; + + sprint_symbol(buffer, address); + print_dbg(gi, fmt, buffer); +} + /** * dump_holder - print information about a glock holder * @str: a string naming the type of holder @@ -2073,31 +1790,37 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(char *str, struct gfs2_holder *gh) +static int dump_holder(struct glock_iter *gi, char *str, + struct gfs2_holder *gh) { unsigned int x; - int error = -ENOBUFS; - - printk(KERN_INFO " %s\n", str); - printk(KERN_INFO " owner = %ld\n", - (gh->gh_owner) ? 
(long)gh->gh_owner->pid : -1); - printk(KERN_INFO " gh_state = %u\n", gh->gh_state); - printk(KERN_INFO " gh_flags ="); + struct task_struct *gh_owner; + + print_dbg(gi, " %s\n", str); + if (gh->gh_owner_pid) { + print_dbg(gi, " owner = %ld ", (long)gh->gh_owner_pid); + gh_owner = find_task_by_pid(gh->gh_owner_pid); + if (gh_owner) + print_dbg(gi, "(%s)\n", gh_owner->comm); + else + print_dbg(gi, "(ended)\n"); + } else + print_dbg(gi, " owner = -1\n"); + print_dbg(gi, " gh_state = %u\n", gh->gh_state); + print_dbg(gi, " gh_flags ="); for (x = 0; x < 32; x++) if (gh->gh_flags & (1 << x)) - printk(" %u", x); - printk(" \n"); - printk(KERN_INFO " error = %d\n", gh->gh_error); - printk(KERN_INFO " gh_iflags ="); + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + print_dbg(gi, " error = %d\n", gh->gh_error); + print_dbg(gi, " gh_iflags ="); for (x = 0; x < 32; x++) if (test_bit(x, &gh->gh_iflags)) - printk(" %u", x); - printk(" \n"); - print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); - - error = 0; + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); - return error; + return 0; } /** @@ -2107,25 +1830,21 @@ static int dump_holder(char *str, struct gfs2_holder *gh) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_inode(struct gfs2_inode *ip) +static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) { unsigned int x; - int error = -ENOBUFS; - printk(KERN_INFO " Inode:\n"); - printk(KERN_INFO " num = %llu %llu\n", - (unsigned long long)ip->i_num.no_formal_ino, - (unsigned long long)ip->i_num.no_addr); - printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode)); - printk(KERN_INFO " i_flags ="); + print_dbg(gi, " Inode:\n"); + print_dbg(gi, " num = %llu/%llu\n", + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr); + print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); + print_dbg(gi, " i_flags ="); for (x = 0; x < 32; x++) if (test_bit(x, &ip->i_flags)) - printk(" %u", x); - printk(" \n"); - - error = 0; - - return error; + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + return 0; } /** @@ -2136,74 +1855,84 @@ static int dump_inode(struct gfs2_inode *ip) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_glock(struct gfs2_glock *gl) +static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) { struct gfs2_holder *gh; unsigned int x; int error = -ENOBUFS; + struct task_struct *gl_owner; spin_lock(&gl->gl_spin); - printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); - printk(KERN_INFO " gl_flags ="); + print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + print_dbg(gi, " gl_flags ="); for (x = 0; x < 32; x++) { if (test_bit(x, &gl->gl_flags)) - printk(" %u", x); - } - printk(" \n"); - printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref.refcount)); - printk(KERN_INFO " gl_state = %u\n", gl->gl_state); - printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm); - print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip); - printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); - printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); - printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); - printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no"); - printk(KERN_INFO " le = %s\n", - (list_empty(&gl->gl_le.le_list)) ? 
"no" : "yes"); - printk(KERN_INFO " reclaim = %s\n", - (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); + print_dbg(gi, " %u", x); + } + if (!test_bit(GLF_LOCK, &gl->gl_flags)) + print_dbg(gi, " (unlocked)"); + print_dbg(gi, " \n"); + print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); + print_dbg(gi, " gl_state = %u\n", gl->gl_state); + if (gl->gl_owner_pid) { + gl_owner = find_task_by_pid(gl->gl_owner_pid); + if (gl_owner) + print_dbg(gi, " gl_owner = pid %d (%s)\n", + gl->gl_owner_pid, gl_owner->comm); + else + print_dbg(gi, " gl_owner = %d (ended)\n", + gl->gl_owner_pid); + } else + print_dbg(gi, " gl_owner = -1\n"); + print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); + print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); + print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); + print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); + print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); + print_dbg(gi, " reclaim = %s\n", + (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); if (gl->gl_aspace) - printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, - gl->gl_aspace->i_mapping->nrpages); + print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, + gl->gl_aspace->i_mapping->nrpages); else - printk(KERN_INFO " aspace = no\n"); - printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count)); + print_dbg(gi, " aspace = no\n"); + print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); if (gl->gl_req_gh) { - error = dump_holder("Request", gl->gl_req_gh); + error = dump_holder(gi, "Request", gl->gl_req_gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder("Holder", gh); + error = dump_holder(gi, "Holder", gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { - error = dump_holder("Waiter1", gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { - error = dump_holder("Waiter2", gh); + error = dump_holder(gi, "Waiter1", gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { - error = dump_holder("Waiter3", gh); + error = dump_holder(gi, "Waiter3", gh); if (error) goto out; } + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", + gl->gl_demote_state, (unsigned long long) + (jiffies - gl->gl_demote_time)*(1000000/HZ)); + } if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { if (!test_bit(GLF_LOCK, &gl->gl_flags) && - list_empty(&gl->gl_holders)) { - error = dump_inode(gl->gl_object); + list_empty(&gl->gl_holders)) { + error = dump_inode(gi, gl->gl_object); if (error) goto out; } else { error = -ENOBUFS; - printk(KERN_INFO " Inode: busy\n"); + print_dbg(gi, " Inode: busy\n"); } } @@ -2226,6 +1955,7 @@ out: static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) { struct gfs2_glock *gl; + struct hlist_node *h; unsigned int x; int error = 0; @@ -2233,13 +1963,11 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) read_lock(gl_lock_addr(x)); - list_for_each_entry(gl, &gl_hash_table[x].hb_list, gl_list) { - if (test_bit(GLF_PLUG, &gl->gl_flags)) - continue; + hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) { if (gl->gl_sbd != sdp) continue; - error = dump_glock(gl); + error = dump_glock(NULL, gl); if (error) break; } @@ -2254,17 +1982,250 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) return error; } +/** + * gfs2_scand - Look for cached glocks and inodes to toss from memory + * @sdp: Pointer to GFS2 superblock + * + * One 
of these daemons runs, finding candidates to add to sd_reclaim_list. + * See gfs2_glockd() + */ + +static int gfs2_scand(void *data) +{ + unsigned x; + unsigned delay; + + while (!kthread_should_stop()) { + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + examine_bucket(scan_glock, NULL, x); + if (freezing(current)) + refrigerator(); + delay = scand_secs; + if (delay < 1) + delay = 1; + schedule_timeout_interruptible(delay * HZ); + } + + return 0; +} + + + int __init gfs2_glock_init(void) { unsigned i; for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { - INIT_LIST_HEAD(&gl_hash_table[i].hb_list); + INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); } #ifdef GL_HASH_LOCK_SZ for(i = 0; i < GL_HASH_LOCK_SZ; i++) { rwlock_init(&gl_hash_locks[i]); } #endif + + scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand"); + if (IS_ERR(scand_process)) + return PTR_ERR(scand_process); + + glock_workqueue = create_workqueue("glock_workqueue"); + if (IS_ERR(glock_workqueue)) { + kthread_stop(scand_process); + return PTR_ERR(glock_workqueue); + } + + return 0; +} + +void gfs2_glock_exit(void) +{ + destroy_workqueue(glock_workqueue); + kthread_stop(scand_process); +} + +module_param(scand_secs, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); + +static int gfs2_glock_iter_next(struct glock_iter *gi) +{ + struct gfs2_glock *gl; + +restart: + read_lock(gl_lock_addr(gi->hash)); + gl = gi->gl; + if (gl) { + gi->gl = hlist_entry(gl->gl_list.next, + struct gfs2_glock, gl_list); + if (gi->gl) + gfs2_glock_hold(gi->gl); + } + read_unlock(gl_lock_addr(gi->hash)); + if (gl) + gfs2_glock_put(gl); + if (gl && gi->gl == NULL) + gi->hash++; + while(gi->gl == NULL) { + if (gi->hash >= GFS2_GL_HASH_SIZE) + return 1; + read_lock(gl_lock_addr(gi->hash)); + gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, + struct gfs2_glock, gl_list); + if (gi->gl) + gfs2_glock_hold(gi->gl); + read_unlock(gl_lock_addr(gi->hash)); + gi->hash++; + } + + if (gi->sdp != gi->gl->gl_sbd) + goto restart; + return 0; } +static void gfs2_glock_iter_free(struct glock_iter *gi) +{ + if (gi->gl) + gfs2_glock_put(gi->gl); + kfree(gi); +} + +static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) +{ + struct glock_iter *gi; + + gi = kmalloc(sizeof (*gi), GFP_KERNEL); + if (!gi) + return NULL; + + gi->sdp = sdp; + gi->hash = 0; + gi->seq = NULL; + gi->gl = NULL; + memset(gi->string, 0, sizeof(gi->string)); + + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + + return gi; +} + +static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) +{ + struct glock_iter *gi; + loff_t n = *pos; + + gi = gfs2_glock_iter_init(file->private); + if (!gi) + return NULL; + + while(n--) { + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + } + + return gi; +} + +static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, + loff_t *pos) +{ + struct glock_iter *gi = iter_ptr; + + (*pos)++; + + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + + return gi; +} + +static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) +{ + struct glock_iter *gi = iter_ptr; + if (gi) + gfs2_glock_iter_free(gi); +} + +static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) +{ + struct glock_iter *gi = iter_ptr; + + gi->seq = file; + dump_glock(gi, gi->gl); + + return 0; +} + +static const struct seq_operations gfs2_glock_seq_ops = { + .start = gfs2_glock_seq_start, + .next = gfs2_glock_seq_next, + 
.stop = gfs2_glock_seq_stop, + .show = gfs2_glock_seq_show, +}; + +static int gfs2_debugfs_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &gfs2_glock_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; + + return 0; +} + +static const struct file_operations gfs2_debug_fops = { + .owner = THIS_MODULE, + .open = gfs2_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) +{ + sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); + if (!sdp->debugfs_dir) + return -ENOMEM; + sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_debug_fops); + if (!sdp->debugfs_dentry_glocks) + return -ENOMEM; + + return 0; +} + +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) +{ + if (sdp && sdp->debugfs_dir) { + if (sdp->debugfs_dentry_glocks) { + debugfs_remove(sdp->debugfs_dentry_glocks); + sdp->debugfs_dentry_glocks = NULL; + } + debugfs_remove(sdp->debugfs_dir); + sdp->debugfs_dir = NULL; + } +} + +int gfs2_register_debugfs(void) +{ + gfs2_root = debugfs_create_dir("gfs2", NULL); + return gfs2_root ? 0 : -ENOMEM; +} + +void gfs2_unregister_debugfs(void) +{ + debugfs_remove(gfs2_root); + gfs2_root = NULL; +}
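
What follows are editorial sketches of the patterns this patch relies on; none of the code below is part of the patch itself, and all names in the sketches are illustrative unless they match an identifier above. First, gl_lock_addr() near the top of the diff maps the bucket table (grown here from 2^13 to 2^15 entries) onto the small fixed gl_hash_locks array, so enlarging the table does not multiply the lock count. A minimal user-space sketch of that lock striping, assuming pthreads in place of the kernel's rwlock_t:

#include <pthread.h>

#define HASH_SIZE (1 << 15)  /* buckets, cf. GFS2_GL_HASH_SIZE */
#define LOCK_SZ   256        /* lock stripes; must be a power of two */

static pthread_rwlock_t stripe_locks[LOCK_SZ];

/* Call once before use; the kernel version initializes its array in
 * gfs2_glock_init() with rwlock_init(). */
void stripe_locks_init(void)
{
	for (int i = 0; i < LOCK_SZ; i++)
		pthread_rwlock_init(&stripe_locks[i], NULL);
}

/* Bucket x shares its lock with every bucket congruent to x modulo
 * LOCK_SZ, mirroring gl_lock_addr() above. */
pthread_rwlock_t *lock_for_bucket(unsigned int x)
{
	return &stripe_locks[x & (LOCK_SZ - 1)];
}

Striping trades bucket independence for memory: two buckets that collide modulo LOCK_SZ contend on the same lock, which is an acceptable cost when critical sections are as short as a hash-chain walk.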
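Second, gfs2_glock_put() above replaces kref_put() with a bare atomic_dec_and_test() taken under the bucket's write lock, so the glock is unhashed before it is freed and a concurrent search_bucket() can never return a pointer to a dying object. The same shape in portable C11, with a pthread rwlock and hypothetical obj/bucket names standing in for the kernel structures:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	struct obj *next;    /* hash-chain link, cf. gl_list */
	atomic_int ref;      /* cf. gl_ref; the creator starts it at 1 */
};

static struct obj *bucket_head;
static pthread_rwlock_t bucket_lock = PTHREAD_RWLOCK_INITIALIZER;

void obj_hold(struct obj *o)
{
	atomic_fetch_add(&o->ref, 1);            /* cf. gfs2_glock_hold() */
}

/* Drop a reference. On the last put, unhash under the write lock and
 * only then free, so a concurrent lookup never sees a dying object.
 * Returns 1 if the object was freed, like gfs2_glock_put(). */
int obj_put(struct obj *o)
{
	pthread_rwlock_wrlock(&bucket_lock);
	if (atomic_fetch_sub(&o->ref, 1) == 1) { /* last reference gone */
		struct obj **pp = &bucket_head;
		while (*pp != o)
			pp = &(*pp)->next;
		*pp = o->next;                   /* unhash first... */
		pthread_rwlock_unlock(&bucket_lock);
		free(o);                         /* ...then free */
		return 1;
	}
	pthread_rwlock_unlock(&bucket_lock);
	return 0;
}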
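Third, the patch retires the per-holder struct completion: add_to_queue() sets HIF_WAIT, wait_on_holder() sleeps on that bit, and gfs2_holder_wake() clears it, issues a memory barrier, and wakes the sleeper. A user-space analogue of that flag-plus-wakeup handshake, using a mutex and condition variable in place of wait_on_bit()/wake_up_bit() (illustrative names only):

#include <pthread.h>
#include <stdbool.h>

struct holder {
	bool waiting;            /* analogue of the HIF_WAIT bit */
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

#define HOLDER_INIT { true, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER }

/* Waiter side, cf. wait_on_holder(): sleep until the flag clears. */
void holder_wait(struct holder *h)
{
	pthread_mutex_lock(&h->lock);
	while (h->waiting)
		pthread_cond_wait(&h->cond, &h->lock);
	pthread_mutex_unlock(&h->lock);
}

/* Waker side, cf. gfs2_holder_wake(): clear the flag, then wake.
 * The mutex supplies the ordering the kernel gets from smp_mb(). */
void holder_wake(struct holder *h)
{
	pthread_mutex_lock(&h->lock);
	h->waiting = false;
	pthread_cond_broadcast(&h->cond);
	pthread_mutex_unlock(&h->lock);
}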
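Fourth, blocking_cb() above delays a remote demote until the glock has been held for go_min_hold_time jiffies past gl_tchange, which stops two nodes from bouncing a hot lock back and forth; it compares time stamps with time_before() so that counter wraparound cannot produce a bogus delay. A standalone demonstration of that wrap-safe idiom, where TIME_BEFORE is a local stand-in for the kernel macro:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "a is before b" for a free-running unsigned tick counter;
 * correct while the two stamps are less than half the counter range
 * apart (mirrors time_before() in include/linux/jiffies.h). */
#define TIME_BEFORE(a, b) ((int32_t)((a) - (b)) < 0)

int main(void)
{
	uint32_t tchange  = UINT32_C(0xfffffff0); /* stamped just before wrap */
	uint32_t min_hold = 0x20;
	uint32_t now      = 0x08;                 /* counter has since wrapped */
	uint32_t holdtime = tchange + min_hold;   /* == 0x10 after wrap */
	uint32_t delay    = 0;

	if (TIME_BEFORE(now, holdtime))
		delay = holdtime - now;           /* remaining hold time */

	printf("delay = 0x%" PRIx32 " ticks\n", delay); /* prints 0x8 */
	return 0;
}

A naive "now < holdtime" comparison would fail here, since the wrapped holdtime (0x10) is numerically far below the pre-wrap stamp.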
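Finally, the new debugfs "glocks" file walks the whole hash table through the seq_file start/next/stop/show callbacks; gfs2_glock_iter_next() keeps its position as a bucket index plus a held reference to the current glock, stepping into later buckets as each chain runs out. The cursor movement, reduced to a self-contained sketch over plain singly linked lists (locking and reference counting omitted):

#include <stddef.h>

#define NBUCKETS 8

struct node { struct node *next; };

struct node *table[NBUCKETS];        /* cf. gl_hash_table */

struct iter {
	int hash;                    /* current bucket, cf. glock_iter->hash */
	struct node *n;              /* current entry,  cf. glock_iter->gl   */
};

/* Advance the cursor; returns 1 when the whole table has been walked,
 * mirroring the return convention of gfs2_glock_iter_next(). */
int iter_next(struct iter *gi)
{
	if (gi->n)
		gi->n = gi->n->next;         /* step along the chain */
	while (gi->n == NULL) {              /* spill into later buckets */
		if (gi->hash >= NBUCKETS)
			return 1;            /* walk finished */
		gi->n = table[gi->hash++];
	}
	return 0;
}

Starting from { .hash = 0, .n = NULL }, the first call lands on the first entry of the first non-empty bucket, which is exactly how gfs2_glock_iter_init() primes the iterator above.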