#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
+#include <linux/writeback.h>
#include "super.h"
#include "decode.h"
static int caps_use_count; /* in use */
static int caps_reserve_count; /* unused, reserved */
static int caps_avail_count; /* unused, unreserved */
+static int caps_min_count; /* keep at least this many (unreserved) */
void __init ceph_caps_init(void)
{
caps_avail_count = 0;
caps_use_count = 0;
caps_reserve_count = 0;
+ caps_min_count = 0;
+ spin_unlock(&caps_list_lock);
+}
+
+void ceph_adjust_min_caps(int delta)
+{
+ spin_lock(&caps_list_lock);
+ caps_min_count += delta;
+ BUG_ON(caps_min_count < 0);
spin_unlock(&caps_list_lock);
}
return cap;
}
-static void put_cap(struct ceph_cap *cap,
- struct ceph_cap_reservation *ctx)
+void ceph_put_cap(struct ceph_cap *cap)
{
spin_lock(&caps_list_lock);
- dout("put_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
- ctx, ctx ? ctx->count : 0, caps_total_count, caps_use_count,
+ dout("put_cap %p %d = %d used + %d resv + %d avail\n",
+ cap, caps_total_count, caps_use_count,
caps_reserve_count, caps_avail_count);
caps_use_count--;
/*
- * Keep some preallocated caps around, at least enough to do a
- * readdir (which needs to preallocate lots of them), to avoid
- * lots of free/alloc churn.
+ * Keep some preallocated caps around (ceph_min_count), to
+ * avoid lots of free/alloc churn.
*/
- if (caps_avail_count >= caps_reserve_count +
- ceph_client(cap->ci->vfs_inode.i_sb)->mount_args->max_readdir) {
+ if (caps_avail_count >= caps_reserve_count + caps_min_count) {
caps_total_count--;
kmem_cache_free(ceph_cap_cachep, cap);
} else {
- if (ctx) {
- ctx->count++;
- caps_reserve_count++;
- } else {
- caps_avail_count++;
- }
+ caps_avail_count++;
list_add(&cap->caps_item, &caps_list);
}
}
void ceph_reservation_status(struct ceph_client *client,
- int *total, int *avail, int *used, int *reserved)
+ int *total, int *avail, int *used, int *reserved,
+ int *min)
{
if (total)
*total = caps_total_count;
*used = caps_use_count;
if (reserved)
*reserved = caps_reserve_count;
+ if (min)
+ *min = caps_min_count;
}
/*
*/
int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
{
- int have = ci->i_snap_caps;
+ int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
struct ceph_cap *cap;
struct rb_node *p;
struct ceph_mds_session *s = cap->session;
spin_lock(&s->s_cap_lock);
- if (!s->s_iterating_caps) {
+ if (s->s_cap_iterator == NULL) {
dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
s->s_mds);
list_move_tail(&cap->session_caps, &s->s_caps);
}
/*
- * caller should hold i_lock, and session s_mutex.
- * returns true if this is the last cap. if so, caller should iput.
+ * Remove a cap. Take steps to deal with a racing iterate_session_caps.
+ *
+ * caller should hold i_lock.
+ * caller will not hold session s_mutex if called from destroy_inode.
*/
-void __ceph_remove_cap(struct ceph_cap *cap,
- struct ceph_cap_reservation *ctx)
+void __ceph_remove_cap(struct ceph_cap *cap)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ int removed = 0;
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
/* remove from session list */
spin_lock(&session->s_cap_lock);
- list_del_init(&cap->session_caps);
- session->s_nr_caps--;
+ if (session->s_cap_iterator == cap) {
+ /* not yet, we are iterating over this very cap */
+ dout("__ceph_remove_cap delaying %p removal from session %p\n",
+ cap, cap->session);
+ } else {
+ list_del_init(&cap->session_caps);
+ session->s_nr_caps--;
+ cap->session = NULL;
+ removed = 1;
+ }
+ /* protect backpointer with s_cap_lock: see iterate_session_caps */
+ cap->ci = NULL;
spin_unlock(&session->s_cap_lock);
/* remove from inode list */
rb_erase(&cap->ci_node, &ci->i_caps);
- cap->session = NULL;
if (ci->i_auth_cap == cap)
ci->i_auth_cap = NULL;
- put_cap(cap, ctx);
+ if (removed)
+ ceph_put_cap(cap);
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
struct ceph_snap_realm *realm = ci->i_snap_realm;
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), 0, 0, NULL);
- if (IS_ERR(msg))
- return PTR_ERR(msg);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS);
+ if (!msg)
+ return -ENOMEM;
msg->hdr.tid = cpu_to_le64(flush_tid);
}
/*
- * Queue cap releases when an inode is dropped from our
- * cache.
+ * Queue cap releases when an inode is dropped from our cache. Since
+ * inode is about to be destroyed, there is no need for i_lock.
*/
void ceph_queue_caps_release(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct rb_node *p;
- spin_lock(&inode->i_lock);
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
}
spin_unlock(&session->s_cap_lock);
p = rb_next(p);
- __ceph_remove_cap(cap, NULL);
-
+ __ceph_remove_cap(cap);
}
- spin_unlock(&inode->i_lock);
}
/*
struct ceph_inode_info *ci = cap->ci;
struct inode *inode = &ci->vfs_inode;
u64 cap_id = cap->cap_id;
- int held = cap->issued | cap->implemented;
- int revoking = cap->implemented & ~cap->issued;
- int dropping = cap->issued & ~retain;
- int keep;
+ int held, revoking, dropping, keep;
u64 seq, issue_seq, mseq, time_warp_seq, follows;
u64 size, max_size;
struct timespec mtime, atime;
int i;
int ret;
+ held = cap->issued | cap->implemented;
+ revoking = cap->implemented & ~cap->issued;
+ retain &= ~revoking;
+ dropping = cap->issued & ~retain;
+
dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n",
inode, cap, cap->session,
ceph_cap_string(held), ceph_cap_string(held & retain),
spin_unlock(&inode->i_lock);
- if (dropping & CEPH_CAP_FILE_CACHE) {
- /* invalidate what we can */
- dout("invalidating pages on %p\n", inode);
- invalidate_mapping_pages(&inode->i_data, 0, -1);
- }
-
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
size, max_size, &mtime, &atime, time_warp_seq,
if (capsnap->dirty_pages || capsnap->writing)
continue;
+ /*
+ * if cap writeback already occurred, we should have dropped
+ * the capsnap in ceph_put_wrbuffer_cap_refs.
+ */
+ BUG_ON(capsnap->dirty == 0);
+
/* pick mds, take s_mutex */
mds = __ceph_get_cap_mds(ci, &mseq);
if (session && session->s_mds != mds) {
*/
void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
{
- struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct inode *inode = &ci->vfs_inode;
int was = ci->i_dirty_caps;
int dirty = 0;
static int __mark_caps_flushing(struct inode *inode,
struct ceph_mds_session *session)
{
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int flushing;
}
/*
+ * try to invalidate mapping pages without blocking.
+ */
+static int mapping_is_empty(struct address_space *mapping)
+{
+ struct page *page = find_get_page(mapping, 0);
+
+ if (!page)
+ return 1;
+
+ put_page(page);
+ return 0;
+}
+
+static int try_nonblocking_invalidate(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u32 invalidating_gen = ci->i_rdcache_gen;
+
+ spin_unlock(&inode->i_lock);
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
+ spin_lock(&inode->i_lock);
+
+ if (mapping_is_empty(&inode->i_data) &&
+ invalidating_gen == ci->i_rdcache_gen) {
+ /* success. */
+ dout("try_nonblocking_invalidate %p success\n", inode);
+ ci->i_rdcache_gen = 0;
+ ci->i_rdcache_revoking = 0;
+ return 0;
+ }
+ dout("try_nonblocking_invalidate %p failed\n", inode);
+ return -1;
+}
+
+/*
* Swiss army knife function to examine currently used and wanted
* versus held caps. Release, flush, ack revoked caps to mds as
* appropriate.
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session)
+ __releases(session->s_mutex)
{
struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
struct ceph_mds_client *mdsc = &client->mdsc;
struct ceph_cap *cap;
int file_wanted, used;
int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
- int drop_session_lock = session ? 0 : 1;
int issued, implemented, want, retain, revoking, flushing = 0;
int mds = -1; /* keep track of how far we've gone through i_caps list
to avoid an infinite loop on retry */
ci->i_rdcache_gen && /* may have cached pages */
(file_wanted == 0 || /* no open files */
(revoking & CEPH_CAP_FILE_CACHE)) && /* or revoking cache */
- !ci->i_truncate_pending &&
!tried_invalidate) {
- u32 invalidating_gen = ci->i_rdcache_gen;
- int ret;
-
dout("check_caps trying to invalidate on %p\n", inode);
- spin_unlock(&inode->i_lock);
- ret = invalidate_mapping_pages(&inode->i_data, 0, -1);
- spin_lock(&inode->i_lock);
- if (ret == 0 && invalidating_gen == ci->i_rdcache_gen) {
- /* success. */
- ci->i_rdcache_gen = 0;
- ci->i_rdcache_revoking = 0;
- } else if (revoking & CEPH_CAP_FILE_CACHE) {
- dout("check_caps queuing invalidate\n");
- queue_invalidate = 1;
- ci->i_rdcache_revoking = ci->i_rdcache_gen;
- } else {
- dout("check_caps failed to invalidate pages\n");
- /* we failed to invalidate pages. check these
- caps again later. */
- force_requeue = 1;
- __cap_set_timeouts(mdsc, ci);
+ if (try_nonblocking_invalidate(inode) < 0) {
+ if (revoking & CEPH_CAP_FILE_CACHE) {
+ dout("check_caps queuing invalidate\n");
+ queue_invalidate = 1;
+ ci->i_rdcache_revoking = ci->i_rdcache_gen;
+ } else {
+ dout("check_caps failed to invalidate pages\n");
+ /* we failed to invalidate pages. check these
+ caps again later. */
+ force_requeue = 1;
+ __cap_set_timeouts(mdsc, ci);
+ }
}
tried_invalidate = 1;
goto retry_locked;
}
ack:
+ if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
+ dout(" skipping %p I_NOFLUSH set\n", inode);
+ continue;
+ }
+
if (session && session != cap->session) {
dout("oops, wrong session %p mutex\n", session);
mutex_unlock(&session->s_mutex);
spin_unlock(&inode->i_lock);
if (queue_invalidate)
- if (ceph_queue_page_invalidation(inode))
- igrab(inode);
+ ceph_queue_invalidate(inode);
- if (session && drop_session_lock)
+ if (session)
mutex_unlock(&session->s_mutex);
if (took_snap_rwsem)
up_read(&mdsc->snap_rwsem);
static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
unsigned *flush_tid)
{
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_inode_info *ci = ceph_inode(inode);
int unlock_session = session ? 0 : 1;
int flushing = 0;
retry:
spin_lock(&inode->i_lock);
+ if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
+ dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
+ goto out;
+ }
if (ci->i_dirty_caps && ci->i_auth_cap) {
struct ceph_cap *cap = ci->i_auth_cap;
int used = __ceph_caps_used(ci);
static int caps_are_flushed(struct inode *inode, unsigned tid)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- int dirty, i, ret = 1;
+ int i, ret = 1;
spin_lock(&inode->i_lock);
- dirty = __ceph_caps_dirty(ci);
for (i = 0; i < CEPH_CAP_BITS; i++)
if ((ci->i_flushing_caps & (1 << i)) &&
ci->i_cap_flush_tid[i] <= tid) {
* get by with fewer MDS messages if we wait for data writeback to
* complete first.
*/
-int ceph_write_inode(struct inode *inode, int wait)
+int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct ceph_inode_info *ci = ceph_inode(inode);
unsigned flush_tid;
int err = 0;
int dirty;
+ int wait = wbc->sync_mode == WB_SYNC_ALL;
dout("write_inode %p wait=%d\n", inode, wait);
if (wait) {
err = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
} else {
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc =
+ &ceph_sb_to_client(inode->i_sb)->mdsc;
spin_lock(&inode->i_lock);
if (__ceph_caps_dirty(ci))
} else {
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
- spin_unlock(&inode->i_lock);
}
+ spin_unlock(&inode->i_lock);
}
}
struct inode *inode = &ci->vfs_inode;
int ret = 0;
int have, implemented;
+ int file_wanted;
dout("get_cap_refs %p need %s want %s\n", inode,
ceph_cap_string(need), ceph_cap_string(want));
spin_lock(&inode->i_lock);
- /* make sure we _have_ some caps! */
- if (!__ceph_is_any_caps(ci)) {
- dout("get_cap_refs %p no real caps\n", inode);
+ /* make sure file is actually open */
+ file_wanted = __ceph_caps_file_wanted(ci);
+ if ((file_wanted & need) == 0) {
+ dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
+ ceph_cap_string(need), ceph_cap_string(file_wanted));
*err = -EBADF;
ret = 1;
goto out;
}
spin_unlock(&inode->i_lock);
- dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
- last ? "last" : "");
+ dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+ last ? " last" : "", put ? " put" : "");
if (last && !flushsnaps)
ceph_check_caps(ci, 0, NULL);
{
struct inode *inode = &ci->vfs_inode;
int last = 0;
- int last_snap = 0;
+ int complete_capsnap = 0;
+ int drop_capsnap = 0;
int found = 0;
struct ceph_cap_snap *capsnap = NULL;
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
if (capsnap->context == snapc) {
found = 1;
- capsnap->dirty_pages -= nr;
- last_snap = !capsnap->dirty_pages;
break;
}
}
BUG_ON(!found);
+ capsnap->dirty_pages -= nr;
+ if (capsnap->dirty_pages == 0) {
+ complete_capsnap = 1;
+ if (capsnap->dirty == 0)
+ /* cap writeback completed before we created
+ * the cap_snap; no FLUSHSNAP is needed */
+ drop_capsnap = 1;
+ }
dout("put_wrbuffer_cap_refs on %p cap_snap %p "
- " snap %lld %d/%d -> %d/%d %s%s\n",
+ " snap %lld %d/%d -> %d/%d %s%s%s\n",
inode, capsnap, capsnap->context->seq,
ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
ci->i_wrbuffer_ref, capsnap->dirty_pages,
last ? " (wrbuffer last)" : "",
- last_snap ? " (capsnap last)" : "");
+ complete_capsnap ? " (complete capsnap)" : "",
+ drop_capsnap ? " (drop capsnap)" : "");
+ if (drop_capsnap) {
+ ceph_put_snap_context(capsnap->context);
+ list_del(&capsnap->ci_item);
+ list_del(&capsnap->flushing_item);
+ ceph_put_cap_snap(capsnap);
+ }
}
spin_unlock(&inode->i_lock);
if (last) {
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
iput(inode);
- } else if (last_snap) {
+ } else if (complete_capsnap) {
ceph_flush_snaps(ci);
wake_up(&ci->i_cap_wq);
}
+ if (drop_capsnap)
+ iput(inode);
}
/*
* Handle a cap GRANT message from the MDS. (Note that a GRANT may
* actually be a revocation if it specifies a smaller cap set.)
*
- * caller holds s_mutex.
+ * caller holds s_mutex and i_lock, we drop both.
+ *
* return value:
* 0 - ok
* 1 - check_caps on auth cap only (writeback)
* 2 - check_caps (ack revoke)
*/
-static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
- struct ceph_mds_session *session,
- struct ceph_cap *cap,
- struct ceph_buffer *xattr_buf)
+static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+ struct ceph_mds_session *session,
+ struct ceph_cap *cap,
+ struct ceph_buffer *xattr_buf)
__releases(inode->i_lock)
-
+ __releases(session->s_mutex)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
u64 size = le64_to_cpu(grant->size);
u64 max_size = le64_to_cpu(grant->max_size);
struct timespec mtime, atime, ctime;
- int reply = 0;
+ int check_caps = 0;
int wake = 0;
int writeback = 0;
int revoked_rdcache = 0;
- int invalidate_async = 0;
- int tried_invalidate = 0;
- int ret;
+ int queue_invalidate = 0;
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, mds, seq, ceph_cap_string(newcaps));
* try to invalidate (once). (If there are dirty buffers, we
* will invalidate _after_ writeback.)
*/
-restart:
if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
- !ci->i_wrbuffer_ref && !tried_invalidate) {
- dout("CACHE invalidation\n");
- spin_unlock(&inode->i_lock);
- tried_invalidate = 1;
-
- ret = invalidate_mapping_pages(&inode->i_data, 0, -1);
- spin_lock(&inode->i_lock);
- if (ret < 0) {
+ !ci->i_wrbuffer_ref) {
+ if (try_nonblocking_invalidate(inode) == 0) {
+ revoked_rdcache = 1;
+ } else {
/* there were locked pages.. invalidate later
in a separate thread. */
if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
- invalidate_async = 1;
+ queue_invalidate = 1;
ci->i_rdcache_revoking = ci->i_rdcache_gen;
}
- } else {
- /* we successfully invalidated those pages */
- revoked_rdcache = 1;
- ci->i_rdcache_gen = 0;
- ci->i_rdcache_revoking = 0;
}
- goto restart;
}
/* side effects now are allowed */
if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
writeback = 1; /* will delay ack */
else if (dirty & ~newcaps)
- reply = 1; /* initiate writeback in check_caps */
+ check_caps = 1; /* initiate writeback in check_caps */
else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
revoked_rdcache)
- reply = 2; /* send revoke ack in check_caps */
+ check_caps = 2; /* send revoke ack in check_caps */
cap->issued = newcaps;
+ cap->implemented |= newcaps;
} else if (cap->issued == newcaps) {
dout("caps unchanged: %s -> %s\n",
ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
* pending revocation */
wake = 1;
}
+ BUG_ON(cap->issued & ~cap->implemented);
spin_unlock(&inode->i_lock);
- if (writeback) {
+ if (writeback)
/*
* queue inode for writeback: we can't actually call
* filemap_write_and_wait, etc. from message handler
* context.
*/
- dout("queueing %p for writeback\n", inode);
- if (ceph_queue_writeback(inode))
- igrab(inode);
- }
- if (invalidate_async) {
- dout("queueing %p for page invalidation\n", inode);
- if (ceph_queue_page_invalidation(inode))
- igrab(inode);
- }
+ ceph_queue_writeback(inode);
+ if (queue_invalidate)
+ ceph_queue_invalidate(inode);
if (wake)
wake_up(&ci->i_cap_wq);
- return reply;
+
+ if (check_caps == 1)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+ session);
+ else if (check_caps == 2)
+ ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+ else
+ mutex_unlock(&session->s_mutex);
}
/*
__releases(inode->i_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
unsigned seq = le32_to_cpu(m->seq);
int dirty = le32_to_cpu(m->dirty);
int cleaned = 0;
break;
}
WARN_ON(capsnap->dirty_pages || capsnap->writing);
- dout(" removing cap_snap %p follows %lld\n",
- capsnap, follows);
+ dout(" removing %p cap_snap %p follows %lld\n",
+ inode, capsnap, follows);
ceph_put_snap_context(capsnap->context);
list_del(&capsnap->ci_item);
list_del(&capsnap->flushing_item);
spin_unlock(&inode->i_lock);
if (queue_trunc)
- if (queue_work(ceph_client(inode->i_sb)->trunc_wq,
- &ci->i_vmtruncate_work))
- igrab(inode);
+ ceph_queue_vmtruncate(inode);
}
/*
ci->i_cap_exporting_mseq = mseq;
ci->i_cap_exporting_issued = cap->issued;
}
- __ceph_remove_cap(cap, NULL);
- } else {
- WARN_ON(!cap);
+ __ceph_remove_cap(cap);
}
+ /* else, we already released it */
spin_unlock(&inode->i_lock);
}
struct inode *inode;
struct ceph_cap *cap;
struct ceph_mds_caps *h;
- int mds = le64_to_cpu(msg->hdr.src.name.num);
+ int mds = session->s_mds;
int op;
u32 seq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
u64 tid;
- int check_caps = 0;
- int r;
+ void *snaptrace;
dout("handle_caps from mds%d\n", mds);
if (msg->front.iov_len < sizeof(*h))
goto bad;
h = msg->front.iov_base;
+ snaptrace = h + 1;
op = le32_to_cpu(h->op);
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
case CEPH_CAP_OP_IMPORT:
handle_cap_import(mdsc, inode, h, session,
- msg->middle,
- le32_to_cpu(h->snap_trace_len));
- check_caps = 1; /* we may have sent a RELEASE to the old auth */
- goto done;
+ snaptrace, le32_to_cpu(h->snap_trace_len));
+ ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
+ session);
+ goto done_unlocked;
}
/* the rest require a cap */
switch (op) {
case CEPH_CAP_OP_REVOKE:
case CEPH_CAP_OP_GRANT:
- r = handle_cap_grant(inode, h, session, cap, msg->middle);
- if (r == 1)
- ceph_check_caps(ceph_inode(inode),
- CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
- session);
- else if (r == 2)
- ceph_check_caps(ceph_inode(inode),
- CHECK_CAPS_NODELAY,
- session);
- break;
+ handle_cap_grant(inode, h, session, cap, msg->middle);
+ goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
handle_cap_flush_ack(inode, tid, h, session, cap);
done:
mutex_unlock(&session->s_mutex);
-
- if (check_caps)
- ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
+done_unlocked:
if (inode)
iput(inode);
return;
*/
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{
- struct ceph_inode_info *ci;
- struct inode *inode;
+ struct ceph_inode_info *ci, *nci = NULL;
+ struct inode *inode, *ninode = NULL;
+ struct list_head *p, *n;
dout("flush_dirty_caps\n");
spin_lock(&mdsc->cap_dirty_lock);
- while (!list_empty(&mdsc->cap_dirty)) {
- ci = list_first_entry(&mdsc->cap_dirty,
- struct ceph_inode_info,
- i_dirty_item);
- inode = igrab(&ci->vfs_inode);
+ list_for_each_safe(p, n, &mdsc->cap_dirty) {
+ if (nci) {
+ ci = nci;
+ inode = ninode;
+ ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
+ dout("flush_dirty_caps inode %p (was next inode)\n",
+ inode);
+ } else {
+ ci = list_entry(p, struct ceph_inode_info,
+ i_dirty_item);
+ inode = igrab(&ci->vfs_inode);
+ BUG_ON(!inode);
+ dout("flush_dirty_caps inode %p\n", inode);
+ }
+ if (n != &mdsc->cap_dirty) {
+ nci = list_entry(n, struct ceph_inode_info,
+ i_dirty_item);
+ ninode = igrab(&nci->vfs_inode);
+ BUG_ON(!ninode);
+ nci->i_ceph_flags |= CEPH_I_NOFLUSH;
+ dout("flush_dirty_caps next inode %p, noflush\n",
+ ninode);
+ } else {
+ nci = NULL;
+ ninode = NULL;
+ }
spin_unlock(&mdsc->cap_dirty_lock);
if (inode) {
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
int ret = 0;
-
- dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
- mds, ceph_cap_string(drop), ceph_cap_string(unless));
+ int used = 0;
spin_lock(&inode->i_lock);
+ used = __ceph_caps_used(ci);
+
+ dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
+ mds, ceph_cap_string(used), ceph_cap_string(drop),
+ ceph_cap_string(unless));
+
+ /* only drop unused caps */
+ drop &= ~used;
+
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
if (force ||