/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "xfs.h"
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
-#include "xfs_linux.h"
+#include <linux/migrate.h>
+#include <linux/backing-dev.h>
+#include <linux/freezer.h>
-STATIC kmem_zone_t *xfs_buf_zone;
-STATIC kmem_shaker_t xfs_buf_shake;
+static kmem_zone_t *xfs_buf_zone;
+static struct shrinker *xfs_buf_shake;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-STATIC struct workqueue_struct *xfslogd_workqueue;
+static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
#ifdef XFS_BUF_TRACE
return mask;
}
-STATIC inline void
+STATIC_INLINE void
set_page_region(
struct page *page,
size_t offset,
SetPageUptodate(page);
}
-STATIC inline int
+STATIC_INLINE int
test_page_region(
struct page *page,
size_t offset,
struct a_list *next;
} a_list_t;
-STATIC a_list_t *as_free_head;
-STATIC int as_list_len;
-STATIC DEFINE_SPINLOCK(as_lock);
+static a_list_t *as_free_head;
+static int as_list_len;
+static DEFINE_SPINLOCK(as_lock);
/*
* Try to batch vunmaps because they are costly.
{
a_list_t *aentry;
- aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
+ aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
if (likely(aentry)) {
spin_lock(&as_lock);
aentry->next = as_free_head;
ASSERT(list_empty(&bp->b_hash_list));
- if (bp->b_flags & _XBF_PAGE_CACHE) {
+ if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1))
free_address(bp->b_addr - bp->b_offset);
- for (i = 0; i < bp->b_page_count; i++)
- page_cache_release(bp->b_pages[i]);
- _xfs_buf_free_pages(bp);
- } else if (bp->b_flags & _XBF_KMEM_ALLOC) {
- /*
- * XXX(hch): bp->b_count_desired might be incorrect (see
- * xfs_buf_associate_memory for details), but fortunately
- * the Linux version of kmem_free ignores the len argument..
- */
- kmem_free(bp->b_addr, bp->b_count_desired);
+ for (i = 0; i < bp->b_page_count; i++) {
+ struct page *page = bp->b_pages[i];
+
+ if (bp->b_flags & _XBF_PAGE_CACHE)
+ ASSERT(!PagePrivate(page));
+ page_cache_release(page);
+ }
_xfs_buf_free_pages(bp);
}
XFS_STATS_INC(xb_page_retries);
xfsbufd_wakeup(0, gfp_mask);
- blk_congestion_wait(WRITE, HZ/50);
+ congestion_wait(WRITE, HZ/50);
goto retry;
}
nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
size -= nbytes;
+ ASSERT(!PagePrivate(page));
if (!PageUptodate(page)) {
page_count--;
if (blocksize >= PAGE_CACHE_SIZE) {
size_t len,
xfs_buftarg_t *target)
{
- size_t malloc_len = len;
+ unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+ int error, i;
xfs_buf_t *bp;
- void *data;
- int error;
bp = xfs_buf_allocate(0);
if (unlikely(bp == NULL))
goto fail;
_xfs_buf_initialize(bp, target, 0, len, 0);
- try_again:
- data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
- if (unlikely(data == NULL))
+ error = _xfs_buf_get_pages(bp, page_count, 0);
+ if (error)
goto fail_free_buf;
- /* check whether alignment matches.. */
- if ((__psunsigned_t)data !=
- ((__psunsigned_t)data & ~target->bt_smask)) {
- /* .. else double the size and try again */
- kmem_free(data, malloc_len);
- malloc_len <<= 1;
- goto try_again;
+ for (i = 0; i < page_count; i++) {
+ bp->b_pages[i] = alloc_page(GFP_KERNEL);
+ if (!bp->b_pages[i])
+ goto fail_free_mem;
}
+ bp->b_flags |= _XBF_PAGES;
- error = xfs_buf_associate_memory(bp, data, len);
- if (error)
+ error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+ if (unlikely(error)) {
+ printk(KERN_WARNING "%s: failed to map pages\n",
+ __FUNCTION__);
goto fail_free_mem;
- bp->b_flags |= _XBF_KMEM_ALLOC;
+ }
xfs_buf_unlock(bp);
- XB_TRACE(bp, "no_daddr", data);
+ XB_TRACE(bp, "no_daddr", len);
return bp;
+
fail_free_mem:
- kmem_free(data, malloc_len);
+ while (--i >= 0)
+ __free_page(bp->b_pages[i]);
+ _xfs_buf_free_pages(bp);
fail_free_buf:
- xfs_buf_free(bp);
+ xfs_buf_deallocate(bp);
fail:
return NULL;
}
STATIC void
xfs_buf_iodone_work(
- void *v)
+ struct work_struct *work)
{
- xfs_buf_t *bp = (xfs_buf_t *)v;
+ xfs_buf_t *bp =
+ container_of(work, xfs_buf_t, b_iodone_work);
if (bp->b_iodone)
(*(bp->b_iodone))(bp);
if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
if (schedule) {
- INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work, bp);
+ INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
queue_work(xfslogd_workqueue, &bp->b_iodone_work);
} else {
- xfs_buf_iodone_work(bp);
+ xfs_buf_iodone_work(&bp->b_iodone_work);
}
} else {
up(&bp->b_iodonesema);
return status;
}
-STATIC __inline__ int
+STATIC_INLINE int
_xfs_buf_iolocked(
xfs_buf_t *bp)
{
return 0;
}
-STATIC __inline__ void
+STATIC_INLINE void
_xfs_buf_ioend(
xfs_buf_t *bp,
int schedule)
do {
struct page *page = bvec->bv_page;
+ ASSERT(!PagePrivate(page));
if (unlikely(bp->b_error)) {
if (bp->b_flags & XBF_READ)
ClearPageUptodate(page);
- SetPageError(page);
} else if (blocksize >= PAGE_CACHE_SIZE) {
SetPageUptodate(page);
} else if (!PagePrivate(page) &&
total_nr_pages = bp->b_page_count;
map_i = 0;
- if (bp->b_flags & _XBF_RUN_QUEUES) {
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_READ) ? READ_SYNC : WRITE_SYNC;
- } else {
- rw = (bp->b_flags & XBF_READ) ? READ : WRITE;
- }
-
if (bp->b_flags & XBF_ORDERED) {
ASSERT(!(bp->b_flags & XBF_READ));
rw = WRITE_BARRIER;
+ } else if (bp->b_flags & _XBF_RUN_QUEUES) {
+ ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+ bp->b_flags &= ~_XBF_RUN_QUEUES;
+ rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+ } else {
+ rw = (bp->b_flags & XBF_WRITE) ? WRITE :
+ (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
}
/* Special code path for reading a sub page size buffer in --
btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
- sizeof(xfs_bufhash_t), KM_SLEEP);
+ sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
for (i = 0; i < (1 << btp->bt_hashshift); i++) {
spin_lock_init(&btp->bt_hash[i].bh_lock);
INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
/*
* buftarg list for delwrite queue processing
*/
-STATIC LIST_HEAD(xfs_buftarg_list);
-STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
+static LIST_HEAD(xfs_buftarg_list);
+static DEFINE_SPINLOCK(xfs_buftarg_lock);
STATIC void
xfs_register_buftarg(
int external)
{
xfs_flush_buftarg(btp, 1);
+ xfs_blkdev_issue_flush(btp);
if (external)
xfs_blkdev_put(btp->bt_bdev);
xfs_free_bufhash(btp);
struct backing_dev_info *bdi;
struct inode *inode;
struct address_space *mapping;
- static struct address_space_operations mapping_aops = {
+ static const struct address_space_operations mapping_aops = {
.sync_page = block_sync_page,
.migratepage = fail_migrate_page,
};
return 0;
}
+/*
+ * Move as many buffers as specified to the supplied list
+ * idicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+ xfs_buftarg_t *target,
+ struct list_head *list,
+ unsigned long age)
+{
+ xfs_buf_t *bp, *n;
+ struct list_head *dwq = &target->bt_delwrite_queue;
+ spinlock_t *dwlk = &target->bt_delwrite_lock;
+ int skipped = 0;
+ int force;
+
+ force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+ INIT_LIST_HEAD(list);
+ spin_lock(dwlk);
+ list_for_each_entry_safe(bp, n, dwq, b_list) {
+ XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
+ ASSERT(bp->b_flags & XBF_DELWRI);
+
+ if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
+ if (!force &&
+ time_before(jiffies, bp->b_queuetime + age)) {
+ xfs_buf_unlock(bp);
+ break;
+ }
+
+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
+ _XBF_RUN_QUEUES);
+ bp->b_flags |= XBF_WRITE;
+ list_move_tail(&bp->b_list, list);
+ } else
+ skipped++;
+ }
+ spin_unlock(dwlk);
+
+ return skipped;
+
+}
+
STATIC int
xfsbufd(
- void *data)
+ void *data)
{
- struct list_head tmp;
- unsigned long age;
- xfs_buftarg_t *target = (xfs_buftarg_t *)data;
- xfs_buf_t *bp, *n;
- struct list_head *dwq = &target->bt_delwrite_queue;
- spinlock_t *dwlk = &target->bt_delwrite_lock;
+ struct list_head tmp;
+ xfs_buftarg_t *target = (xfs_buftarg_t *)data;
+ int count;
+ xfs_buf_t *bp;
current->flags |= PF_MEMALLOC;
- INIT_LIST_HEAD(&tmp);
do {
if (unlikely(freezing(current))) {
set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
schedule_timeout_interruptible(
xfs_buf_timer_centisecs * msecs_to_jiffies(10));
- age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
- spin_lock(dwlk);
- list_for_each_entry_safe(bp, n, dwq, b_list) {
- XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
- ASSERT(bp->b_flags & XBF_DELWRI);
-
- if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
- if (!test_bit(XBT_FORCE_FLUSH,
- &target->bt_flags) &&
- time_before(jiffies,
- bp->b_queuetime + age)) {
- xfs_buf_unlock(bp);
- break;
- }
-
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
- bp->b_flags |= XBF_WRITE;
- list_move(&bp->b_list, &tmp);
- }
- }
- spin_unlock(dwlk);
+ xfs_buf_delwri_split(target, &tmp,
+ xfs_buf_age_centisecs * msecs_to_jiffies(10));
+ count = 0;
while (!list_empty(&tmp)) {
bp = list_entry(tmp.next, xfs_buf_t, b_list);
ASSERT(target == bp->b_target);
list_del_init(&bp->b_list);
xfs_buf_iostrategy(bp);
-
- blk_run_address_space(target->bt_mapping);
+ count++;
}
if (as_list_len > 0)
purge_addresses();
+ if (count)
+ blk_run_address_space(target->bt_mapping);
- clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
} while (!kthread_should_stop());
return 0;
*/
int
xfs_flush_buftarg(
- xfs_buftarg_t *target,
- int wait)
+ xfs_buftarg_t *target,
+ int wait)
{
- struct list_head tmp;
- xfs_buf_t *bp, *n;
- int pincount = 0;
- struct list_head *dwq = &target->bt_delwrite_queue;
- spinlock_t *dwlk = &target->bt_delwrite_lock;
+ struct list_head tmp;
+ xfs_buf_t *bp, *n;
+ int pincount = 0;
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
- INIT_LIST_HEAD(&tmp);
- spin_lock(dwlk);
- list_for_each_entry_safe(bp, n, dwq, b_list) {
- ASSERT(bp->b_target == target);
- ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
- XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
- if (xfs_buf_ispin(bp)) {
- pincount++;
- continue;
- }
-
- list_move(&bp->b_list, &tmp);
- }
- spin_unlock(dwlk);
+ set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+ pincount = xfs_buf_delwri_split(target, &tmp, 0);
/*
* Dropped the delayed write list lock, now walk the temporary list
*/
list_for_each_entry_safe(bp, n, &tmp, b_list) {
- xfs_buf_lock(bp);
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
- bp->b_flags |= XBF_WRITE;
+ ASSERT(target == bp->b_target);
if (wait)
bp->b_flags &= ~XBF_ASYNC;
else
xfs_buf_iostrategy(bp);
}
+ if (wait)
+ blk_run_address_space(target->bt_mapping);
+
/*
* Remaining list items must be flushed before returning
*/
xfs_buf_relse(bp);
}
- if (wait)
- blk_run_address_space(target->bt_mapping);
-
return pincount;
}
int __init
xfs_buf_init(void)
{
- int error = -ENOMEM;
-
#ifdef XFS_BUF_TRACE
xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
#endif
- xfs_buf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
+ xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
+ KM_ZONE_HWALIGN, NULL);
if (!xfs_buf_zone)
goto out_free_trace_buf;
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
- xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
+ xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup);
if (!xfs_buf_shake)
goto out_destroy_xfsdatad_workqueue;
#ifdef XFS_BUF_TRACE
ktrace_free(xfs_buf_trace_buf);
#endif
- return error;
+ return -ENOMEM;
}
void
xfs_buf_terminate(void)
{
- kmem_shake_deregister(xfs_buf_shake);
+ remove_shrinker(xfs_buf_shake);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);
ktrace_free(xfs_buf_trace_buf);
#endif
}
+
+#ifdef CONFIG_KDB_MODULES
+struct list_head *
+xfs_get_buftarg_list(void)
+{
+ return &xfs_buftarg_list;
+}
+#endif