#include <linux/backing-dev.h>
#include <linux/freezer.h>
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
#ifdef XFS_BUF_TRACE
void
bp, id,
(void *)(unsigned long)bp->b_flags,
(void *)(unsigned long)bp->b_hold.counter,
- (void *)(unsigned long)bp->b_sema.count.counter,
+ (void *)(unsigned long)bp->b_sema.count,
(void *)current,
data, ra,
(void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff),
{
a_list_t *aentry;
+#ifdef CONFIG_XEN
+ /*
+ * Xen needs to be able to make sure it can get an exclusive
+ * RO mapping of pages it wants to turn into a pagetable. If
+ * a newly allocated page is also still being vmap()ed by xfs,
+ * it will cause pagetable construction to fail. This is a
+ * quick workaround to always eagerly unmap pages so that Xen
+ * is happy.
+ */
+ vunmap(addr);
+ return;
+#endif
+
aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
if (likely(aentry)) {
spin_lock(&as_lock);
memset(bp, 0, sizeof(xfs_buf_t));
atomic_set(&bp->b_hold, 1);
- init_MUTEX_LOCKED(&bp->b_iodonesema);
+ init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_list);
INIT_LIST_HEAD(&bp->b_hash_list);
init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
xfs_buf_t *bp)
{
if (bp->b_pages != bp->b_page_array) {
- kmem_free(bp->b_pages,
- bp->b_page_count * sizeof(struct page *));
+ kmem_free(bp->b_pages);
}
}
printk(KERN_ERR
"XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n",
- __FUNCTION__, gfp_mask);
+ __func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
xfsbufd_wakeup(0, gfp_mask);
- congestion_wait(WRITE, HZ/50);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
page_count--;
if (blocksize >= PAGE_CACHE_SIZE) {
if (flags & XBF_READ)
- bp->b_locked = 1;
+ bp->b_flags |= _XBF_PAGE_LOCKED;
} else if (!PagePrivate(page)) {
if (test_page_region(page, offset, nbytes))
page_count++;
offset = 0;
}
- if (!bp->b_locked) {
+ if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
for (i = 0; i < bp->b_page_count; i++)
unlock_page(bp->b_pages[i]);
}
error = _xfs_buf_map_pages(bp, flags);
if (unlikely(error)) {
printk(KERN_WARNING "%s: failed to map pages\n",
- __FUNCTION__);
+ __func__);
goto no_buffer;
}
}
return NULL;
}
+STATIC int
+_xfs_buf_read(
+ xfs_buf_t *bp,
+ xfs_buf_flags_t flags)
+{
+ int status;
+
+ XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
+
+ ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+ ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+
+ bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+ bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
+ XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+
+ status = xfs_buf_iorequest(bp);
+ if (!status && !(flags & XBF_ASYNC))
+ status = xfs_buf_iowait(bp);
+ return status;
+}
+
xfs_buf_t *
xfs_buf_read_flags(
xfs_buftarg_t *target,
if (!XFS_BUF_ISDONE(bp)) {
XB_TRACE(bp, "read", (unsigned long)flags);
XFS_STATS_INC(xb_get_read);
- xfs_buf_iostart(bp, flags);
+ _xfs_buf_read(bp, flags);
} else if (flags & XBF_ASYNC) {
XB_TRACE(bp, "read_async", (unsigned long)flags);
/*
mem_to_page(
void *addr)
{
- if (((unsigned long)addr < VMALLOC_START) ||
- ((unsigned long)addr >= VMALLOC_END)) {
+ if ((!is_vmalloc_addr(addr))) {
return virt_to_page(addr);
} else {
return vmalloc_to_page(addr);
{
int rval;
int i = 0;
- size_t ptr;
- size_t end, end_cur;
- off_t offset;
+ unsigned long pageaddr;
+ unsigned long offset;
+ size_t buflen;
int page_count;
- page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
- offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK);
- if (offset && (len > PAGE_CACHE_SIZE))
- page_count++;
+ pageaddr = (unsigned long)mem & PAGE_CACHE_MASK;
+ offset = (unsigned long)mem - pageaddr;
+ buflen = PAGE_CACHE_ALIGN(len + offset);
+ page_count = buflen >> PAGE_CACHE_SHIFT;
/* Free any previous set of page pointers */
if (bp->b_pages)
bp->b_pages = NULL;
bp->b_addr = mem;
- rval = _xfs_buf_get_pages(bp, page_count, 0);
+ rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
if (rval)
return rval;
bp->b_offset = offset;
- ptr = (size_t) mem & PAGE_CACHE_MASK;
- end = PAGE_CACHE_ALIGN((size_t) mem + len);
- end_cur = end;
- /* set up first page */
- bp->b_pages[0] = mem_to_page(mem);
-
- ptr += PAGE_CACHE_SIZE;
- bp->b_page_count = ++i;
- while (ptr < end) {
- bp->b_pages[i] = mem_to_page((void *)ptr);
- bp->b_page_count = ++i;
- ptr += PAGE_CACHE_SIZE;
+
+ for (i = 0; i < bp->b_page_count; i++) {
+ bp->b_pages[i] = mem_to_page((void *)pageaddr);
+ pageaddr += PAGE_CACHE_SIZE;
}
- bp->b_locked = 0;
- bp->b_count_desired = bp->b_buffer_length = len;
+ bp->b_count_desired = len;
+ bp->b_buffer_length = buflen;
bp->b_flags |= XBF_MAPPED;
+ bp->b_flags &= ~_XBF_PAGE_LOCKED;
return 0;
}
error = _xfs_buf_map_pages(bp, XBF_MAPPED);
if (unlikely(error)) {
printk(KERN_WARNING "%s: failed to map pages\n",
- __FUNCTION__);
+ __func__);
goto fail_free_mem;
}
return;
}
+ ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
spin_unlock(&hash->bh_lock);
xfs_buf_free(bp);
}
- } else {
- /*
- * Catch reference count leaks
- */
- ASSERT(atomic_read(&bp->b_hold) >= 0);
}
}
xfs_buf_lock_value(
xfs_buf_t *bp)
{
- return atomic_read(&bp->b_sema.count);
+ return bp->b_sema.count;
}
#endif
xfs_buf_t *bp =
container_of(work, xfs_buf_t, b_iodone_work);
- if (bp->b_iodone)
+ /*
+ * We can get an EOPNOTSUPP to ordered writes. Here we clear the
+ * ordered flag and reissue them. Because we can't tell the higher
+ * layers directly that they should not issue ordered I/O anymore, they
+ * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
+ */
+ if ((bp->b_error == EOPNOTSUPP) &&
+ (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
+ XB_TRACE(bp, "ordered_retry", bp->b_iodone);
+ bp->b_flags &= ~XBF_ORDERED;
+ bp->b_flags |= _XFS_BARRIER_FAILED;
+ xfs_buf_iorequest(bp);
+ } else if (bp->b_iodone)
(*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
xfs_buf_t *bp,
int schedule)
{
- bp->b_flags &= ~(XBF_READ | XBF_WRITE);
+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
if (bp->b_error == 0)
bp->b_flags |= XBF_DONE;
xfs_buf_iodone_work(&bp->b_iodone_work);
}
} else {
- up(&bp->b_iodonesema);
+ complete(&bp->b_iowait);
}
}
XB_TRACE(bp, "ioerror", (unsigned long)error);
}
-/*
- * Initiate I/O on a buffer, based on the flags supplied.
- * The b_iodone routine in the buffer supplied will only be called
- * when all of the subsidiary I/O requests, if any, have been completed.
- */
int
-xfs_buf_iostart(
- xfs_buf_t *bp,
- xfs_buf_flags_t flags)
+xfs_bawrite(
+ void *mp,
+ struct xfs_buf *bp)
{
- int status = 0;
+ XB_TRACE(bp, "bawrite", 0);
- XB_TRACE(bp, "iostart", (unsigned long)flags);
+ ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
- if (flags & XBF_DELWRI) {
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
- bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
- xfs_buf_delwri_queue(bp, 1);
- return status;
- }
+ xfs_buf_delwri_dequeue(bp);
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
- bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+ bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
+ bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
- BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL);
+ bp->b_mount = mp;
+ bp->b_strat = xfs_bdstrat_cb;
+ return xfs_bdstrat_cb(bp);
+}
- /* For writes allow an alternate strategy routine to precede
- * the actual I/O request (which may not be issued at all in
- * a shutdown situation, for example).
- */
- status = (flags & XBF_WRITE) ?
- xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
+void
+xfs_bdwrite(
+ void *mp,
+ struct xfs_buf *bp)
+{
+ XB_TRACE(bp, "bdwrite", 0);
- /* Wait for I/O if we are not an async request.
- * Note: async I/O request completion will release the buffer,
- * and that can already be done by this point. So using the
- * buffer pointer from here on, after async I/O, is invalid.
- */
- if (!status && !(flags & XBF_ASYNC))
- status = xfs_buf_iowait(bp);
+ bp->b_strat = xfs_bdstrat_cb;
+ bp->b_mount = mp;
- return status;
-}
+ bp->b_flags &= ~XBF_READ;
+ bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
-STATIC_INLINE int
-_xfs_buf_iolocked(
- xfs_buf_t *bp)
-{
- ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
- if (bp->b_flags & XBF_READ)
- return bp->b_locked;
- return 0;
+ xfs_buf_delwri_queue(bp, 1);
}
STATIC_INLINE void
int schedule)
{
if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
- bp->b_locked = 0;
+ bp->b_flags &= ~_XBF_PAGE_LOCKED;
xfs_buf_ioend(bp, schedule);
}
}
-STATIC int
+STATIC void
xfs_buf_bio_end_io(
struct bio *bio,
int error)
unsigned int blocksize = bp->b_target->bt_bsize;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- bp->b_error = EIO;
+ xfs_buf_ioerror(bp, -error);
do {
struct page *page = bvec->bv_page;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
- if (_xfs_buf_iolocked(bp)) {
+ if (bp->b_flags & _XBF_PAGE_LOCKED)
unlock_page(page);
- }
} while (bvec >= bio->bi_io_vec);
_xfs_buf_ioend(bp, 1);
bio_put(bio);
- return 0;
}
STATIC void
_xfs_buf_ioapply(
xfs_buf_t *bp)
{
- int i, rw, map_i, total_nr_pages, nr_pages;
+ int rw, map_i, total_nr_pages, nr_pages;
struct bio *bio;
int offset = bp->b_offset;
int size = bp->b_count_desired;
sector_t sector = bp->b_bn;
unsigned int blocksize = bp->b_target->bt_bsize;
- int locking = _xfs_buf_iolocked(bp);
total_nr_pages = bp->b_page_count;
map_i = 0;
* filesystem block size is not smaller than the page size.
*/
if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
- (bp->b_flags & XBF_READ) && locking &&
+ ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
+ (XBF_READ|_XBF_PAGE_LOCKED)) &&
(blocksize >= PAGE_CACHE_SIZE)) {
bio = bio_alloc(GFP_NOIO, 1);
goto submit_io;
}
- /* Lock down the pages which we need to for the request */
- if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
- for (i = 0; size; i++) {
- int nbytes = PAGE_CACHE_SIZE - offset;
- struct page *page = bp->b_pages[i];
-
- if (nbytes > size)
- nbytes = size;
-
- lock_page(page);
-
- size -= nbytes;
- offset = 0;
- }
- offset = bp->b_offset;
- size = bp->b_count_desired;
- }
-
next_chunk:
atomic_inc(&bp->b_io_remaining);
nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
XB_TRACE(bp, "iowait", 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
- down(&bp->b_iodonesema);
+ wait_for_completion(&bp->b_iowait);
XB_TRACE(bp, "iowaited", (long)bp->b_error);
return bp->b_error;
}
xfs_free_bufhash(
xfs_buftarg_t *btp)
{
- kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+ kmem_free(btp->bt_hash);
btp->bt_hash = NULL;
}
void
xfs_free_buftarg(
- xfs_buftarg_t *btp,
- int external)
+ struct xfs_mount *mp,
+ struct xfs_buftarg *btp)
{
xfs_flush_buftarg(btp, 1);
- xfs_blkdev_issue_flush(btp);
- if (external)
- xfs_blkdev_put(btp->bt_bdev);
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ xfs_blkdev_issue_flush(btp);
xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
xfs_unregister_buftarg(btp);
kthread_stop(btp->bt_task);
- kmem_free(btp, sizeof(*btp));
+ kmem_free(btp);
}
STATIC int
struct block_device *bdev)
{
return xfs_setsize_buftarg_flags(btp,
- PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
+ PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
}
int
INIT_LIST_HEAD(&btp->bt_list);
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
- spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
+ spin_lock_init(&btp->bt_delwrite_lock);
btp->bt_flags = 0;
btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
if (IS_ERR(btp->bt_task)) {
return btp;
error:
- kmem_free(btp, sizeof(*btp));
+ kmem_free(btp);
return NULL;
}
current->flags |= PF_MEMALLOC;
+ set_freezable();
+
do {
if (unlikely(freezing(current))) {
set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
xfs_buf_t *bp, *n;
int pincount = 0;
+ xfs_buf_runall_queues(xfsconvertd_workqueue);
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
xfs_buf_init(void)
{
#ifdef XFS_BUF_TRACE
- xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
+ xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS);
#endif
xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
+ xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+ if (!xfsconvertd_workqueue)
+ goto out_destroy_xfsdatad_workqueue;
+
register_shrinker(&xfs_buf_shake);
return 0;
+ out_destroy_xfsdatad_workqueue:
+ destroy_workqueue(xfsdatad_workqueue);
out_destroy_xfslogd_workqueue:
destroy_workqueue(xfslogd_workqueue);
out_free_buf_zone:
xfs_buf_terminate(void)
{
unregister_shrinker(&xfs_buf_shake);
+ destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);