*
* O_DIRECT
*
- * 04Jul2002 akpm@zip.com.au
+ * 04Jul2002 Andrew Morton
* Initial version
* 11Sep2002 janetinc@us.ibm.com
* added readv/writev support.
- * 29Oct2002 akpm@zip.com.au
+ * 29Oct2002 Andrew Morton
* rewrote bio_add_page() support.
* 30Oct2002 pbadari@us.ibm.com
* added support for non-aligned IO.
int nr_pages;
nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
- down_read(&current->mm->mmap_sem);
- ret = get_user_pages(
- current, /* Task for fault acounting */
- current->mm, /* whose pages? */
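+ /*
+ * get_user_pages_fast() pins the user pages for us and only
+ * takes mmap_sem internally when it must fall back to the
+ * slow get_user_pages() path, so no locking is needed here.
+ */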
+ ret = get_user_pages_fast(
dio->curr_user_address, /* Where from? */
nr_pages, /* How many pages? */
dio->rw == READ, /* Write to memory? */
- 0, /* force (?) */
- &dio->pages[0],
- NULL); /* vmas */
- up_read(&current->mm->mmap_sem);
+ &dio->pages[0]); /* Put results here */
if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
- struct page *page = ZERO_PAGE(dio->curr_user_address);
+ struct page *page = ZERO_PAGE(0);
/*
* A memory fault, but the filesystem has some outstanding
* mapped blocks. We need to use those blocks up to avoid
/*
* Asynchronous IO callback.
*/
-static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
+static void dio_bio_end_aio(struct bio *bio, int error)
{
struct dio *dio = bio->bi_private;
unsigned long remaining;
unsigned long flags;
- if (bio->bi_size)
- return 1;
-
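+ /*
+ * bi_end_io is called exactly once, after the entire bio has
+ * completed, so there is no partial-completion case to check.
+ */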
/* cleanup the bio */
dio_bio_complete(dio, bio);
aio_complete(dio->iocb, ret, 0);
kfree(dio);
}
-
- return 0;
}
/*
* During I/O bi_private points at the dio. After I/O, bi_private is used to
* implement a singly-linked list of completed BIOs, at dio->bio_list.
*/
-static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
+static void dio_bio_end_io(struct bio *bio, int error)
{
struct dio *dio = bio->bi_private;
unsigned long flags;
- if (bio->bi_size)
- return 1;
-
spin_lock_irqsave(&dio->bio_lock, flags);
bio->bi_private = dio->bio_list;
dio->bio_list = bio;
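+ /*
+ * Wake the submitter if it is sleeping in dio_await_one() and
+ * only its own reference to the dio now remains.
+ */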
if (--dio->refcount == 1 && dio->waiter)
wake_up_process(dio->waiter);
spin_unlock_irqrestore(&dio->bio_lock, flags);
- return 0;
}
static int
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, nr_vecs);
- if (bio == NULL)
- return -ENOMEM;
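+ /*
+ * bio_alloc() is backed by a mempool and will not fail with
+ * GFP_KERNEL; it may sleep, but no NULL check is required.
+ */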
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
this_chunk_bytes = this_chunk_blocks << dio->blkbits;
- page = ZERO_PAGE(dio->curr_user_address);
+ page = ZERO_PAGE(0);
if (submit_page_section(dio, page, 0, this_chunk_bytes,
dio->next_block_for_io))
return;
page_cache_release(page);
goto out;
}
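+ /*
+ * Zero this block of the page; zero_user() does its own
+ * temporary kmap, so no kmap slot needs to be passed.
+ */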
- zero_user_page(page, block_in_page << blkbits,
- 1 << blkbits, KM_USER0);
+ zero_user(page, block_in_page << blkbits,
+ 1 << blkbits);
dio->block_in_file++;
block_in_page++;
goto next_block;
ssize_t ret2;
size_t bytes;
- dio->bio = NULL;
dio->inode = inode;
dio->rw = rw;
dio->blkbits = blkbits;
dio->blkfactor = inode->i_blkbits - blkbits;
- dio->start_zero_done = 0;
- dio->size = 0;
dio->block_in_file = offset >> blkbits;
- dio->blocks_available = 0;
- dio->cur_page = NULL;
- dio->boundary = 0;
- dio->reap_counter = 0;
dio->get_block = get_block;
dio->end_io = end_io;
- dio->map_bh.b_private = NULL;
dio->final_block_in_bio = -1;
dio->next_block_for_io = -1;
- dio->page_errors = 0;
- dio->io_error = 0;
- dio->result = 0;
dio->iocb = iocb;
dio->i_size = i_size_read(inode);
spin_lock_init(&dio->bio_lock);
dio->refcount = 1;
- dio->bio_list = NULL;
- dio->waiter = NULL;
/*
* In case of non-aligned buffers, we may need 2 more
*/
if (unlikely(dio->blkfactor))
dio->pages_in_io = 2;
- else
- dio->pages_in_io = 0;
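+ /*
+ * Fields not set explicitly above are already zero: the dio
+ * is allocated with kzalloc() in __blockdev_direct_IO().
+ */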
for (seg = 0; seg < nr_segs; seg++) {
user_addr = (unsigned long)iov[seg].iov_base;
if (dio->bio)
dio_bio_submit(dio);
- /* All IO is now issued, send it on its way */
- blk_run_address_space(inode->i_mapping);
-
/*
* It is possible that we return short IO due to end of file.
* In that case, we need to release all the pages we got hold on.
((rw & READ) || (dio->result == dio->size)))
ret = -EIOCBQUEUED;
- if (ret != -EIOCBQUEUED)
+ if (ret != -EIOCBQUEUED) {
+ /* All IO is now issued, send it on its way */
+ blk_run_address_space(inode->i_mapping);
dio_await_completion(dio);
+ }
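+ /*
+ * In the async (-EIOCBQUEUED) case the queue is not run here,
+ * which allows the submitter to batch further requests before
+ * the device is unplugged; completion is driven entirely from
+ * the bio end_io path.
+ */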
/*
* Sync will always be dropping the final ref and completing the
spin_lock_irqsave(&dio->bio_lock, flags);
ret2 = --dio->refcount;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- BUG_ON(!dio->is_async && ret2 != 0);
+
if (ret2 == 0) {
ret = dio_complete(dio, offset, ret);
kfree(dio);
int acquire_i_mutex = 0;
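+ /*
+ * WRITE_ODIRECT_PLUG flags the IO as synchronous for the IO
+ * scheduler but, unlike WRITE_SYNC, does not force an unplug,
+ * so O_DIRECT writes can be batched.
+ */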
if (rw & WRITE)
- rw = WRITE_SYNC;
+ rw = WRITE_ODIRECT_PLUG;
if (bdev)
- bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
+ bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
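+ /*
+ * The logical block size is the smallest unit the device can
+ * address; the DIO alignment checks below fall back to it when
+ * the filesystem block size cannot be used.
+ */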
if (offset & blocksize_mask) {
if (bdev)
}
}
- dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+ dio = kzalloc(sizeof(*dio), GFP_KERNEL);
retval = -ENOMEM;
if (!dio)
goto out;
retval = direct_io_worker(rw, iocb, inode, iov, offset,
nr_segs, blkbits, get_block, end_io, dio);
+ /*
+ * In case of an error, an extending write may have instantiated
+ * a few blocks outside i_size. Trim these off again for
+ * DIO_LOCKING. NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to
+ * handle this by their own means.
+ */
+ if (unlikely(retval < 0 && (rw & WRITE))) {
+ loff_t isize = i_size_read(inode);
+
+ if (end > isize && dio_lock_type == DIO_LOCKING)
+ vmtruncate(inode, isize);
+ }
+
if (rw == READ && dio_lock_type == DIO_LOCKING)
release_i_mutex = 0;