From 86093aaff5be5b214613eb60553e236bdb389c84 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 28 Jan 2010 18:24:06 +0200 Subject: [PATCH] exofs: convert io_state to use pages array instead of bio at input * inode.c operations are full-pages based, and not actually true scatter-gather * Lets us use more pages at once upto 512 (from 249) in 64 bit * Brings us much much closer to be able to use exofs's io_state engine from objlayout driver. (Once I decide where to put the common code) After RAID0 patch the outer (input) bio was never used as a bio, but was simply a page carrier into the raid engine. Even in the simple mirror/single-dev arrangement pages info was copied into a second bio. It is now easer to just pass a pages array into the io_state and prepare bio(s) once. Signed-off-by: Boaz Harrosh --- fs/exofs/exofs.h | 5 +++- fs/exofs/inode.c | 81 ++++++++++++++++++++++++++++---------------------------- fs/exofs/ios.c | 46 ++++++++++++++++++-------------- 3 files changed, 71 insertions(+), 61 deletions(-) diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 0d8a34b..acfebd3 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -128,7 +128,10 @@ struct exofs_io_state { loff_t offset; unsigned long length; void *kern_buff; - struct bio *bio; + + struct page **pages; + unsigned nr_pages; + unsigned pgbase; /* Attributes */ unsigned in_attr_len; diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 2b3163e..6ca0b01 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -41,16 +41,18 @@ enum { BIO_MAX_PAGES_KMALLOC = (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), + MAX_PAGES_KMALLOC = + PAGE_SIZE / sizeof(struct page *), }; struct page_collect { struct exofs_sb_info *sbi; - struct request_queue *req_q; struct inode *inode; unsigned expected_pages; struct exofs_io_state *ios; - struct bio *bio; + struct page **pages; + unsigned alloc_pages; unsigned nr_pages; unsigned long length; loff_t pg_first; /* keep 64bit also in 32-arches */ @@ -62,15 +64,12 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; pcol->sbi = sbi; - /* Create master bios on first Q, later on cloning, each clone will be - * allocated on it's destination Q - */ - pcol->req_q = osd_request_queue(sbi->layout.s_ods[0]); pcol->inode = inode; pcol->expected_pages = expected_pages; pcol->ios = NULL; - pcol->bio = NULL; + pcol->pages = NULL; + pcol->alloc_pages = 0; pcol->nr_pages = 0; pcol->length = 0; pcol->pg_first = -1; @@ -80,7 +79,8 @@ static void _pcol_reset(struct page_collect *pcol) { pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); - pcol->bio = NULL; + pcol->pages = NULL; + pcol->alloc_pages = 0; pcol->nr_pages = 0; pcol->length = 0; pcol->pg_first = -1; @@ -90,13 +90,13 @@ static void _pcol_reset(struct page_collect *pcol) * it might not end here. don't be left with nothing */ if (!pcol->expected_pages) - pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; + pcol->expected_pages = MAX_PAGES_KMALLOC; } static int pcol_try_alloc(struct page_collect *pcol) { - int pages = min_t(unsigned, pcol->expected_pages, - BIO_MAX_PAGES_KMALLOC); + unsigned pages = min_t(unsigned, pcol->expected_pages, + MAX_PAGES_KMALLOC); if (!pcol->ios) { /* First time allocate io_state */ int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); @@ -105,23 +105,28 @@ static int pcol_try_alloc(struct page_collect *pcol) return ret; } + /* TODO: easily support bio chaining */ + pages = min_t(unsigned, pages, + pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC); + for (; pages; pages >>= 1) { - pcol->bio = bio_kmalloc(GFP_KERNEL, pages); - if (likely(pcol->bio)) + pcol->pages = kmalloc(pages * sizeof(struct page *), + GFP_KERNEL); + if (likely(pcol->pages)) { + pcol->alloc_pages = pages; return 0; + } } - EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", + EXOFS_ERR("Failed to kmalloc expected_pages=%u\n", pcol->expected_pages); return -ENOMEM; } static void pcol_free(struct page_collect *pcol) { - if (pcol->bio) { - bio_put(pcol->bio); - pcol->bio = NULL; - } + kfree(pcol->pages); + pcol->pages = NULL; if (pcol->ios) { exofs_put_io_state(pcol->ios); @@ -132,11 +137,10 @@ static void pcol_free(struct page_collect *pcol) static int pcol_add_page(struct page_collect *pcol, struct page *page, unsigned len) { - int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); - if (unlikely(len != added_len)) + if (unlikely(pcol->nr_pages >= pcol->alloc_pages)) return -ENOMEM; - ++pcol->nr_pages; + pcol->pages[pcol->nr_pages++] = page; pcol->length += len; return 0; } @@ -181,7 +185,6 @@ static void update_write_page(struct page *page, int ret) */ static int __readpages_done(struct page_collect *pcol, bool do_unlock) { - struct bio_vec *bvec; int i; u64 resid; u64 good_bytes; @@ -198,8 +201,8 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock) pcol->inode->i_ino, _LLU(good_bytes), pcol->length, pcol->nr_pages); - __bio_for_each_segment(bvec, pcol->bio, i, 0) { - struct page *page = bvec->bv_page; + for (i = 0; i < pcol->nr_pages; i++) { + struct page *page = pcol->pages[i]; struct inode *inode = page->mapping->host; int page_stat; @@ -218,7 +221,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock) ret = update_read_page(page, page_stat); if (do_unlock) unlock_page(page); - length += bvec->bv_len; + length += PAGE_SIZE; } pcol_free(pcol); @@ -238,11 +241,10 @@ static void readpages_done(struct exofs_io_state *ios, void *p) static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) { - struct bio_vec *bvec; int i; - __bio_for_each_segment(bvec, pcol->bio, i, 0) { - struct page *page = bvec->bv_page; + for (i = 0; i < pcol->nr_pages; i++) { + struct page *page = pcol->pages[i]; if (rw == READ) update_read_page(page, ret); @@ -260,13 +262,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync) struct page_collect *pcol_copy = NULL; int ret; - if (!pcol->bio) + if (!pcol->pages) return 0; /* see comment in _readpage() about sync reads */ WARN_ON(is_sync && (pcol->nr_pages != 1)); - ios->bio = pcol->bio; + ios->pages = pcol->pages; + ios->nr_pages = pcol->nr_pages; ios->length = pcol->length; ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; @@ -366,7 +369,7 @@ try_again: goto try_again; } - if (!pcol->bio) { + if (!pcol->pages) { ret = pcol_try_alloc(pcol); if (unlikely(ret)) goto fail; @@ -448,7 +451,6 @@ static int exofs_readpage(struct file *file, struct page *page) static void writepages_done(struct exofs_io_state *ios, void *p) { struct page_collect *pcol = p; - struct bio_vec *bvec; int i; u64 resid; u64 good_bytes; @@ -467,8 +469,8 @@ static void writepages_done(struct exofs_io_state *ios, void *p) pcol->inode->i_ino, _LLU(good_bytes), pcol->length, pcol->nr_pages); - __bio_for_each_segment(bvec, pcol->bio, i, 0) { - struct page *page = bvec->bv_page; + for (i = 0; i < pcol->nr_pages; i++) { + struct page *page = pcol->pages[i]; struct inode *inode = page->mapping->host; int page_stat; @@ -485,7 +487,7 @@ static void writepages_done(struct exofs_io_state *ios, void *p) EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", inode->i_ino, page->index, page_stat); - length += bvec->bv_len; + length += PAGE_SIZE; } pcol_free(pcol); @@ -500,7 +502,7 @@ static int write_exec(struct page_collect *pcol) struct page_collect *pcol_copy = NULL; int ret; - if (!pcol->bio) + if (!pcol->pages) return 0; pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); @@ -512,9 +514,8 @@ static int write_exec(struct page_collect *pcol) *pcol_copy = *pcol; - pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ - - ios->bio = pcol_copy->bio; + ios->pages = pcol_copy->pages; + ios->nr_pages = pcol_copy->nr_pages; ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; ios->length = pcol_copy->length; ios->done = writepages_done; @@ -605,7 +606,7 @@ try_again: goto try_again; } - if (!pcol->bio) { + if (!pcol->pages) { ret = pcol_try_alloc(pcol); if (unlikely(ret)) goto fail; diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 6e446b2..263052c 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -283,10 +283,11 @@ static void _offset_dev_unit_off(struct exofs_io_state *ios, u64 file_offset, *dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1; } -static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, - struct exofs_per_dev_state *per_dev, int cur_len) +static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, + unsigned pgbase, struct exofs_per_dev_state *per_dev, + int cur_len) { - unsigned bv = *cur_bvec; + unsigned pg = *cur_pg; struct request_queue *q = osd_request_queue(exofs_ios_od(ios, per_dev->dev)); @@ -295,7 +296,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, if (per_dev->bio == NULL) { unsigned pages_in_stripe = ios->layout->group_width * (ios->layout->stripe_unit / PAGE_SIZE); - unsigned bio_size = (ios->bio->bi_vcnt + pages_in_stripe) / + unsigned bio_size = (ios->nr_pages + pages_in_stripe) / ios->layout->group_width; per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); @@ -307,21 +308,22 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, } while (cur_len > 0) { - int added_len; - struct bio_vec *bvec = &ios->bio->bi_io_vec[bv]; + unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); + unsigned added_len; - BUG_ON(ios->bio->bi_vcnt <= bv); - cur_len -= bvec->bv_len; + BUG_ON(ios->nr_pages <= pg); + cur_len -= pglen; - added_len = bio_add_pc_page(q, per_dev->bio, bvec->bv_page, - bvec->bv_len, bvec->bv_offset); - if (unlikely(bvec->bv_len != added_len)) + added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg], + pglen, pgbase); + if (unlikely(pglen != added_len)) return -ENOMEM; - ++bv; + pgbase = 0; + ++pg; } BUG_ON(cur_len); - *cur_bvec = bv; + *cur_pg = pg; return 0; } @@ -332,10 +334,10 @@ static int _prepare_for_striping(struct exofs_io_state *ios) unsigned stripe_unit = ios->layout->stripe_unit; unsigned comp = 0; unsigned stripes = 0; - unsigned cur_bvec = 0; - int ret; + unsigned cur_pg = 0; + int ret = 0; - if (!ios->bio) { + if (!ios->pages) { if (ios->kern_buff) { struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; unsigned unit_off; @@ -352,7 +354,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios) while (length) { struct exofs_per_dev_state *per_dev = &ios->per_dev[comp]; - unsigned cur_len; + unsigned cur_len, page_off; if (!per_dev->length) { unsigned unit_off; @@ -362,11 +364,15 @@ static int _prepare_for_striping(struct exofs_io_state *ios) stripes++; cur_len = min_t(u64, stripe_unit - unit_off, length); offset += cur_len; + page_off = unit_off & ~PAGE_MASK; + BUG_ON(page_off != ios->pgbase); } else { cur_len = min_t(u64, stripe_unit, length); + page_off = 0; } - ret = _add_stripe_unit(ios, &cur_bvec, per_dev, cur_len); + ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, + cur_len); if (unlikely(ret)) goto out; @@ -448,7 +454,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) per_dev->or = or; per_dev->offset = master_dev->offset; - if (ios->bio) { + if (ios->pages) { struct bio *bio; if (per_dev != master_dev) { @@ -541,7 +547,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) } per_dev->or = or; - if (ios->bio) { + if (ios->pages) { osd_req_read(or, &ios->obj, per_dev->offset, per_dev->bio, per_dev->length); EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" -- 1.8.2.3