exofs: RAID0 support
[safe/jmp/linux-2.6] / fs / exofs / ios.c
index 2b81f99..6e446b2 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include <scsi/scsi_device.h>
+#include <asm/div64.h>
 
 #include "exofs.h"
 
@@ -110,7 +111,17 @@ void exofs_put_io_state(struct exofs_io_state *ios)
 unsigned exofs_layout_od_id(struct exofs_layout *layout,
                            osd_id obj_no, unsigned layout_index)
 {
-       return layout_index;
+/*     switch (layout->lay_func) {
+       case LAYOUT_MOVING_WINDOW:
+       {*/
+               unsigned dev_mod = obj_no;
+
+               return (layout_index + dev_mod * layout->mirrors_p1) %
+                                                             layout->s_numdevs;
+/*     }
+       case LAYOUT_FUNC_IMPLICT:
+               return layout->devs[layout_index];
+       }*/
 }
 
 static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
@@ -225,8 +236,8 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
                        _clear_bio(ios->per_dev[i].bio);
                        EXOFS_DBGMSG("start read offset passed end of file "
                                "offset=0x%llx, length=0x%llx\n",
-                               _LLU(ios->offset),
-                               _LLU(ios->length));
+                               _LLU(ios->per_dev[i].offset),
+                               _LLU(ios->per_dev[i].length));
 
                        continue; /* we recovered */
                }
@@ -248,6 +259,127 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
        return acumulated_lin_err;
 }
 
+/* REMOVEME: After review
+   Some quoteing from the standard
+
+   L = logical offset into the file
+   W = number of data components in a stripe
+   S = W * stripe_unit (S is Stripe length)
+   N = L / S (N is the stripe Number)
+   C = (L-(N*S)) / stripe_unit (C is the component)
+   O = (N*stripe_unit)+(L%stripe_unit) (O is the object's offset)
+*/
+
+static void _offset_dev_unit_off(struct exofs_io_state *ios, u64 file_offset,
+                       u64 *obj_offset, unsigned *dev, unsigned *unit_off)
+{
+       unsigned stripe_unit = ios->layout->stripe_unit;
+       unsigned stripe_length = stripe_unit * ios->layout->group_width;
+       u64 stripe_no = file_offset;
+       unsigned stripe_mod = do_div(stripe_no, stripe_length);
+
+       *unit_off = stripe_mod % stripe_unit;
+       *obj_offset = stripe_no * stripe_unit + *unit_off;
+       *dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1;
+}
+
+static int _add_stripe_unit(struct exofs_io_state *ios,  unsigned *cur_bvec,
+                    struct exofs_per_dev_state *per_dev, int cur_len)
+{
+       unsigned bv = *cur_bvec;
+       struct request_queue *q =
+                       osd_request_queue(exofs_ios_od(ios, per_dev->dev));
+
+       per_dev->length += cur_len;
+
+       if (per_dev->bio == NULL) {
+               unsigned pages_in_stripe = ios->layout->group_width *
+                                       (ios->layout->stripe_unit / PAGE_SIZE);
+               unsigned bio_size = (ios->bio->bi_vcnt + pages_in_stripe) /
+                                               ios->layout->group_width;
+
+               per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
+               if (unlikely(!per_dev->bio)) {
+                       EXOFS_DBGMSG("Faild to allocate BIO size=%u\n",
+                                    bio_size);
+                       return -ENOMEM;
+               }
+       }
+
+       while (cur_len > 0) {
+               int added_len;
+               struct bio_vec *bvec = &ios->bio->bi_io_vec[bv];
+
+               BUG_ON(ios->bio->bi_vcnt <= bv);
+               cur_len -= bvec->bv_len;
+
+               added_len = bio_add_pc_page(q, per_dev->bio, bvec->bv_page,
+                                           bvec->bv_len, bvec->bv_offset);
+               if (unlikely(bvec->bv_len != added_len))
+                       return -ENOMEM;
+               ++bv;
+       }
+       BUG_ON(cur_len);
+
+       *cur_bvec = bv;
+       return 0;
+}
+
+static int _prepare_for_striping(struct exofs_io_state *ios)
+{
+       u64 length = ios->length;
+       u64 offset = ios->offset;
+       unsigned stripe_unit = ios->layout->stripe_unit;
+       unsigned comp = 0;
+       unsigned stripes = 0;
+       unsigned cur_bvec = 0;
+       int ret;
+
+       if (!ios->bio) {
+               if (ios->kern_buff) {
+                       struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
+                       unsigned unit_off;
+
+                       _offset_dev_unit_off(ios, offset, &per_dev->offset,
+                                            &per_dev->dev, &unit_off);
+                       /* no cross device without page array */
+                       BUG_ON((ios->layout->group_width > 1) &&
+                              (unit_off + length > stripe_unit));
+               }
+               ios->numdevs = ios->layout->mirrors_p1;
+               return 0;
+       }
+
+       while (length) {
+               struct exofs_per_dev_state *per_dev = &ios->per_dev[comp];
+               unsigned cur_len;
+
+               if (!per_dev->length) {
+                       unsigned unit_off;
+
+                       _offset_dev_unit_off(ios, offset, &per_dev->offset,
+                                            &per_dev->dev, &unit_off);
+                       stripes++;
+                       cur_len = min_t(u64, stripe_unit - unit_off, length);
+                       offset += cur_len;
+               } else {
+                       cur_len = min_t(u64, stripe_unit, length);
+               }
+
+               ret = _add_stripe_unit(ios, &cur_bvec, per_dev, cur_len);
+               if (unlikely(ret))
+                       goto out;
+
+               comp += ios->layout->mirrors_p1;
+               comp %= ios->layout->s_numdevs;
+
+               length -= cur_len;
+       }
+out:
+       ios->numdevs = stripes * ios->layout->mirrors_p1;
+       return ret;
+}
+
 int exofs_sbi_create(struct exofs_io_state *ios)
 {
        int i, ret;
@@ -296,61 +428,71 @@ out:
        return ret;
 }
 
-int exofs_sbi_write(struct exofs_io_state *ios)
+static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 {
-       int i, ret;
+       struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
+       unsigned dev = ios->per_dev[cur_comp].dev;
+       unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
+       int ret = 0;
 
-       for (i = 0; i < ios->layout->s_numdevs; i++) {
+       for (; cur_comp < last_comp; ++cur_comp, ++dev) {
+               struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
                struct osd_request *or;
 
-               or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
+               or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
                if (unlikely(!or)) {
                        EXOFS_ERR("%s: osd_start_request failed\n", __func__);
                        ret = -ENOMEM;
                        goto out;
                }
-               ios->per_dev[i].or = or;
-               ios->numdevs++;
+               per_dev->or = or;
+               per_dev->offset = master_dev->offset;
 
                if (ios->bio) {
                        struct bio *bio;
 
-                       if (i != 0) {
+                       if (per_dev != master_dev) {
                                bio = bio_kmalloc(GFP_KERNEL,
-                                                 ios->bio->bi_max_vecs);
+                                                 master_dev->bio->bi_max_vecs);
                                if (unlikely(!bio)) {
                                        EXOFS_DBGMSG(
                                              "Faild to allocate BIO size=%u\n",
-                                             ios->bio->bi_max_vecs);
+                                             master_dev->bio->bi_max_vecs);
                                        ret = -ENOMEM;
                                        goto out;
                                }
 
-                               __bio_clone(bio, ios->bio);
+                               __bio_clone(bio, master_dev->bio);
                                bio->bi_bdev = NULL;
                                bio->bi_next = NULL;
-                               ios->per_dev[i].bio =  bio;
+                               per_dev->length = master_dev->length;
+                               per_dev->bio =  bio;
+                               per_dev->dev = dev;
                        } else {
-                               bio = ios->bio;
+                               bio = master_dev->bio;
+                               /* FIXME: bio_set_dir() */
+                               bio->bi_rw |= (1 << BIO_RW);
                        }
 
-                       osd_req_write(or, &ios->obj, ios->offset, bio,
-                                     ios->length);
+                       osd_req_write(or, &ios->obj, per_dev->offset, bio,
+                                     per_dev->length);
                        EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
                                      "length=0x%llx dev=%d\n",
-                                    _LLU(ios->obj.id), _LLU(ios->offset),
-                                    _LLU(ios->length), i);
+                                    _LLU(ios->obj.id), _LLU(per_dev->offset),
+                                    _LLU(per_dev->length), dev);
                } else if (ios->kern_buff) {
-                       osd_req_write_kern(or, &ios->obj, ios->offset,
+                       ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
                                           ios->kern_buff, ios->length);
+                       if (unlikely(ret))
+                               goto out;
                        EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
                                      "length=0x%llx dev=%d\n",
-                                    _LLU(ios->obj.id), _LLU(ios->offset),
-                                    _LLU(ios->length), i);
+                                    _LLU(ios->obj.id), _LLU(per_dev->offset),
+                                    _LLU(ios->length), dev);
                } else {
                        osd_req_set_attributes(or, &ios->obj);
                        EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
-                                    _LLU(ios->obj.id), ios->out_attr_len, i);
+                                    _LLU(ios->obj.id), ios->out_attr_len, dev);
                }
 
                if (ios->out_attr)
@@ -361,40 +503,57 @@ int exofs_sbi_write(struct exofs_io_state *ios)
                        osd_req_add_get_attr_list(or, ios->in_attr,
                                                  ios->in_attr_len);
        }
-       ret = exofs_io_execute(ios);
 
 out:
        return ret;
 }
 
-int exofs_sbi_read(struct exofs_io_state *ios)
+int exofs_sbi_write(struct exofs_io_state *ios)
+{
+       int i;
+       int ret;
+
+       ret = _prepare_for_striping(ios);
+       if (unlikely(ret))
+               return ret;
+
+       for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
+               ret = _sbi_write_mirror(ios, i);
+               if (unlikely(ret))
+                       return ret;
+       }
+
+       ret = exofs_io_execute(ios);
+       return ret;
+}
+
+static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
 {
        struct osd_request *or;
-       struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
+       struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
        unsigned first_dev = (unsigned)ios->obj.id;
 
-       first_dev %= ios->layout->s_numdevs;
+       first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
        or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
        if (unlikely(!or)) {
                EXOFS_ERR("%s: osd_start_request failed\n", __func__);
                return -ENOMEM;
        }
        per_dev->or = or;
-       ios->numdevs++;
 
        if (ios->bio) {
-               osd_req_read(or, &ios->obj, ios->offset, ios->bio, ios->length);
+               osd_req_read(or, &ios->obj, per_dev->offset,
+                               per_dev->bio, per_dev->length);
                EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
                             " dev=%d\n", _LLU(ios->obj.id),
-                            _LLU(ios->offset), _LLU(ios->length),
+                            _LLU(per_dev->offset), _LLU(per_dev->length),
                             first_dev);
        } else if (ios->kern_buff) {
-               int ret = osd_req_read_kern(or, &ios->obj, ios->offset,
+               int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
                                            ios->kern_buff, ios->length);
-
                EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
                              "length=0x%llx dev=%d ret=>%d\n",
-                             _LLU(ios->obj.id), _LLU(ios->offset),
+                             _LLU(ios->obj.id), _LLU(per_dev->offset),
                              _LLU(ios->length), first_dev, ret);
                if (unlikely(ret))
                        return ret;
@@ -403,14 +562,32 @@ int exofs_sbi_read(struct exofs_io_state *ios)
                EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
                              _LLU(ios->obj.id), ios->in_attr_len, first_dev);
        }
-
        if (ios->out_attr)
                osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
 
        if (ios->in_attr)
                osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
 
-       return exofs_io_execute(ios);
+       return 0;
+}
+
+int exofs_sbi_read(struct exofs_io_state *ios)
+{
+       int i;
+       int ret;
+
+       ret = _prepare_for_striping(ios);
+       if (unlikely(ret))
+               return ret;
+
+       for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
+               ret = _sbi_read_mirror(ios, i);
+               if (unlikely(ret))
+                       return ret;
+       }
+
+       ret = exofs_io_execute(ios);
+       return ret;
 }
 
 int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
@@ -434,42 +611,84 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
        return -EIO;
 }
 
+static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
+                            struct osd_attr *attr)
+{
+       int last_comp = cur_comp + ios->layout->mirrors_p1;
+
+       for (; cur_comp < last_comp; ++cur_comp) {
+               struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
+               struct osd_request *or;
+
+               or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
+               if (unlikely(!or)) {
+                       EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+                       return -ENOMEM;
+               }
+               per_dev->or = or;
+
+               osd_req_set_attributes(or, &ios->obj);
+               osd_req_add_set_attr_list(or, attr, 1);
+       }
+
+       return 0;
+}
+
 int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
 {
        struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
        struct exofs_io_state *ios;
-       struct osd_attr attr;
-       __be64 newsize;
+       struct exofs_trunc_attr {
+               struct osd_attr attr;
+               __be64 newsize;
+       } *size_attrs;
+       u64 this_obj_size;
+       unsigned dev;
+       unsigned unit_off;
        int i, ret;
 
-       if (exofs_get_io_state(&sbi->layout, &ios))
-               return -ENOMEM;
+       ret = exofs_get_io_state(&sbi->layout, &ios);
+       if (unlikely(ret))
+               return ret;
+
+       size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs),
+                            GFP_KERNEL);
+       if (unlikely(!size_attrs)) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        ios->obj.id = exofs_oi_objno(oi);
        ios->cred = oi->i_cred;
 
-       newsize = cpu_to_be64(size);
-       attr = g_attr_logical_length;
-       attr.val_ptr = &newsize;
+       ios->numdevs = ios->layout->s_numdevs;
+       _offset_dev_unit_off(ios, size, &this_obj_size, &dev, &unit_off);
 
-       for (i = 0; i < sbi->layout.s_numdevs; i++) {
-               struct osd_request *or;
+       for (i = 0; i < ios->layout->group_width; ++i) {
+               struct exofs_trunc_attr *size_attr = &size_attrs[i];
+               u64 obj_size;
 
-               or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
-               if (unlikely(!or)) {
-                       EXOFS_ERR("%s: osd_start_request failed\n", __func__);
-                       ret = -ENOMEM;
-                       goto out;
-               }
-               ios->per_dev[i].or = or;
-               ios->numdevs++;
+               if (i < dev)
+                       obj_size = this_obj_size +
+                                       ios->layout->stripe_unit - unit_off;
+               else if (i == dev)
+                       obj_size = this_obj_size;
+               else /* i > dev */
+                       obj_size = this_obj_size - unit_off;
 
-               osd_req_set_attributes(or, &ios->obj);
-               osd_req_add_set_attr_list(or, &attr, 1);
+               size_attr->newsize = cpu_to_be64(obj_size);
+               size_attr->attr = g_attr_logical_length;
+               size_attr->attr.val_ptr = &size_attr->newsize;
+
+               ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
+                                       &size_attr->attr);
+               if (unlikely(ret))
+                       goto out;
        }
        ret = exofs_io_execute(ios);
 
 out:
+       kfree(size_attrs);
        exofs_put_io_state(ios);
        return ret;
 }