Btrfs: Lower contention on the csum mutex
[safe/jmp/linux-2.6] / fs / btrfs / file-item.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/bio.h>
20 #include <linux/pagemap.h>
21 #include <linux/highmem.h>
22 #include "ctree.h"
23 #include "disk-io.h"
24 #include "transaction.h"
25 #include "print-tree.h"
26
/*
 * Cap used in this file on the number of checksums a single csum item may
 * hold for root @r: the leaf data size less two item headers, divided by
 * the size of one crc32, minus one.
 */
#define MAX_CSUM_ITEMS(r) \
	(((BTRFS_LEAF_DATA_SIZE(r) - 2 * sizeof(struct btrfs_item)) / \
	  BTRFS_CRC32_SIZE) - 1)
30 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
31                              struct btrfs_root *root,
32                              u64 objectid, u64 pos,
33                              u64 disk_offset, u64 disk_num_bytes,
34                              u64 num_bytes, u64 offset)
35 {
36         int ret = 0;
37         struct btrfs_file_extent_item *item;
38         struct btrfs_key file_key;
39         struct btrfs_path *path;
40         struct extent_buffer *leaf;
41
42         path = btrfs_alloc_path();
43         BUG_ON(!path);
44         file_key.objectid = objectid;
45         file_key.offset = pos;
46         btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
47
48         ret = btrfs_insert_empty_item(trans, root, path, &file_key,
49                                       sizeof(*item));
50         if (ret < 0)
51                 goto out;
52         BUG_ON(ret);
53         leaf = path->nodes[0];
54         item = btrfs_item_ptr(leaf, path->slots[0],
55                               struct btrfs_file_extent_item);
56         btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
57         btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
58         btrfs_set_file_extent_offset(leaf, item, offset);
59         btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
60         btrfs_set_file_extent_generation(leaf, item, trans->transid);
61         btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
62         btrfs_mark_buffer_dirty(leaf);
63 out:
64         btrfs_free_path(path);
65         return ret;
66 }
67
68 struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
69                                           struct btrfs_root *root,
70                                           struct btrfs_path *path,
71                                           u64 objectid, u64 offset,
72                                           int cow)
73 {
74         int ret;
75         struct btrfs_key file_key;
76         struct btrfs_key found_key;
77         struct btrfs_csum_item *item;
78         struct extent_buffer *leaf;
79         u64 csum_offset = 0;
80         int csums_in_item;
81
82         file_key.objectid = objectid;
83         file_key.offset = offset;
84         btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
85         ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
86         if (ret < 0)
87                 goto fail;
88         leaf = path->nodes[0];
89         if (ret > 0) {
90                 ret = 1;
91                 if (path->slots[0] == 0)
92                         goto fail;
93                 path->slots[0]--;
94                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
95                 if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
96                     found_key.objectid != objectid) {
97                         goto fail;
98                 }
99                 csum_offset = (offset - found_key.offset) >>
100                                 root->fs_info->sb->s_blocksize_bits;
101                 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
102                 csums_in_item /= BTRFS_CRC32_SIZE;
103
104                 if (csum_offset >= csums_in_item) {
105                         ret = -EFBIG;
106                         goto fail;
107                 }
108         }
109         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
110         item = (struct btrfs_csum_item *)((unsigned char *)item +
111                                           csum_offset * BTRFS_CRC32_SIZE);
112         return item;
113 fail:
114         if (ret > 0)
115                 ret = -ENOENT;
116         return ERR_PTR(ret);
117 }
118
119
120 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
121                              struct btrfs_root *root,
122                              struct btrfs_path *path, u64 objectid,
123                              u64 offset, int mod)
124 {
125         int ret;
126         struct btrfs_key file_key;
127         int ins_len = mod < 0 ? -1 : 0;
128         int cow = mod != 0;
129
130         file_key.objectid = objectid;
131         file_key.offset = offset;
132         btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
133         ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
134         return ret;
135 }
136
#if 0 /* broken */
/*
 * Compiled out and marked broken; kept for reference only.
 *
 * For every bio_vec of @bio, find the checksum for its file offset and
 * record it with set_state_private() on the inode's io tree.  Checksums
 * still attached to an ordered extent are tried first; otherwise the csum
 * item is looked up in @root and cached across iterations while the
 * offsets stay inside the range that item covers.  When no checksum can
 * be found a message is printed and 0 is recorded.
 *
 * Always returns 0.
 */
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
			  struct bio *bio)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_csum_item *item = NULL;
	struct bio_vec *bvec = bio->bi_io_vec;
	struct btrfs_path *path;
	u64 item_start_offset = 0;
	u64 item_last_offset = 0;
	u64 offset;
	u32 csum_byte_off;
	u32 sum;
	int bio_index;
	int ret;

	path = btrfs_alloc_path();

	WARN_ON(bio->bi_vcnt <= 0);

	for (bio_index = 0; bio_index < bio->bi_vcnt; bio_index++, bvec++) {
		offset = page_offset(bvec->bv_page) + bvec->bv_offset;

		/* checksums of in-flight ordered extents take priority */
		ret = btrfs_find_ordered_sum(inode, offset, &sum);
		if (ret == 0)
			goto found;

		/* refresh the cached item when @offset falls outside it */
		if (!item || offset < item_start_offset ||
		    offset >= item_last_offset) {
			struct btrfs_key key;
			u32 size;

			if (item)
				btrfs_release_path(root, path);
			item = btrfs_lookup_csum(NULL, root, path,
						 inode->i_ino, offset, 0);
			if (IS_ERR(item)) {
				ret = PTR_ERR(item);
				if (ret == -ENOENT || ret == -EFBIG)
					ret = 0;
				sum = 0;
				printk("no csum found for inode %lu start "
				       "%llu\n", inode->i_ino,
				       (unsigned long long)offset);
				item = NULL;
				goto found;
			}
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);

			item_start_offset = key.offset;
			size = btrfs_item_size_nr(path->nodes[0],
						  path->slots[0]);
			item_last_offset = item_start_offset +
				(size / BTRFS_CRC32_SIZE) *
				root->sectorsize;
			item = btrfs_item_ptr(path->nodes[0], path->slots[0],
					      struct btrfs_csum_item);
		}
		/*
		 * this byte range must be able to fit inside
		 * a single leaf so it will also fit inside a u32
		 */
		csum_byte_off = offset - item_start_offset;
		csum_byte_off = csum_byte_off / root->sectorsize;
		csum_byte_off = csum_byte_off * BTRFS_CRC32_SIZE;

		read_extent_buffer(path->nodes[0], &sum,
				   ((unsigned long)item) + csum_byte_off,
				   BTRFS_CRC32_SIZE);
found:
		set_state_private(io_tree, offset, sum);
	}
	btrfs_free_path(path);
	return 0;
}
#endif
215
216 int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
217                        struct bio *bio)
218 {
219         struct btrfs_ordered_sum *sums;
220         struct btrfs_sector_sum *sector_sum;
221         struct btrfs_ordered_extent *ordered;
222         char *data;
223         struct bio_vec *bvec = bio->bi_io_vec;
224         int bio_index = 0;
225         unsigned long total_bytes = 0;
226         unsigned long this_sum_bytes = 0;
227         u64 offset;
228
229         WARN_ON(bio->bi_vcnt <= 0);
230         sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
231         if (!sums)
232                 return -ENOMEM;
233
234         sector_sum = sums->sums;
235         sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset;
236         sums->len = bio->bi_size;
237         INIT_LIST_HEAD(&sums->list);
238         ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
239         BUG_ON(!ordered);
240
241         while(bio_index < bio->bi_vcnt) {
242                 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
243                 if (offset >= ordered->file_offset + ordered->len ||
244                     offset < ordered->file_offset) {
245                         unsigned long bytes_left;
246                         sums->len = this_sum_bytes;
247                         this_sum_bytes = 0;
248                         btrfs_add_ordered_sum(inode, ordered, sums);
249                         btrfs_put_ordered_extent(ordered);
250
251                         bytes_left = bio->bi_size - total_bytes;
252
253                         sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
254                                        GFP_NOFS);
255                         BUG_ON(!sums);
256                         sector_sum = sums->sums;
257                         sums->len = bytes_left;
258                         sums->file_offset = offset;
259                         ordered = btrfs_lookup_ordered_extent(inode,
260                                                       sums->file_offset);
261                         BUG_ON(!ordered);
262                 }
263
264                 data = kmap_atomic(bvec->bv_page, KM_USER0);
265                 sector_sum->sum = ~(u32)0;
266                 sector_sum->sum = btrfs_csum_data(root,
267                                                   data + bvec->bv_offset,
268                                                   sector_sum->sum,
269                                                   bvec->bv_len);
270                 kunmap_atomic(data, KM_USER0);
271                 btrfs_csum_final(sector_sum->sum,
272                                  (char *)&sector_sum->sum);
273                 sector_sum->offset = page_offset(bvec->bv_page) +
274                         bvec->bv_offset;
275
276                 sector_sum++;
277                 bio_index++;
278                 total_bytes += bvec->bv_len;
279                 this_sum_bytes += bvec->bv_len;
280                 bvec++;
281         }
282         this_sum_bytes = 0;
283         btrfs_add_ordered_sum(inode, ordered, sums);
284         btrfs_put_ordered_extent(ordered);
285         return 0;
286 }
287
288 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
289                            struct btrfs_root *root, struct inode *inode,
290                            struct btrfs_ordered_sum *sums)
291 {
292         u64 objectid = inode->i_ino;
293         u64 offset;
294         int ret;
295         struct btrfs_key file_key;
296         struct btrfs_key found_key;
297         u64 next_offset;
298         u64 total_bytes = 0;
299         int found_next;
300         struct btrfs_path *path;
301         struct btrfs_csum_item *item;
302         struct btrfs_csum_item *item_end;
303         struct extent_buffer *leaf = NULL;
304         u64 csum_offset;
305         struct btrfs_sector_sum *sector_sum;
306         u32 nritems;
307         u32 ins_size;
308         char *eb_map;
309         char *eb_token;
310         unsigned long map_len;
311         unsigned long map_start;
312
313         path = btrfs_alloc_path();
314         BUG_ON(!path);
315         sector_sum = sums->sums;
316 again:
317         next_offset = (u64)-1;
318         found_next = 0;
319         offset = sector_sum->offset;
320         file_key.objectid = objectid;
321         file_key.offset = offset;
322         btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
323
324         mutex_lock(&BTRFS_I(inode)->csum_mutex);
325         item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
326         if (!IS_ERR(item)) {
327                 leaf = path->nodes[0];
328                 goto found;
329         }
330         ret = PTR_ERR(item);
331         if (ret == -EFBIG) {
332                 u32 item_size;
333                 /* we found one, but it isn't big enough yet */
334                 leaf = path->nodes[0];
335                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
336                 if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) {
337                         /* already at max size, make a new one */
338                         goto insert;
339                 }
340         } else {
341                 int slot = path->slots[0] + 1;
342                 /* we didn't find a csum item, insert one */
343                 nritems = btrfs_header_nritems(path->nodes[0]);
344                 if (path->slots[0] >= nritems - 1) {
345                         ret = btrfs_next_leaf(root, path);
346                         if (ret == 1)
347                                 found_next = 1;
348                         if (ret != 0)
349                                 goto insert;
350                         slot = 0;
351                 }
352                 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
353                 if (found_key.objectid != objectid ||
354                     found_key.type != BTRFS_CSUM_ITEM_KEY) {
355                         found_next = 1;
356                         goto insert;
357                 }
358                 next_offset = found_key.offset;
359                 found_next = 1;
360                 goto insert;
361         }
362
363         /*
364          * at this point, we know the tree has an item, but it isn't big
365          * enough yet to put our csum in.  Grow it
366          */
367         btrfs_release_path(root, path);
368         ret = btrfs_search_slot(trans, root, &file_key, path,
369                                 BTRFS_CRC32_SIZE, 1);
370         if (ret < 0)
371                 goto fail_unlock;
372         if (ret == 0) {
373                 BUG();
374         }
375         if (path->slots[0] == 0) {
376                 goto insert;
377         }
378         path->slots[0]--;
379         leaf = path->nodes[0];
380         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
381         csum_offset = (offset - found_key.offset) >>
382                         root->fs_info->sb->s_blocksize_bits;
383         if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
384             found_key.objectid != objectid ||
385             csum_offset >= MAX_CSUM_ITEMS(root)) {
386                 goto insert;
387         }
388         if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
389             BTRFS_CRC32_SIZE) {
390                 u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE;
391                 diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
392                 if (diff != BTRFS_CRC32_SIZE)
393                         goto insert;
394                 ret = btrfs_extend_item(trans, root, path, diff);
395                 BUG_ON(ret);
396                 goto csum;
397         }
398
399 insert:
400         btrfs_release_path(root, path);
401         csum_offset = 0;
402         if (found_next) {
403                 u64 tmp = min((u64)i_size_read(inode), next_offset);
404                 tmp -= offset & ~((u64)root->sectorsize -1);
405                 tmp >>= root->fs_info->sb->s_blocksize_bits;
406                 tmp = max((u64)1, tmp);
407                 tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root));
408                 ins_size = BTRFS_CRC32_SIZE * tmp;
409         } else {
410                 ins_size = BTRFS_CRC32_SIZE;
411         }
412         ret = btrfs_insert_empty_item(trans, root, path, &file_key,
413                                       ins_size);
414         if (ret < 0)
415                 goto fail_unlock;
416         if (ret != 0) {
417                 WARN_ON(1);
418                 goto fail_unlock;
419         }
420 csum:
421         leaf = path->nodes[0];
422         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
423         ret = 0;
424         item = (struct btrfs_csum_item *)((unsigned char *)item +
425                                           csum_offset * BTRFS_CRC32_SIZE);
426 found:
427         item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
428         item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
429                                       btrfs_item_size_nr(leaf, path->slots[0]));
430         eb_token = NULL;
431         mutex_unlock(&BTRFS_I(inode)->csum_mutex);
432         cond_resched();
433 next_sector:
434
435         if (!eb_token ||
436            (unsigned long)item  + BTRFS_CRC32_SIZE >= map_start + map_len) {
437                 int err;
438
439                 if (eb_token)
440                         unmap_extent_buffer(leaf, eb_token, KM_USER1);
441                 eb_token = NULL;
442                 err = map_private_extent_buffer(leaf, (unsigned long)item,
443                                                 BTRFS_CRC32_SIZE,
444                                                 &eb_token, &eb_map,
445                                                 &map_start, &map_len, KM_USER1);
446                 if (err)
447                         eb_token = NULL;
448         }
449         if (eb_token) {
450                 memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
451                        &sector_sum->sum, BTRFS_CRC32_SIZE);
452         } else {
453                 write_extent_buffer(leaf, &sector_sum->sum,
454                                     (unsigned long)item, BTRFS_CRC32_SIZE);
455         }
456
457         total_bytes += root->sectorsize;
458         sector_sum++;
459         if (total_bytes < sums->len) {
460                 item = (struct btrfs_csum_item *)((char *)item +
461                                                   BTRFS_CRC32_SIZE);
462                 if (item < item_end && offset + PAGE_CACHE_SIZE ==
463                     sector_sum->offset) {
464                             offset = sector_sum->offset;
465                         goto next_sector;
466                 }
467         }
468         if (eb_token) {
469                 unmap_extent_buffer(leaf, eb_token, KM_USER1);
470                 eb_token = NULL;
471         }
472         btrfs_mark_buffer_dirty(path->nodes[0]);
473         cond_resched();
474         if (total_bytes < sums->len) {
475                 btrfs_release_path(root, path);
476                 goto again;
477         }
478 out:
479         btrfs_free_path(path);
480         return ret;
481
482 fail_unlock:
483         mutex_unlock(&BTRFS_I(inode)->csum_mutex);
484         goto out;
485 }
486
487 int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
488                         struct btrfs_root *root, struct btrfs_path *path,
489                         u64 isize)
490 {
491         struct btrfs_key key;
492         struct extent_buffer *leaf = path->nodes[0];
493         int slot = path->slots[0];
494         int ret;
495         u32 new_item_size;
496         u64 new_item_span;
497         u64 blocks;
498
499         btrfs_item_key_to_cpu(leaf, &key, slot);
500         if (isize <= key.offset)
501                 return 0;
502         new_item_span = isize - key.offset;
503         blocks = (new_item_span + root->sectorsize - 1) >>
504                 root->fs_info->sb->s_blocksize_bits;
505         new_item_size = blocks * BTRFS_CRC32_SIZE;
506         if (new_item_size >= btrfs_item_size_nr(leaf, slot))
507                 return 0;
508         ret = btrfs_truncate_item(trans, root, path, new_item_size, 1);
509         BUG_ON(ret);
510         return ret;
511 }