e7630a77a6c86e86fac18496af9090e41e43fb57
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60
/*
 * A value root (xv) immediately followed by a single extent record (er).
 * OCFS2_XATTR_ROOT_SIZE is the size of this structure, and def_xv is a
 * static instance of it.  Do not reorder the members.
 */
struct ocfs2_xattr_def_value_root {
        struct ocfs2_xattr_value_root   xv;
        struct ocfs2_extent_rec         er;
};
65
/*
 * In-memory handle for one xattr bucket: the owning inode and the
 * buffer_heads that back the bucket.  Allocated by
 * ocfs2_xattr_bucket_new() and released by ocfs2_xattr_bucket_free().
 */
struct ocfs2_xattr_bucket {
        /* The inode these xattrs are associated with */
        struct inode *bu_inode;

        /* The actual buffers that make up the bucket */
        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

        /* How many blocks make up one bucket for this filesystem */
        int bu_blocks;
};
76
/*
 * Context shared by the helpers that modify xattr storage within one
 * operation.
 */
struct ocfs2_xattr_set_ctxt {
        /* Journal handle used for journal access/dirty calls */
        handle_t *handle;
        /* Metadata allocation context handed to the extent tree code */
        struct ocfs2_alloc_context *meta_ac;
        /* Data (cluster) allocation context used when adding clusters */
        struct ocfs2_alloc_context *data_ac;
        /* Collects clusters released while removing extents */
        struct ocfs2_cached_dealloc_ctxt dealloc;
};
83
/* Bytes accounted for a value that is too large to be stored inline */
#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Largest value length that namevalue_size() accounts by its own
 * (aligned) length; larger values are accounted as OCFS2_XATTR_ROOT_SIZE.
 */
#define OCFS2_XATTR_INLINE_SIZE 80
/* NOTE(review): extra bytes kept free after the header - confirm purpose */
#define OCFS2_XATTR_HEADER_GAP  4
/* Minimum inline xattr area minus the xattr header and the gap */
#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - OCFS2_XATTR_HEADER_GAP)
/*
 * One filesystem block minus the xattr block structure, the xattr
 * header and the gap.  ptr must provide ->i_sb (e.g. a struct inode *).
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
                                         - sizeof(struct ocfs2_xattr_block) \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - OCFS2_XATTR_HEADER_GAP)
94
/*
 * Default value root: every field is zero except the extent list
 * count, which is preset to 1 (stored little-endian) - presumably
 * matching the single extent record that follows xv in the structure.
 */
static struct ocfs2_xattr_def_value_root def_xv = {
        .xv.xr_list.l_count = cpu_to_le16(1),
};
98
/* NULL-terminated list of the xattr handlers ocfs2 provides. */
struct xattr_handler *ocfs2_xattr_handlers[] = {
        &ocfs2_xattr_user_handler,
        &ocfs2_xattr_acl_access_handler,
        &ocfs2_xattr_acl_default_handler,
        &ocfs2_xattr_trusted_handler,
        &ocfs2_xattr_security_handler,
        NULL
};
107
/*
 * Handler lookup table indexed by xattr name index.  Slots without an
 * initializer stay NULL; ocfs2_xattr_prefix() checks for that.
 */
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
        [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
        [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
                                        = &ocfs2_xattr_acl_access_handler,
        [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
                                        = &ocfs2_xattr_acl_default_handler,
        [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
        [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
};
117
/* Describes one extended attribute (name and value) being operated on. */
struct ocfs2_xattr_info {
        /* Name index (an OCFS2_XATTR_INDEX_* value) */
        int             xi_name_index;
        /* Attribute name and its length in bytes */
        const char      *xi_name;
        int             xi_name_len;
        /* Attribute value and its length in bytes */
        const void      *xi_value;
        size_t          xi_value_len;
};
125
/*
 * State passed to the xattr lookup helpers (for example
 * ocfs2_xattr_block_find() and ocfs2_xattr_index_block_find()).
 */
struct ocfs2_xattr_search {
        struct buffer_head *inode_bh;
        /*
         * xattr_bh points to the buffer head of the block that holds the
         * extended attribute.  When the extended attribute lives in the
         * inode, xattr_bh is equal to inode_bh.
         */
        struct buffer_head *xattr_bh;
        struct ocfs2_xattr_header *header;
        struct ocfs2_xattr_bucket *bucket;
        void *base;
        void *end;
        struct ocfs2_xattr_entry *here;
        int not_found;
};
140
/*
 * Operations on struct ocfs2_xa_loc.  Each kind of storage backing a
 * location supplies its own table; struct ocfs2_xa_loc points at it
 * through xl_ops.
 */
struct ocfs2_xa_loc;
struct ocfs2_xa_loc_operations {
        /*
         * Journal functions
         */
        int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
                                  int type);
        void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);

        /*
         * Return a pointer to the appropriate buffer in loc->xl_storage
         * at the given offset from loc->xl_header.
         */
        void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);

        /* Can we reuse the existing entry for the new value? */
        int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
                             struct ocfs2_xattr_info *xi);

        /* How much space is needed for the new value? */
        int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
                               struct ocfs2_xattr_info *xi);

        /*
         * Return the offset of the first name+value pair.  This is
         * the start of our downward-filling free space.
         */
        int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);

        /*
         * Remove the name+value at this location.  Do whatever is
         * appropriate with the remaining name+value pairs.
         */
        void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);

        /* Fill xl_entry with a new entry */
        void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);

        /* Add name+value storage to an entry */
        void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);

        /*
         * Initialize the value buf's access and bh fields for this entry.
         * ocfs2_xa_fill_value_buf() will handle the xv pointer.
         */
        void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
                                   struct ocfs2_xattr_value_buf *vb);
};
190
/*
 * Describes an xattr entry location.  This is a memory structure
 * tracking the on-disk structure.  Storage-specific behaviour is
 * reached through xl_ops (see struct ocfs2_xa_loc_operations).
 */
struct ocfs2_xa_loc {
        /* This xattr belongs to this inode */
        struct inode *xl_inode;

        /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
        struct ocfs2_xattr_header *xl_header;

        /* Bytes from xl_header to the end of the storage */
        int xl_size;

        /*
         * The ocfs2_xattr_entry this location describes.  If this is
         * NULL, this location describes the on-disk structure where it
         * would have been.
         */
        struct ocfs2_xattr_entry *xl_entry;

        /*
         * Internal housekeeping
         */

        /* Buffer(s) containing this entry; interpreted by xl_ops */
        void *xl_storage;

        /* Operations on the storage backing this location */
        const struct ocfs2_xa_loc_operations *xl_ops;
};
222
223 /*
224  * Convenience functions to calculate how much space is needed for a
225  * given name+value pair
226  */
227 static int namevalue_size(int name_len, uint64_t value_len)
228 {
229         if (value_len > OCFS2_XATTR_INLINE_SIZE)
230                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
231         else
232                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
233 }
234
235 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
236 {
237         return namevalue_size(xi->xi_name_len, xi->xi_value_len);
238 }
239
240 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
241 {
242         u64 value_len = le64_to_cpu(xe->xe_value_size);
243
244         BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
245                ocfs2_xattr_is_local(xe));
246         return namevalue_size(xe->xe_name_len, value_len);
247 }
248
249
/*
 * Forward declarations of file-local (static) helpers, plus the
 * xattr_tree_rec_func callback type taken by
 * ocfs2_iterate_xattr_index_block().
 */
static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
                                             struct ocfs2_xattr_header *xh,
                                             int index,
                                             int *block_off,
                                             int *new_offset);

static int ocfs2_xattr_block_find(struct inode *inode,
                                  int name_index,
                                  const char *name,
                                  struct ocfs2_xattr_search *xs);
static int ocfs2_xattr_index_block_find(struct inode *inode,
                                        struct buffer_head *root_bh,
                                        int name_index,
                                        const char *name,
                                        struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
                                        struct buffer_head *blk_bh,
                                        char *buffer,
                                        size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
                                          struct ocfs2_xattr_search *xs,
                                          struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
                                             struct ocfs2_xattr_info *xi,
                                             struct ocfs2_xattr_search *xs,
                                             struct ocfs2_xattr_set_ctxt *ctxt);

/* Callback invoked with a (blkno, cpos, len) record of the xattr tree */
typedef int (xattr_tree_rec_func)(struct inode *inode,
                                  struct buffer_head *root_bh,
                                  u64 blkno, u32 cpos, u32 len, void *para);
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
                                           struct buffer_head *root_bh,
                                           xattr_tree_rec_func *rec_func,
                                           void *para);
static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
                                        struct ocfs2_xattr_bucket *bucket,
                                        void *para);
static int ocfs2_rm_xattr_cluster(struct inode *inode,
                                  struct buffer_head *root_bh,
                                  u64 blkno,
                                  u32 cpos,
                                  u32 len,
                                  void *para);

static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
                                  u64 src_blk, u64 last_blk, u64 to_blk,
                                  unsigned int start_bucket,
                                  u32 *first_hash);
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
                                        struct ocfs2_dinode *di,
                                        struct ocfs2_xattr_info *xi,
                                        struct ocfs2_xattr_search *xis,
                                        struct ocfs2_xattr_search *xbs,
                                        struct ocfs2_refcount_tree **ref_tree,
                                        int *meta_need,
                                        int *credits);
static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
                                           struct ocfs2_xattr_bucket *bucket,
                                           int offset,
                                           struct ocfs2_xattr_value_root **xv,
                                           struct buffer_head **bh);
314
315 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
316 {
317         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
318 }
319
320 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
321 {
322         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
323 }
324
/*
 * Accessors for a struct ocfs2_xattr_bucket whose buffer_heads are set:
 *   bucket_blkno(b)    - block number of the bucket's first buffer
 *   bucket_block(b, n) - data of the n-th buffer in the bucket
 *   bucket_xh(b)       - first buffer's data viewed as the xattr header
 */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
328
329 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
330 {
331         struct ocfs2_xattr_bucket *bucket;
332         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
333
334         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
335
336         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
337         if (bucket) {
338                 bucket->bu_inode = inode;
339                 bucket->bu_blocks = blks;
340         }
341
342         return bucket;
343 }
344
345 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
346 {
347         int i;
348
349         for (i = 0; i < bucket->bu_blocks; i++) {
350                 brelse(bucket->bu_bhs[i]);
351                 bucket->bu_bhs[i] = NULL;
352         }
353 }
354
355 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
356 {
357         if (bucket) {
358                 ocfs2_xattr_bucket_relse(bucket);
359                 bucket->bu_inode = NULL;
360                 kfree(bucket);
361         }
362 }
363
364 /*
365  * A bucket that has never been written to disk doesn't need to be
366  * read.  We just need the buffer_heads.  Don't call this for
367  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
368  * them fully.
369  */
370 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
371                                    u64 xb_blkno)
372 {
373         int i, rc = 0;
374
375         for (i = 0; i < bucket->bu_blocks; i++) {
376                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
377                                               xb_blkno + i);
378                 if (!bucket->bu_bhs[i]) {
379                         rc = -EIO;
380                         mlog_errno(rc);
381                         break;
382                 }
383
384                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
385                                            bucket->bu_bhs[i]))
386                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
387                                                       bucket->bu_bhs[i]);
388         }
389
390         if (rc)
391                 ocfs2_xattr_bucket_relse(bucket);
392         return rc;
393 }
394
395 /* Read the xattr bucket at xb_blkno */
396 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
397                                    u64 xb_blkno)
398 {
399         int rc;
400
401         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
402                                bucket->bu_blocks, bucket->bu_bhs, 0,
403                                NULL);
404         if (!rc) {
405                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
406                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
407                                                  bucket->bu_bhs,
408                                                  bucket->bu_blocks,
409                                                  &bucket_xh(bucket)->xh_check);
410                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
411                 if (rc)
412                         mlog_errno(rc);
413         }
414
415         if (rc)
416                 ocfs2_xattr_bucket_relse(bucket);
417         return rc;
418 }
419
420 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
421                                              struct ocfs2_xattr_bucket *bucket,
422                                              int type)
423 {
424         int i, rc = 0;
425
426         for (i = 0; i < bucket->bu_blocks; i++) {
427                 rc = ocfs2_journal_access(handle,
428                                           INODE_CACHE(bucket->bu_inode),
429                                           bucket->bu_bhs[i], type);
430                 if (rc) {
431                         mlog_errno(rc);
432                         break;
433                 }
434         }
435
436         return rc;
437 }
438
439 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
440                                              struct ocfs2_xattr_bucket *bucket)
441 {
442         int i;
443
444         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
445         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
446                                    bucket->bu_bhs, bucket->bu_blocks,
447                                    &bucket_xh(bucket)->xh_check);
448         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
449
450         for (i = 0; i < bucket->bu_blocks; i++)
451                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
452 }
453
454 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
455                                          struct ocfs2_xattr_bucket *src)
456 {
457         int i;
458         int blocksize = src->bu_inode->i_sb->s_blocksize;
459
460         BUG_ON(dest->bu_blocks != src->bu_blocks);
461         BUG_ON(dest->bu_inode != src->bu_inode);
462
463         for (i = 0; i < src->bu_blocks; i++) {
464                 memcpy(bucket_block(dest, i), bucket_block(src, i),
465                        blocksize);
466         }
467 }
468
469 static int ocfs2_validate_xattr_block(struct super_block *sb,
470                                       struct buffer_head *bh)
471 {
472         int rc;
473         struct ocfs2_xattr_block *xb =
474                 (struct ocfs2_xattr_block *)bh->b_data;
475
476         mlog(0, "Validating xattr block %llu\n",
477              (unsigned long long)bh->b_blocknr);
478
479         BUG_ON(!buffer_uptodate(bh));
480
481         /*
482          * If the ecc fails, we return the error but otherwise
483          * leave the filesystem running.  We know any error is
484          * local to this block.
485          */
486         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
487         if (rc)
488                 return rc;
489
490         /*
491          * Errors after here are fatal
492          */
493
494         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
495                 ocfs2_error(sb,
496                             "Extended attribute block #%llu has bad "
497                             "signature %.*s",
498                             (unsigned long long)bh->b_blocknr, 7,
499                             xb->xb_signature);
500                 return -EINVAL;
501         }
502
503         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
504                 ocfs2_error(sb,
505                             "Extended attribute block #%llu has an "
506                             "invalid xb_blkno of %llu",
507                             (unsigned long long)bh->b_blocknr,
508                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
509                 return -EINVAL;
510         }
511
512         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
513                 ocfs2_error(sb,
514                             "Extended attribute block #%llu has an invalid "
515                             "xb_fs_generation of #%u",
516                             (unsigned long long)bh->b_blocknr,
517                             le32_to_cpu(xb->xb_fs_generation));
518                 return -EINVAL;
519         }
520
521         return 0;
522 }
523
524 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
525                                   struct buffer_head **bh)
526 {
527         int rc;
528         struct buffer_head *tmp = *bh;
529
530         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
531                               ocfs2_validate_xattr_block);
532
533         /* If ocfs2_read_block() got us a new bh, pass it up. */
534         if (!rc && !*bh)
535                 *bh = tmp;
536
537         return rc;
538 }
539
540 static inline const char *ocfs2_xattr_prefix(int name_index)
541 {
542         struct xattr_handler *handler = NULL;
543
544         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
545                 handler = ocfs2_xattr_handler_map[name_index];
546
547         return handler ? handler->prefix : NULL;
548 }
549
550 static u32 ocfs2_xattr_name_hash(struct inode *inode,
551                                  const char *name,
552                                  int name_len)
553 {
554         /* Get hash value of uuid from super block */
555         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
556         int i;
557
558         /* hash extended attribute name */
559         for (i = 0; i < name_len; i++) {
560                 hash = (hash << OCFS2_HASH_SHIFT) ^
561                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
562                        *name++;
563         }
564
565         return hash;
566 }
567
568 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
569 {
570         return namevalue_size(name_len, value_len) +
571                 sizeof(struct ocfs2_xattr_entry);
572 }
573
574 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
575 {
576         return namevalue_size_xi(xi) +
577                 sizeof(struct ocfs2_xattr_entry);
578 }
579
580 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
581 {
582         return namevalue_size_xe(xe) +
583                 sizeof(struct ocfs2_xattr_entry);
584 }
585
586 int ocfs2_calc_security_init(struct inode *dir,
587                              struct ocfs2_security_xattr_info *si,
588                              int *want_clusters,
589                              int *xattr_credits,
590                              struct ocfs2_alloc_context **xattr_ac)
591 {
592         int ret = 0;
593         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
594         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
595                                                  si->value_len);
596
597         /*
598          * The max space of security xattr taken inline is
599          * 256(name) + 80(value) + 16(entry) = 352 bytes,
600          * So reserve one metadata block for it is ok.
601          */
602         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
603             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
604                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
605                 if (ret) {
606                         mlog_errno(ret);
607                         return ret;
608                 }
609                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
610         }
611
612         /* reserve clusters for xattr value which will be set in B tree*/
613         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
614                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
615                                                             si->value_len);
616
617                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
618                                                            new_clusters);
619                 *want_clusters += new_clusters;
620         }
621         return ret;
622 }
623
624 int ocfs2_calc_xattr_init(struct inode *dir,
625                           struct buffer_head *dir_bh,
626                           int mode,
627                           struct ocfs2_security_xattr_info *si,
628                           int *want_clusters,
629                           int *xattr_credits,
630                           int *want_meta)
631 {
632         int ret = 0;
633         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
634         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
635
636         if (si->enable)
637                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
638                                                      si->value_len);
639
640         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
641                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
642                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
643                                         "", NULL, 0);
644                 if (acl_len > 0) {
645                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
646                         if (S_ISDIR(mode))
647                                 a_size <<= 1;
648                 } else if (acl_len != 0 && acl_len != -ENODATA) {
649                         mlog_errno(ret);
650                         return ret;
651                 }
652         }
653
654         if (!(s_size + a_size))
655                 return ret;
656
657         /*
658          * The max space of security xattr taken inline is
659          * 256(name) + 80(value) + 16(entry) = 352 bytes,
660          * The max space of acl xattr taken inline is
661          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
662          * when blocksize = 512, may reserve one more cluser for
663          * xattr bucket, otherwise reserve one metadata block
664          * for them is ok.
665          * If this is a new directory with inline data,
666          * we choose to reserve the entire inline area for
667          * directory contents and force an external xattr block.
668          */
669         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
670             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
671             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
672                 *want_meta = *want_meta + 1;
673                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
674         }
675
676         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
677             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
678                 *want_clusters += 1;
679                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
680         }
681
682         /*
683          * reserve credits and clusters for xattrs which has large value
684          * and have to be set outside
685          */
686         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
687                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
688                                                         si->value_len);
689                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
690                                                            new_clusters);
691                 *want_clusters += new_clusters;
692         }
693         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
694             acl_len > OCFS2_XATTR_INLINE_SIZE) {
695                 /* for directory, it has DEFAULT and ACCESS two types of acls */
696                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
697                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
698                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
699                                                            new_clusters);
700                 *want_clusters += new_clusters;
701         }
702
703         return ret;
704 }
705
706 static int ocfs2_xattr_extend_allocation(struct inode *inode,
707                                          u32 clusters_to_add,
708                                          struct ocfs2_xattr_value_buf *vb,
709                                          struct ocfs2_xattr_set_ctxt *ctxt)
710 {
711         int status = 0;
712         handle_t *handle = ctxt->handle;
713         enum ocfs2_alloc_restarted why;
714         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
715         struct ocfs2_extent_tree et;
716
717         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
718
719         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
720
721         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
722                               OCFS2_JOURNAL_ACCESS_WRITE);
723         if (status < 0) {
724                 mlog_errno(status);
725                 goto leave;
726         }
727
728         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
729         status = ocfs2_add_clusters_in_btree(handle,
730                                              &et,
731                                              &logical_start,
732                                              clusters_to_add,
733                                              0,
734                                              ctxt->data_ac,
735                                              ctxt->meta_ac,
736                                              &why);
737         if (status < 0) {
738                 mlog_errno(status);
739                 goto leave;
740         }
741
742         status = ocfs2_journal_dirty(handle, vb->vb_bh);
743         if (status < 0) {
744                 mlog_errno(status);
745                 goto leave;
746         }
747
748         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
749
750         /*
751          * We should have already allocated enough space before the transaction,
752          * so no need to restart.
753          */
754         BUG_ON(why != RESTART_NONE || clusters_to_add);
755
756 leave:
757
758         return status;
759 }
760
761 static int __ocfs2_remove_xattr_range(struct inode *inode,
762                                       struct ocfs2_xattr_value_buf *vb,
763                                       u32 cpos, u32 phys_cpos, u32 len,
764                                       unsigned int ext_flags,
765                                       struct ocfs2_xattr_set_ctxt *ctxt)
766 {
767         int ret;
768         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
769         handle_t *handle = ctxt->handle;
770         struct ocfs2_extent_tree et;
771
772         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
773
774         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
775                             OCFS2_JOURNAL_ACCESS_WRITE);
776         if (ret) {
777                 mlog_errno(ret);
778                 goto out;
779         }
780
781         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
782                                   &ctxt->dealloc);
783         if (ret) {
784                 mlog_errno(ret);
785                 goto out;
786         }
787
788         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
789
790         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
791         if (ret) {
792                 mlog_errno(ret);
793                 goto out;
794         }
795
796         if (ext_flags & OCFS2_EXT_REFCOUNTED)
797                 ret = ocfs2_decrease_refcount(inode, handle,
798                                         ocfs2_blocks_to_clusters(inode->i_sb,
799                                                                  phys_blkno),
800                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
801         else
802                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
803                                                   phys_blkno, len);
804         if (ret)
805                 mlog_errno(ret);
806
807 out:
808         return ret;
809 }
810
811 static int ocfs2_xattr_shrink_size(struct inode *inode,
812                                    u32 old_clusters,
813                                    u32 new_clusters,
814                                    struct ocfs2_xattr_value_buf *vb,
815                                    struct ocfs2_xattr_set_ctxt *ctxt)
816 {
817         int ret = 0;
818         unsigned int ext_flags;
819         u32 trunc_len, cpos, phys_cpos, alloc_size;
820         u64 block;
821
822         if (old_clusters <= new_clusters)
823                 return 0;
824
825         cpos = new_clusters;
826         trunc_len = old_clusters - new_clusters;
827         while (trunc_len) {
828                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
829                                                &alloc_size,
830                                                &vb->vb_xv->xr_list, &ext_flags);
831                 if (ret) {
832                         mlog_errno(ret);
833                         goto out;
834                 }
835
836                 if (alloc_size > trunc_len)
837                         alloc_size = trunc_len;
838
839                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
840                                                  phys_cpos, alloc_size,
841                                                  ext_flags, ctxt);
842                 if (ret) {
843                         mlog_errno(ret);
844                         goto out;
845                 }
846
847                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
848                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
849                                                        block, alloc_size);
850                 cpos += alloc_size;
851                 trunc_len -= alloc_size;
852         }
853
854 out:
855         return ret;
856 }
857
858 static int ocfs2_xattr_value_truncate(struct inode *inode,
859                                       struct ocfs2_xattr_value_buf *vb,
860                                       int len,
861                                       struct ocfs2_xattr_set_ctxt *ctxt)
862 {
863         int ret;
864         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
865         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
866
867         if (new_clusters == old_clusters)
868                 return 0;
869
870         if (new_clusters > old_clusters)
871                 ret = ocfs2_xattr_extend_allocation(inode,
872                                                     new_clusters - old_clusters,
873                                                     vb, ctxt);
874         else
875                 ret = ocfs2_xattr_shrink_size(inode,
876                                               old_clusters, new_clusters,
877                                               vb, ctxt);
878
879         return ret;
880 }
881
/*
 * Append one "<prefix><name>\0" record to a listxattr result.
 *
 * @buffer:   start of the output area, or NULL when the caller only
 *            wants to know how many bytes are required
 * @size:     number of bytes available at @buffer
 * @result:   running byte total; always advanced by the record length,
 *            even when the record does not fit
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name, not necessarily NUL-terminated
 * @name_len: length of @name in bytes
 *
 * Returns 0 on success (or in sizing mode) and -ERANGE when the record
 * would run past @size.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;
	char *p;

	*result += total_len;

	/*
	 * We are just looking for how big our buffer needs to be.
	 *
	 * Sizing mode is detected from the pointer, not from @size:
	 * ocfs2_listxattr() advances the buffer and shrinks the size after
	 * the inode-body pass, so a real buffer that has been filled
	 * exactly arrives here with size == 0.  Treating that as a size
	 * query would report success for data that was never copied and
	 * return a length larger than the caller's buffer.
	 */
	if (!buffer)
		return 0;

	if (*result > size)
		return -ERANGE;

	/* The record starts where the previous total ended. */
	p = buffer + (*result - total_len);
	memcpy(p, prefix, prefix_len);
	memcpy(p + prefix_len, name, name_len);
	p[prefix_len + name_len] = '\0';

	return 0;
}
905
906 static int ocfs2_xattr_list_entries(struct inode *inode,
907                                     struct ocfs2_xattr_header *header,
908                                     char *buffer, size_t buffer_size)
909 {
910         size_t result = 0;
911         int i, type, ret;
912         const char *prefix, *name;
913
914         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
915                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
916                 type = ocfs2_xattr_get_type(entry);
917                 prefix = ocfs2_xattr_prefix(type);
918
919                 if (prefix) {
920                         name = (const char *)header +
921                                 le16_to_cpu(entry->xe_name_offset);
922
923                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
924                                                      &result, prefix, name,
925                                                      entry->xe_name_len);
926                         if (ret)
927                                 return ret;
928                 }
929         }
930
931         return result;
932 }
933
934 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
935                                          struct ocfs2_dinode *di)
936 {
937         struct ocfs2_xattr_header *xh;
938         int i;
939
940         xh = (struct ocfs2_xattr_header *)
941                  ((void *)di + inode->i_sb->s_blocksize -
942                  le16_to_cpu(di->i_xattr_inline_size));
943
944         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
945                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
946                         return 1;
947
948         return 0;
949 }
950
951 static int ocfs2_xattr_ibody_list(struct inode *inode,
952                                   struct ocfs2_dinode *di,
953                                   char *buffer,
954                                   size_t buffer_size)
955 {
956         struct ocfs2_xattr_header *header = NULL;
957         struct ocfs2_inode_info *oi = OCFS2_I(inode);
958         int ret = 0;
959
960         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
961                 return ret;
962
963         header = (struct ocfs2_xattr_header *)
964                  ((void *)di + inode->i_sb->s_blocksize -
965                  le16_to_cpu(di->i_xattr_inline_size));
966
967         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
968
969         return ret;
970 }
971
972 static int ocfs2_xattr_block_list(struct inode *inode,
973                                   struct ocfs2_dinode *di,
974                                   char *buffer,
975                                   size_t buffer_size)
976 {
977         struct buffer_head *blk_bh = NULL;
978         struct ocfs2_xattr_block *xb;
979         int ret = 0;
980
981         if (!di->i_xattr_loc)
982                 return ret;
983
984         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
985                                      &blk_bh);
986         if (ret < 0) {
987                 mlog_errno(ret);
988                 return ret;
989         }
990
991         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
992         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
993                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
994                 ret = ocfs2_xattr_list_entries(inode, header,
995                                                buffer, buffer_size);
996         } else
997                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
998                                                    buffer, buffer_size);
999
1000         brelse(blk_bh);
1001
1002         return ret;
1003 }
1004
1005 ssize_t ocfs2_listxattr(struct dentry *dentry,
1006                         char *buffer,
1007                         size_t size)
1008 {
1009         int ret = 0, i_ret = 0, b_ret = 0;
1010         struct buffer_head *di_bh = NULL;
1011         struct ocfs2_dinode *di = NULL;
1012         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
1013
1014         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1015                 return -EOPNOTSUPP;
1016
1017         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1018                 return ret;
1019
1020         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
1021         if (ret < 0) {
1022                 mlog_errno(ret);
1023                 return ret;
1024         }
1025
1026         di = (struct ocfs2_dinode *)di_bh->b_data;
1027
1028         down_read(&oi->ip_xattr_sem);
1029         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
1030         if (i_ret < 0)
1031                 b_ret = 0;
1032         else {
1033                 if (buffer) {
1034                         buffer += i_ret;
1035                         size -= i_ret;
1036                 }
1037                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
1038                                                buffer, size);
1039                 if (b_ret < 0)
1040                         i_ret = 0;
1041         }
1042         up_read(&oi->ip_xattr_sem);
1043         ocfs2_inode_unlock(dentry->d_inode, 0);
1044
1045         brelse(di_bh);
1046
1047         return i_ret + b_ret;
1048 }
1049
1050 static int ocfs2_xattr_find_entry(int name_index,
1051                                   const char *name,
1052                                   struct ocfs2_xattr_search *xs)
1053 {
1054         struct ocfs2_xattr_entry *entry;
1055         size_t name_len;
1056         int i, cmp = 1;
1057
1058         if (name == NULL)
1059                 return -EINVAL;
1060
1061         name_len = strlen(name);
1062         entry = xs->here;
1063         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1064                 cmp = name_index - ocfs2_xattr_get_type(entry);
1065                 if (!cmp)
1066                         cmp = name_len - entry->xe_name_len;
1067                 if (!cmp)
1068                         cmp = memcmp(name, (xs->base +
1069                                      le16_to_cpu(entry->xe_name_offset)),
1070                                      name_len);
1071                 if (cmp == 0)
1072                         break;
1073                 entry += 1;
1074         }
1075         xs->here = entry;
1076
1077         return cmp ? -ENODATA : 0;
1078 }
1079
1080 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1081                                          struct ocfs2_xattr_value_root *xv,
1082                                          void *buffer,
1083                                          size_t len)
1084 {
1085         u32 cpos, p_cluster, num_clusters, bpc, clusters;
1086         u64 blkno;
1087         int i, ret = 0;
1088         size_t cplen, blocksize;
1089         struct buffer_head *bh = NULL;
1090         struct ocfs2_extent_list *el;
1091
1092         el = &xv->xr_list;
1093         clusters = le32_to_cpu(xv->xr_clusters);
1094         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1095         blocksize = inode->i_sb->s_blocksize;
1096
1097         cpos = 0;
1098         while (cpos < clusters) {
1099                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1100                                                &num_clusters, el, NULL);
1101                 if (ret) {
1102                         mlog_errno(ret);
1103                         goto out;
1104                 }
1105
1106                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1107                 /* Copy ocfs2_xattr_value */
1108                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1109                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1110                                                &bh, NULL);
1111                         if (ret) {
1112                                 mlog_errno(ret);
1113                                 goto out;
1114                         }
1115
1116                         cplen = len >= blocksize ? blocksize : len;
1117                         memcpy(buffer, bh->b_data, cplen);
1118                         len -= cplen;
1119                         buffer += cplen;
1120
1121                         brelse(bh);
1122                         bh = NULL;
1123                         if (len == 0)
1124                                 break;
1125                 }
1126                 cpos += num_clusters;
1127         }
1128 out:
1129         return ret;
1130 }
1131
1132 static int ocfs2_xattr_ibody_get(struct inode *inode,
1133                                  int name_index,
1134                                  const char *name,
1135                                  void *buffer,
1136                                  size_t buffer_size,
1137                                  struct ocfs2_xattr_search *xs)
1138 {
1139         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1140         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1141         struct ocfs2_xattr_value_root *xv;
1142         size_t size;
1143         int ret = 0;
1144
1145         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1146                 return -ENODATA;
1147
1148         xs->end = (void *)di + inode->i_sb->s_blocksize;
1149         xs->header = (struct ocfs2_xattr_header *)
1150                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1151         xs->base = (void *)xs->header;
1152         xs->here = xs->header->xh_entries;
1153
1154         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1155         if (ret)
1156                 return ret;
1157         size = le64_to_cpu(xs->here->xe_value_size);
1158         if (buffer) {
1159                 if (size > buffer_size)
1160                         return -ERANGE;
1161                 if (ocfs2_xattr_is_local(xs->here)) {
1162                         memcpy(buffer, (void *)xs->base +
1163                                le16_to_cpu(xs->here->xe_name_offset) +
1164                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1165                 } else {
1166                         xv = (struct ocfs2_xattr_value_root *)
1167                                 (xs->base + le16_to_cpu(
1168                                  xs->here->xe_name_offset) +
1169                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1170                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1171                                                             buffer, size);
1172                         if (ret < 0) {
1173                                 mlog_errno(ret);
1174                                 return ret;
1175                         }
1176                 }
1177         }
1178
1179         return size;
1180 }
1181
1182 static int ocfs2_xattr_block_get(struct inode *inode,
1183                                  int name_index,
1184                                  const char *name,
1185                                  void *buffer,
1186                                  size_t buffer_size,
1187                                  struct ocfs2_xattr_search *xs)
1188 {
1189         struct ocfs2_xattr_block *xb;
1190         struct ocfs2_xattr_value_root *xv;
1191         size_t size;
1192         int ret = -ENODATA, name_offset, name_len, i;
1193         int uninitialized_var(block_off);
1194
1195         xs->bucket = ocfs2_xattr_bucket_new(inode);
1196         if (!xs->bucket) {
1197                 ret = -ENOMEM;
1198                 mlog_errno(ret);
1199                 goto cleanup;
1200         }
1201
1202         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1203         if (ret) {
1204                 mlog_errno(ret);
1205                 goto cleanup;
1206         }
1207
1208         if (xs->not_found) {
1209                 ret = -ENODATA;
1210                 goto cleanup;
1211         }
1212
1213         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1214         size = le64_to_cpu(xs->here->xe_value_size);
1215         if (buffer) {
1216                 ret = -ERANGE;
1217                 if (size > buffer_size)
1218                         goto cleanup;
1219
1220                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1221                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1222                 i = xs->here - xs->header->xh_entries;
1223
1224                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1225                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1226                                                                 bucket_xh(xs->bucket),
1227                                                                 i,
1228                                                                 &block_off,
1229                                                                 &name_offset);
1230                         xs->base = bucket_block(xs->bucket, block_off);
1231                 }
1232                 if (ocfs2_xattr_is_local(xs->here)) {
1233                         memcpy(buffer, (void *)xs->base +
1234                                name_offset + name_len, size);
1235                 } else {
1236                         xv = (struct ocfs2_xattr_value_root *)
1237                                 (xs->base + name_offset + name_len);
1238                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1239                                                             buffer, size);
1240                         if (ret < 0) {
1241                                 mlog_errno(ret);
1242                                 goto cleanup;
1243                         }
1244                 }
1245         }
1246         ret = size;
1247 cleanup:
1248         ocfs2_xattr_bucket_free(xs->bucket);
1249
1250         brelse(xs->xattr_bh);
1251         xs->xattr_bh = NULL;
1252         return ret;
1253 }
1254
1255 int ocfs2_xattr_get_nolock(struct inode *inode,
1256                            struct buffer_head *di_bh,
1257                            int name_index,
1258                            const char *name,
1259                            void *buffer,
1260                            size_t buffer_size)
1261 {
1262         int ret;
1263         struct ocfs2_dinode *di = NULL;
1264         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1265         struct ocfs2_xattr_search xis = {
1266                 .not_found = -ENODATA,
1267         };
1268         struct ocfs2_xattr_search xbs = {
1269                 .not_found = -ENODATA,
1270         };
1271
1272         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1273                 return -EOPNOTSUPP;
1274
1275         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1276                 ret = -ENODATA;
1277
1278         xis.inode_bh = xbs.inode_bh = di_bh;
1279         di = (struct ocfs2_dinode *)di_bh->b_data;
1280
1281         down_read(&oi->ip_xattr_sem);
1282         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1283                                     buffer_size, &xis);
1284         if (ret == -ENODATA && di->i_xattr_loc)
1285                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1286                                             buffer_size, &xbs);
1287         up_read(&oi->ip_xattr_sem);
1288
1289         return ret;
1290 }
1291
1292 /* ocfs2_xattr_get()
1293  *
1294  * Copy an extended attribute into the buffer provided.
1295  * Buffer is NULL to compute the size of buffer required.
1296  */
1297 static int ocfs2_xattr_get(struct inode *inode,
1298                            int name_index,
1299                            const char *name,
1300                            void *buffer,
1301                            size_t buffer_size)
1302 {
1303         int ret;
1304         struct buffer_head *di_bh = NULL;
1305
1306         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1307         if (ret < 0) {
1308                 mlog_errno(ret);
1309                 return ret;
1310         }
1311         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1312                                      name, buffer, buffer_size);
1313
1314         ocfs2_inode_unlock(inode, 0);
1315
1316         brelse(di_bh);
1317
1318         return ret;
1319 }
1320
1321 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1322                                            handle_t *handle,
1323                                            struct ocfs2_xattr_value_buf *vb,
1324                                            const void *value,
1325                                            int value_len)
1326 {
1327         int ret = 0, i, cp_len;
1328         u16 blocksize = inode->i_sb->s_blocksize;
1329         u32 p_cluster, num_clusters;
1330         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1331         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1332         u64 blkno;
1333         struct buffer_head *bh = NULL;
1334         unsigned int ext_flags;
1335         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1336
1337         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1338
1339         while (cpos < clusters) {
1340                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1341                                                &num_clusters, &xv->xr_list,
1342                                                &ext_flags);
1343                 if (ret) {
1344                         mlog_errno(ret);
1345                         goto out;
1346                 }
1347
1348                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1349
1350                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1351
1352                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1353                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1354                                                &bh, NULL);
1355                         if (ret) {
1356                                 mlog_errno(ret);
1357                                 goto out;
1358                         }
1359
1360                         ret = ocfs2_journal_access(handle,
1361                                                    INODE_CACHE(inode),
1362                                                    bh,
1363                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1364                         if (ret < 0) {
1365                                 mlog_errno(ret);
1366                                 goto out;
1367                         }
1368
1369                         cp_len = value_len > blocksize ? blocksize : value_len;
1370                         memcpy(bh->b_data, value, cp_len);
1371                         value_len -= cp_len;
1372                         value += cp_len;
1373                         if (cp_len < blocksize)
1374                                 memset(bh->b_data + cp_len, 0,
1375                                        blocksize - cp_len);
1376
1377                         ret = ocfs2_journal_dirty(handle, bh);
1378                         if (ret < 0) {
1379                                 mlog_errno(ret);
1380                                 goto out;
1381                         }
1382                         brelse(bh);
1383                         bh = NULL;
1384
1385                         /*
1386                          * XXX: do we need to empty all the following
1387                          * blocks in this cluster?
1388                          */
1389                         if (!value_len)
1390                                 break;
1391                 }
1392                 cpos += num_clusters;
1393         }
1394 out:
1395         brelse(bh);
1396
1397         return ret;
1398 }
1399
1400 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1401                                        int num_entries)
1402 {
1403         int free_space;
1404
1405         if (!needed_space)
1406                 return 0;
1407
1408         free_space = free_start -
1409                 sizeof(struct ocfs2_xattr_header) -
1410                 (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1411                 OCFS2_XATTR_HEADER_GAP;
1412         if (free_space < 0)
1413                 return -EIO;
1414         if (free_space < needed_space)
1415                 return -ENOSPC;
1416
1417         return 0;
1418 }
1419
1420 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1421                                    int type)
1422 {
1423         return loc->xl_ops->xlo_journal_access(handle, loc, type);
1424 }
1425
1426 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1427 {
1428         loc->xl_ops->xlo_journal_dirty(handle, loc);
1429 }
1430
1431 /* Give a pointer into the storage for the given offset */
1432 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1433 {
1434         BUG_ON(offset >= loc->xl_size);
1435         return loc->xl_ops->xlo_offset_pointer(loc, offset);
1436 }
1437
1438 /*
1439  * Wipe the name+value pair and allow the storage to reclaim it.  This
1440  * must be followed by either removal of the entry or a call to
1441  * ocfs2_xa_add_namevalue().
1442  */
1443 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1444 {
1445         loc->xl_ops->xlo_wipe_namevalue(loc);
1446 }
1447
1448 /*
1449  * Find lowest offset to a name+value pair.  This is the start of our
1450  * downward-growing free space.
1451  */
1452 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1453 {
1454         return loc->xl_ops->xlo_get_free_start(loc);
1455 }
1456
1457 /* Can we reuse loc->xl_entry for xi? */
1458 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1459                                     struct ocfs2_xattr_info *xi)
1460 {
1461         return loc->xl_ops->xlo_can_reuse(loc, xi);
1462 }
1463
1464 /* How much free space is needed to set the new value */
1465 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1466                                 struct ocfs2_xattr_info *xi)
1467 {
1468         return loc->xl_ops->xlo_check_space(loc, xi);
1469 }
1470
1471 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1472 {
1473         loc->xl_ops->xlo_add_entry(loc, name_hash);
1474         loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1475         /*
1476          * We can't leave the new entry's xe_name_offset at zero or
1477          * add_namevalue() will go nuts.  We set it to the size of our
1478          * storage so that it can never be less than any other entry.
1479          */
1480         loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1481 }
1482
1483 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1484                                    struct ocfs2_xattr_info *xi)
1485 {
1486         int size = namevalue_size_xi(xi);
1487         int nameval_offset;
1488         char *nameval_buf;
1489
1490         loc->xl_ops->xlo_add_namevalue(loc, size);
1491         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1492         loc->xl_entry->xe_name_len = xi->xi_name_len;
1493         ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1494         ocfs2_xattr_set_local(loc->xl_entry,
1495                               xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1496
1497         nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1498         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1499         memset(nameval_buf, 0, size);
1500         memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1501 }
1502
1503 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1504                                     struct ocfs2_xattr_value_buf *vb)
1505 {
1506         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1507         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1508
1509         /* Value bufs are for value trees */
1510         BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1511         BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1512                (name_size + OCFS2_XATTR_ROOT_SIZE));
1513
1514         loc->xl_ops->xlo_fill_value_buf(loc, vb);
1515         vb->vb_xv =
1516                 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1517                                                         nameval_offset +
1518                                                         name_size);
1519 }
1520
1521 static int ocfs2_xa_block_journal_access(handle_t *handle,
1522                                          struct ocfs2_xa_loc *loc, int type)
1523 {
1524         struct buffer_head *bh = loc->xl_storage;
1525         ocfs2_journal_access_func access;
1526
1527         if (loc->xl_size == (bh->b_size -
1528                              offsetof(struct ocfs2_xattr_block,
1529                                       xb_attrs.xb_header)))
1530                 access = ocfs2_journal_access_xb;
1531         else
1532                 access = ocfs2_journal_access_di;
1533         return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1534 }
1535
1536 static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1537                                          struct ocfs2_xa_loc *loc)
1538 {
1539         struct buffer_head *bh = loc->xl_storage;
1540
1541         ocfs2_journal_dirty(handle, bh);
1542 }
1543
1544 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1545                                            int offset)
1546 {
1547         return (char *)loc->xl_header + offset;
1548 }
1549
1550 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1551                                     struct ocfs2_xattr_info *xi)
1552 {
1553         /*
1554          * Block storage is strict.  If the sizes aren't exact, we will
1555          * remove the old one and reinsert the new.
1556          */
1557         return namevalue_size_xe(loc->xl_entry) ==
1558                 namevalue_size_xi(xi);
1559 }
1560
1561 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1562 {
1563         struct ocfs2_xattr_header *xh = loc->xl_header;
1564         int i, count = le16_to_cpu(xh->xh_count);
1565         int offset, free_start = loc->xl_size;
1566
1567         for (i = 0; i < count; i++) {
1568                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1569                 if (offset < free_start)
1570                         free_start = offset;
1571         }
1572
1573         return free_start;
1574 }
1575
1576 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1577                                       struct ocfs2_xattr_info *xi)
1578 {
1579         int count = le16_to_cpu(loc->xl_header->xh_count);
1580         int free_start = ocfs2_xa_get_free_start(loc);
1581         int needed_space = ocfs2_xi_entry_usage(xi);
1582
1583         /*
1584          * Block storage will reclaim the original entry before inserting
1585          * the new value, so we only need the difference.  If the new
1586          * entry is smaller than the old one, we don't need anything.
1587          */
1588         if (loc->xl_entry) {
1589                 /* Don't need space if we're reusing! */
1590                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1591                         needed_space = 0;
1592                 else
1593                         needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1594         }
1595         if (needed_space < 0)
1596                 needed_space = 0;
1597         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1598 }
1599
1600 /*
1601  * Block storage for xattrs keeps the name+value pairs compacted.  When
1602  * we remove one, we have to shift any that preceded it towards the end.
1603  */
1604 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1605 {
1606         int i, offset;
1607         int namevalue_offset, first_namevalue_offset, namevalue_size;
1608         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1609         struct ocfs2_xattr_header *xh = loc->xl_header;
1610         int count = le16_to_cpu(xh->xh_count);
1611
1612         namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1613         namevalue_size = namevalue_size_xe(entry);
1614         first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1615
1616         /* Shift the name+value pairs */
1617         memmove((char *)xh + first_namevalue_offset + namevalue_size,
1618                 (char *)xh + first_namevalue_offset,
1619                 namevalue_offset - first_namevalue_offset);
1620         memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1621
1622         /* Now tell xh->xh_entries about it */
1623         for (i = 0; i < count; i++) {
1624                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1625                 if (offset < namevalue_offset)
1626                         le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1627                                      namevalue_size);
1628         }
1629
1630         /*
1631          * Note that we don't update xh_free_start or xh_name_value_len
1632          * because they're not used in block-stored xattrs.
1633          */
1634 }
1635
1636 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1637 {
1638         int count = le16_to_cpu(loc->xl_header->xh_count);
1639         loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1640         le16_add_cpu(&loc->xl_header->xh_count, 1);
1641         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1642 }
1643
1644 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1645 {
1646         int free_start = ocfs2_xa_get_free_start(loc);
1647
1648         loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1649 }
1650
1651 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1652                                           struct ocfs2_xattr_value_buf *vb)
1653 {
1654         struct buffer_head *bh = loc->xl_storage;
1655
1656         if (loc->xl_size == (bh->b_size -
1657                              offsetof(struct ocfs2_xattr_block,
1658                                       xb_attrs.xb_header)))
1659                 vb->vb_access = ocfs2_journal_access_xb;
1660         else
1661                 vb->vb_access = ocfs2_journal_access_di;
1662         vb->vb_bh = bh;
1663 }
1664
1665 /*
1666  * Operations for xattrs stored in blocks.  This includes inline inode
1667  * storage and unindexed ocfs2_xattr_blocks.
1668  */
1669 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1670         .xlo_journal_access     = ocfs2_xa_block_journal_access,
1671         .xlo_journal_dirty      = ocfs2_xa_block_journal_dirty,
1672         .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1673         .xlo_check_space        = ocfs2_xa_block_check_space,
1674         .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1675         .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1676         .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1677         .xlo_add_entry          = ocfs2_xa_block_add_entry,
1678         .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1679         .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1680 };
1681
1682 static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1683                                           struct ocfs2_xa_loc *loc, int type)
1684 {
1685         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1686
1687         return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1688 }
1689
1690 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1691                                           struct ocfs2_xa_loc *loc)
1692 {
1693         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1694
1695         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1696 }
1697
1698 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1699                                             int offset)
1700 {
1701         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1702         int block, block_offset;
1703
1704         /* The header is at the front of the bucket */
1705         block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1706         block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1707
1708         return bucket_block(bucket, block) + block_offset;
1709 }
1710
1711 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1712                                      struct ocfs2_xattr_info *xi)
1713 {
1714         return namevalue_size_xe(loc->xl_entry) >=
1715                 namevalue_size_xi(xi);
1716 }
1717
1718 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1719 {
1720         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1721         return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1722 }
1723
1724 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1725                                          int free_start, int size)
1726 {
1727         /*
1728          * We need to make sure that the name+value pair fits within
1729          * one block.
1730          */
1731         if (((free_start - size) >> sb->s_blocksize_bits) !=
1732             ((free_start - 1) >> sb->s_blocksize_bits))
1733                 free_start -= free_start % sb->s_blocksize;
1734
1735         return free_start;
1736 }
1737
1738 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1739                                        struct ocfs2_xattr_info *xi)
1740 {
1741         int rc;
1742         int count = le16_to_cpu(loc->xl_header->xh_count);
1743         int free_start = ocfs2_xa_get_free_start(loc);
1744         int needed_space = ocfs2_xi_entry_usage(xi);
1745         int size = namevalue_size_xi(xi);
1746         struct super_block *sb = loc->xl_inode->i_sb;
1747
1748         /*
1749          * Bucket storage does not reclaim name+value pairs it cannot
1750          * reuse.  They live as holes until the bucket fills, and then
1751          * the bucket is defragmented.  However, the bucket can reclaim
1752          * the ocfs2_xattr_entry.
1753          */
1754         if (loc->xl_entry) {
1755                 /* Don't need space if we're reusing! */
1756                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1757                         needed_space = 0;
1758                 else
1759                         needed_space -= sizeof(struct ocfs2_xattr_entry);
1760         }
1761         BUG_ON(needed_space < 0);
1762
1763         if (free_start < size) {
1764                 if (needed_space)
1765                         return -ENOSPC;
1766         } else {
1767                 /*
1768                  * First we check if it would fit in the first place.
1769                  * Below, we align the free start to a block.  This may
1770                  * slide us below the minimum gap.  By checking unaligned
1771                  * first, we avoid that error.
1772                  */
1773                 rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1774                                                  count);
1775                 if (rc)
1776                         return rc;
1777                 free_start = ocfs2_bucket_align_free_start(sb, free_start,
1778                                                            size);
1779         }
1780         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1781 }
1782
1783 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1784 {
1785         le16_add_cpu(&loc->xl_header->xh_name_value_len,
1786                      -namevalue_size_xe(loc->xl_entry));
1787 }
1788
1789 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1790 {
1791         struct ocfs2_xattr_header *xh = loc->xl_header;
1792         int count = le16_to_cpu(xh->xh_count);
1793         int low = 0, high = count - 1, tmp;
1794         struct ocfs2_xattr_entry *tmp_xe;
1795
1796         /*
1797          * We keep buckets sorted by name_hash, so we need to find
1798          * our insert place.
1799          */
1800         while (low <= high && count) {
1801                 tmp = (low + high) / 2;
1802                 tmp_xe = &xh->xh_entries[tmp];
1803
1804                 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1805                         low = tmp + 1;
1806                 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1807                         high = tmp - 1;
1808                 else {
1809                         low = tmp;
1810                         break;
1811                 }
1812         }
1813
1814         if (low != count)
1815                 memmove(&xh->xh_entries[low + 1],
1816                         &xh->xh_entries[low],
1817                         ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1818
1819         le16_add_cpu(&xh->xh_count, 1);
1820         loc->xl_entry = &xh->xh_entries[low];
1821         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1822 }
1823
1824 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1825 {
1826         int free_start = ocfs2_xa_get_free_start(loc);
1827         struct ocfs2_xattr_header *xh = loc->xl_header;
1828         struct super_block *sb = loc->xl_inode->i_sb;
1829         int nameval_offset;
1830
1831         free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1832         nameval_offset = free_start - size;
1833         loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1834         xh->xh_free_start = cpu_to_le16(nameval_offset);
1835         le16_add_cpu(&xh->xh_name_value_len, size);
1836
1837 }
1838
1839 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1840                                            struct ocfs2_xattr_value_buf *vb)
1841 {
1842         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1843         struct super_block *sb = loc->xl_inode->i_sb;
1844         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1845         int size = namevalue_size_xe(loc->xl_entry);
1846         int block_offset = nameval_offset >> sb->s_blocksize_bits;
1847
1848         /* Values are not allowed to straddle block boundaries */
1849         BUG_ON(block_offset !=
1850                ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1851         /* We expect the bucket to be filled in */
1852         BUG_ON(!bucket->bu_bhs[block_offset]);
1853
1854         vb->vb_access = ocfs2_journal_access;
1855         vb->vb_bh = bucket->bu_bhs[block_offset];
1856 }
1857
1858 /* Operations for xattrs stored in buckets. */
1859 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1860         .xlo_journal_access     = ocfs2_xa_bucket_journal_access,
1861         .xlo_journal_dirty      = ocfs2_xa_bucket_journal_dirty,
1862         .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1863         .xlo_check_space        = ocfs2_xa_bucket_check_space,
1864         .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1865         .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1866         .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1867         .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1868         .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1869         .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1870 };
1871
1872 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1873                                    struct ocfs2_xattr_set_ctxt *ctxt)
1874 {
1875         int trunc_rc, access_rc;
1876         struct ocfs2_xattr_value_buf vb;
1877
1878         ocfs2_xa_fill_value_buf(loc, &vb);
1879         trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1880                                               ctxt);
1881
1882         /*
1883          * The caller of ocfs2_xa_value_truncate() has already called
1884          * ocfs2_xa_journal_access on the loc.  However, The truncate code
1885          * calls ocfs2_extend_trans().  This may commit the previous
1886          * transaction and open a new one.  If this is a bucket, truncate
1887          * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1888          * the caller is expecting to dirty the entire bucket.  So we must
1889          * reset the journal work.  We do this even if truncate has failed,
1890          * as it could have failed after committing the extend.
1891          */
1892         access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1893                                             OCFS2_JOURNAL_ACCESS_WRITE);
1894
1895         /* Errors in truncate take precedence */
1896         return trunc_rc ? trunc_rc : access_rc;
1897 }
1898
1899 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1900 {
1901         int index, count;
1902         struct ocfs2_xattr_header *xh = loc->xl_header;
1903         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1904
1905         ocfs2_xa_wipe_namevalue(loc);
1906         loc->xl_entry = NULL;
1907
1908         le16_add_cpu(&xh->xh_count, -1);
1909         count = le16_to_cpu(xh->xh_count);
1910
1911         /*
1912          * Only zero out the entry if there are more remaining.  This is
1913          * important for an empty bucket, as it keeps track of the
1914          * bucket's hash value.  It doesn't hurt empty block storage.
1915          */
1916         if (count) {
1917                 index = ((char *)entry - (char *)&xh->xh_entries) /
1918                         sizeof(struct ocfs2_xattr_entry);
1919                 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1920                         (count - index) * sizeof(struct ocfs2_xattr_entry));
1921                 memset(&xh->xh_entries[count], 0,
1922                        sizeof(struct ocfs2_xattr_entry));
1923         }
1924 }
1925
1926 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
1927                            struct ocfs2_xattr_set_ctxt *ctxt)
1928 {
1929         int rc = 0;
1930
1931         if (!ocfs2_xattr_is_local(loc->xl_entry)) {
1932                 rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
1933                 if (rc) {
1934                         mlog_errno(rc);
1935                         goto out;
1936                 }
1937         }
1938
1939         ocfs2_xa_remove_entry(loc);
1940
1941 out:
1942         return rc;
1943 }
1944
1945 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
1946 {
1947         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1948         char *nameval_buf;
1949
1950         nameval_buf = ocfs2_xa_offset_pointer(loc,
1951                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
1952         memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
1953 }
1954
1955 /*
1956  * Take an existing entry and make it ready for the new value.  This
1957  * won't allocate space, but it may free space.  It should be ready for
1958  * ocfs2_xa_prepare_entry() to finish the work.
1959  */
1960 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
1961                                 struct ocfs2_xattr_info *xi,
1962                                 struct ocfs2_xattr_set_ctxt *ctxt)
1963 {
1964         int rc = 0;
1965         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
1966         char *nameval_buf;
1967         int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
1968         int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
1969
1970         BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
1971                name_size);
1972
1973         nameval_buf = ocfs2_xa_offset_pointer(loc,
1974                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
1975         if (xe_local) {
1976                 memset(nameval_buf + name_size, 0,
1977                        namevalue_size_xe(loc->xl_entry) - name_size);
1978                 if (!xi_local)
1979                         ocfs2_xa_install_value_root(loc);
1980         } else {
1981                 if (xi_local) {
1982                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
1983                         if (rc < 0) {
1984                                 mlog_errno(rc);
1985                                 goto out;
1986                         }
1987                         memset(nameval_buf + name_size, 0,
1988                                namevalue_size_xe(loc->xl_entry) -
1989                                name_size);
1990                 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
1991                            xi->xi_value_len) {
1992                         rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
1993                                                      ctxt);
1994                         if (rc < 0) {
1995                                 mlog_errno(rc);
1996                                 goto out;
1997                         }
1998                 }
1999         }
2000
2001         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2002         ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2003
2004 out:
2005         return rc;
2006 }
2007
2008 /*
2009  * Prepares loc->xl_entry to receive the new xattr.  This includes
2010  * properly setting up the name+value pair region.  If loc->xl_entry
2011  * already exists, it will take care of modifying it appropriately.
2012  *
2013  * Note that this modifies the data.  You did journal_access already,
2014  * right?
2015  */
2016 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2017                                   struct ocfs2_xattr_info *xi,
2018                                   u32 name_hash,
2019                                   struct ocfs2_xattr_set_ctxt *ctxt)
2020 {
2021         int rc = 0;
2022
2023         rc = ocfs2_xa_check_space(loc, xi);
2024         if (rc)
2025                 goto out;
2026
2027         if (loc->xl_entry) {
2028                 if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2029                         rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2030                         if (rc)
2031                                 goto out;
2032                         goto alloc_value;
2033                 }
2034
2035                 if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2036                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2037                         if (rc) {
2038                                 mlog_errno(rc);
2039                                 goto out;
2040                         }
2041                 }
2042                 ocfs2_xa_wipe_namevalue(loc);
2043         } else
2044                 ocfs2_xa_add_entry(loc, name_hash);
2045
2046         /*
2047          * If we get here, we have a blank entry.  Fill it.  We grow our
2048          * name+value pair back from the end.
2049          */
2050         ocfs2_xa_add_namevalue(loc, xi);
2051         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2052                 ocfs2_xa_install_value_root(loc);
2053
2054 alloc_value:
2055         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2056                 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2057                 if (rc < 0)
2058                         mlog_errno(rc);
2059         }
2060
2061 out:
2062         return rc;
2063 }
2064
2065 /*
2066  * Store the value portion of the name+value pair.  This will skip
2067  * values that are stored externally.  Their tree roots were set up
2068  * by ocfs2_xa_prepare_entry().
2069  */
2070 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2071                                 struct ocfs2_xattr_info *xi,
2072                                 struct ocfs2_xattr_set_ctxt *ctxt)
2073 {
2074         int rc = 0;
2075         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2076         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2077         char *nameval_buf;
2078         struct ocfs2_xattr_value_buf vb;
2079
2080         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2081         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2082                 ocfs2_xa_fill_value_buf(loc, &vb);
2083                 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2084                                                      ctxt->handle, &vb,
2085                                                      xi->xi_value,
2086                                                      xi->xi_value_len);
2087         } else
2088                 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2089
2090         return rc;
2091 }
2092
2093 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2094                         struct ocfs2_xattr_info *xi,
2095                         struct ocfs2_xattr_set_ctxt *ctxt)
2096 {
2097         int ret;
2098         u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2099                                               xi->xi_name_len);
2100
2101         ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2102                                       OCFS2_JOURNAL_ACCESS_WRITE);
2103         if (ret) {
2104                 mlog_errno(ret);
2105                 goto out;
2106         }
2107
2108         /* Don't worry, we are never called with !xi_value and !xl_entry */
2109         if (!xi->xi_value) {
2110                 ret = ocfs2_xa_remove(loc, ctxt);
2111                 goto out;
2112         }
2113
2114         ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2115         if (ret) {
2116                 if (ret != -ENOSPC)
2117                         mlog_errno(ret);
2118                 goto out;
2119         }
2120
2121         ret = ocfs2_xa_store_value(loc, xi, ctxt);
2122         if (ret) {
2123                 mlog_errno(ret);
2124                 goto out;
2125         }
2126
2127         ocfs2_xa_journal_dirty(ctxt->handle, loc);
2128
2129 out:
2130         return ret;
2131 }
2132
2133 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2134                                      struct inode *inode,
2135                                      struct buffer_head *bh,
2136                                      struct ocfs2_xattr_entry *entry)
2137 {
2138         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2139
2140         loc->xl_inode = inode;
2141         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2142         loc->xl_storage = bh;
2143         loc->xl_entry = entry;
2144
2145         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2146                 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2147         else {
2148                 BUG_ON(entry);
2149                 loc->xl_size = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2150         }
2151         loc->xl_header =
2152                 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2153                                               loc->xl_size);
2154 }
2155
2156 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2157                                           struct inode *inode,
2158                                           struct buffer_head *bh,
2159                                           struct ocfs2_xattr_entry *entry)
2160 {
2161         struct ocfs2_xattr_block *xb =
2162                 (struct ocfs2_xattr_block *)bh->b_data;
2163
2164         BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2165
2166         loc->xl_inode = inode;
2167         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2168         loc->xl_storage = bh;
2169         loc->xl_header = &(xb->xb_attrs.xb_header);
2170         loc->xl_entry = entry;
2171         loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2172                                              xb_attrs.xb_header);
2173 }
2174
2175 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2176                                            struct ocfs2_xattr_bucket *bucket,
2177                                            struct ocfs2_xattr_entry *entry)
2178 {
2179         loc->xl_inode = bucket->bu_inode;
2180         loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2181         loc->xl_storage = bucket;
2182         loc->xl_header = bucket_xh(bucket);
2183         loc->xl_entry = entry;
2184         loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2185 }
2186
2187
2188 /*
2189  * ocfs2_xattr_set_entry()
2190  *
2191  * Set extended attribute entry into inode or block.
2192  *
2193  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
2194  * We first insert tree root(ocfs2_xattr_value_root) like a normal value,
2195  * then set value in B tree with set_value_outside().
2196  */
2197 static int ocfs2_xattr_set_entry(struct inode *inode,
2198                                  struct ocfs2_xattr_info *xi,
2199                                  struct ocfs2_xattr_search *xs,
2200                                  struct ocfs2_xattr_set_ctxt *ctxt,
2201                                  int flag)
2202 {
2203         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2204         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2205         handle_t *handle = ctxt->handle;
2206         int ret;
2207         struct ocfs2_xa_loc loc;
2208
2209         if (!(flag & OCFS2_INLINE_XATTR_FL))
2210                 BUG_ON(xs->xattr_bh == xs->inode_bh);
2211         else
2212                 BUG_ON(xs->xattr_bh != xs->inode_bh);
2213
2214         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
2215                                       OCFS2_JOURNAL_ACCESS_WRITE);
2216         if (ret) {
2217                 mlog_errno(ret);
2218                 goto out;
2219         }
2220
2221         if (xs->xattr_bh == xs->inode_bh)
2222                 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2223                                          xs->not_found ? NULL : xs->here);
2224         else
2225                 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2226                                               xs->not_found ? NULL : xs->here);
2227
2228         ret = ocfs2_xa_set(&loc, xi, ctxt);
2229         if (ret) {
2230                 if (ret != -ENOSPC)
2231                         mlog_errno(ret);
2232                 goto out;
2233         }
2234         xs->here = loc.xl_entry;
2235
2236         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
2237             (flag & OCFS2_INLINE_XATTR_FL)) {
2238                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2239                 unsigned int xattrsize = osb->s_xattr_inline_size;
2240
2241                 /*
2242                  * Adjust extent record count or inline data size
2243                  * to reserve space for extended attribute.
2244                  */
2245                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2246                         struct ocfs2_inline_data *idata = &di->id2.i_data;
2247                         le16_add_cpu(&idata->id_count, -xattrsize);
2248                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2249                         struct ocfs2_extent_list *el = &di->id2.i_list;
2250                         le16_add_cpu(&el->l_count, -(xattrsize /
2251                                         sizeof(struct ocfs2_extent_rec)));
2252                 }
2253                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2254         }
2255         /* Update xattr flag */
2256         spin_lock(&oi->ip_lock);
2257         oi->ip_dyn_features |= flag;
2258         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2259         spin_unlock(&oi->ip_lock);
2260
2261         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
2262         if (ret < 0)
2263                 mlog_errno(ret);
2264
2265 out:
2266         return ret;
2267 }
2268
2269 /*
2270  * In xattr remove, if it is stored outside and refcounted, we may have
2271  * the chance to split the refcount tree. So need the allocators.
2272  */
2273 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2274                                         struct ocfs2_xattr_value_root *xv,
2275                                         struct ocfs2_caching_info *ref_ci,
2276                                         struct buffer_head *ref_root_bh,
2277                                         struct ocfs2_alloc_context **meta_ac,
2278                                         int *ref_credits)
2279 {
2280         int ret, meta_add = 0;
2281         u32 p_cluster, num_clusters;
2282         unsigned int ext_flags;
2283
2284         *ref_credits = 0;
2285         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2286                                        &num_clusters,
2287                                        &xv->xr_list,
2288                                        &ext_flags);
2289         if (ret) {
2290                 mlog_errno(ret);
2291                 goto out;
2292         }
2293
2294         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2295                 goto out;
2296
2297         ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2298                                                  ref_root_bh, xv,
2299                                                  &meta_add, ref_credits);
2300         if (ret) {
2301                 mlog_errno(ret);
2302                 goto out;
2303         }
2304
2305         ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2306                                                 meta_add, meta_ac);
2307         if (ret)
2308                 mlog_errno(ret);
2309
2310 out:
2311         return ret;
2312 }
2313
2314 static int ocfs2_remove_value_outside(struct inode*inode,
2315                                       struct ocfs2_xattr_value_buf *vb,
2316                                       struct ocfs2_xattr_header *header,
2317                                       struct ocfs2_caching_info *ref_ci,
2318                                       struct buffer_head *ref_root_bh)
2319 {
2320         int ret = 0, i, ref_credits;
2321         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2322         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2323         void *val;
2324
2325         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2326
2327         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2328                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2329
2330                 if (ocfs2_xattr_is_local(entry))
2331                         continue;
2332
2333                 val = (void *)header +
2334                         le16_to_cpu(entry->xe_name_offset);
2335                 vb->vb_xv = (struct ocfs2_xattr_value_root *)
2336                         (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2337
2338                 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2339                                                          ref_ci, ref_root_bh,
2340                                                          &ctxt.meta_ac,
2341                                                          &ref_credits);
2342
2343                 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2344                                         ocfs2_remove_extent_credits(osb->sb));
2345                 if (IS_ERR(ctxt.handle)) {
2346                         ret = PTR_ERR(ctxt.handle);
2347                         mlog_errno(ret);
2348                         break;
2349                 }
2350
2351                 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2352                 if (ret < 0) {
2353                         mlog_errno(ret);
2354                         break;
2355                 }
2356
2357                 ocfs2_commit_trans(osb, ctxt.handle);
2358                 if (ctxt.meta_ac) {
2359                         ocfs2_free_alloc_context(ctxt.meta_ac);
2360                         ctxt.meta_ac = NULL;
2361                 }
2362         }
2363
2364         if (ctxt.meta_ac)
2365                 ocfs2_free_alloc_context(ctxt.meta_ac);
2366         ocfs2_schedule_truncate_log_flush(osb, 1);
2367         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2368         return ret;
2369 }
2370
2371 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2372                                     struct buffer_head *di_bh,
2373                                     struct ocfs2_caching_info *ref_ci,
2374                                     struct buffer_head *ref_root_bh)
2375 {
2376
2377         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2378         struct ocfs2_xattr_header *header;
2379         int ret;
2380         struct ocfs2_xattr_value_buf vb = {
2381                 .vb_bh = di_bh,
2382                 .vb_access = ocfs2_journal_access_di,
2383         };
2384
2385         header = (struct ocfs2_xattr_header *)
2386                  ((void *)di + inode->i_sb->s_blocksize -
2387                  le16_to_cpu(di->i_xattr_inline_size));
2388
2389         ret = ocfs2_remove_value_outside(inode, &vb, header,
2390                                          ref_ci, ref_root_bh);
2391
2392         return ret;
2393 }
2394
/*
 * Refcount tree arguments bundled together so that
 * ocfs2_xattr_block_remove() can pass them as the single opaque
 * parameter of ocfs2_iterate_xattr_index_block().
 */
struct ocfs2_rm_xattr_bucket_para {
        struct ocfs2_caching_info       *ref_ci;
        struct buffer_head              *ref_root_bh;
};
2399
2400 static int ocfs2_xattr_block_remove(struct inode *inode,
2401                                     struct buffer_head *blk_bh,
2402                                     struct ocfs2_caching_info *ref_ci,
2403                                     struct buffer_head *ref_root_bh)
2404 {
2405         struct ocfs2_xattr_block *xb;
2406         int ret = 0;
2407         struct ocfs2_xattr_value_buf vb = {
2408                 .vb_bh = blk_bh,
2409                 .vb_access = ocfs2_journal_access_xb,
2410         };
2411         struct ocfs2_rm_xattr_bucket_para args = {
2412                 .ref_ci = ref_ci,
2413                 .ref_root_bh = ref_root_bh,
2414         };
2415
2416         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2417         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2418                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2419                 ret = ocfs2_remove_value_outside(inode, &vb, header,
2420                                                  ref_ci, ref_root_bh);
2421         } else
2422                 ret = ocfs2_iterate_xattr_index_block(inode,
2423                                                 blk_bh,
2424                                                 ocfs2_rm_xattr_cluster,
2425                                                 &args);
2426
2427         return ret;
2428 }
2429
2430 static int ocfs2_xattr_free_block(struct inode *inode,
2431                                   u64 block,
2432                                   struct ocfs2_caching_info *ref_ci,
2433                                   struct buffer_head *ref_root_bh)
2434 {
2435         struct inode *xb_alloc_inode;
2436         struct buffer_head *xb_alloc_bh = NULL;
2437         struct buffer_head *blk_bh = NULL;
2438         struct ocfs2_xattr_block *xb;
2439         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2440         handle_t *handle;
2441         int ret = 0;
2442         u64 blk, bg_blkno;
2443         u16 bit;
2444
2445         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2446         if (ret < 0) {
2447                 mlog_errno(ret);
2448                 goto out;
2449         }
2450
2451         ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2452         if (ret < 0) {
2453                 mlog_errno(ret);
2454                 goto out;
2455         }
2456
2457         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2458         blk = le64_to_cpu(xb->xb_blkno);
2459         bit = le16_to_cpu(xb->xb_suballoc_bit);
2460         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2461
2462         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2463                                 EXTENT_ALLOC_SYSTEM_INODE,
2464                                 le16_to_cpu(xb->xb_suballoc_slot));
2465         if (!xb_alloc_inode) {
2466                 ret = -ENOMEM;
2467                 mlog_errno(ret);
2468                 goto out;
2469         }
2470         mutex_lock(&xb_alloc_inode->i_mutex);
2471
2472         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2473         if (ret < 0) {
2474                 mlog_errno(ret);
2475                 goto out_mutex;
2476         }
2477
2478         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2479         if (IS_ERR(handle)) {
2480                 ret = PTR_ERR(handle);
2481                 mlog_errno(ret);
2482                 goto out_unlock;
2483         }
2484
2485         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2486                                        bit, bg_blkno, 1);
2487         if (ret < 0)
2488                 mlog_errno(ret);
2489
2490         ocfs2_commit_trans(osb, handle);
2491 out_unlock:
2492         ocfs2_inode_unlock(xb_alloc_inode, 1);
2493         brelse(xb_alloc_bh);
2494 out_mutex:
2495         mutex_unlock(&xb_alloc_inode->i_mutex);
2496         iput(xb_alloc_inode);
2497 out:
2498         brelse(blk_bh);
2499         return ret;
2500 }
2501
2502 /*
2503  * ocfs2_xattr_remove()
2504  *
2505  * Free extended attribute resources associated with this inode.
2506  */
2507 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2508 {
2509         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2510         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2511         struct ocfs2_refcount_tree *ref_tree = NULL;
2512         struct buffer_head *ref_root_bh = NULL;
2513         struct ocfs2_caching_info *ref_ci = NULL;
2514         handle_t *handle;
2515         int ret;
2516
2517         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2518                 return 0;
2519
2520         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2521                 return 0;
2522
2523         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2524                 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2525                                                le64_to_cpu(di->i_refcount_loc),
2526                                                1, &ref_tree, &ref_root_bh);
2527                 if (ret) {
2528                         mlog_errno(ret);
2529                         goto out;
2530                 }
2531                 ref_ci = &ref_tree->rf_ci;
2532
2533         }
2534
2535         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2536                 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2537                                                ref_ci, ref_root_bh);
2538                 if (ret < 0) {
2539                         mlog_errno(ret);
2540                         goto out;
2541                 }
2542         }
2543
2544         if (di->i_xattr_loc) {
2545                 ret = ocfs2_xattr_free_block(inode,
2546                                              le64_to_cpu(di->i_xattr_loc),
2547                                              ref_ci, ref_root_bh);
2548                 if (ret < 0) {
2549                         mlog_errno(ret);
2550                         goto out;
2551                 }
2552         }
2553
2554         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2555                                    OCFS2_INODE_UPDATE_CREDITS);
2556         if (IS_ERR(handle)) {
2557                 ret = PTR_ERR(handle);
2558                 mlog_errno(ret);
2559                 goto out;
2560         }
2561         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2562                                       OCFS2_JOURNAL_ACCESS_WRITE);
2563         if (ret) {
2564                 mlog_errno(ret);
2565                 goto out_commit;
2566         }
2567
2568         di->i_xattr_loc = 0;
2569
2570         spin_lock(&oi->ip_lock);
2571         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2572         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2573         spin_unlock(&oi->ip_lock);
2574
2575         ret = ocfs2_journal_dirty(handle, di_bh);
2576         if (ret < 0)
2577                 mlog_errno(ret);
2578 out_commit:
2579         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2580 out:
2581         if (ref_tree)
2582                 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2583         brelse(ref_root_bh);
2584         return ret;
2585 }
2586
2587 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2588                                         struct ocfs2_dinode *di)
2589 {
2590         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2591         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2592         int free;
2593
2594         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2595                 return 0;
2596
2597         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2598                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2599                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2600         } else if (ocfs2_inode_is_fast_symlink(inode)) {
2601                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2602                         le64_to_cpu(di->i_size);
2603         } else {
2604                 struct ocfs2_extent_list *el = &di->id2.i_list;
2605                 free = (le16_to_cpu(el->l_count) -
2606                         le16_to_cpu(el->l_next_free_rec)) *
2607                         sizeof(struct ocfs2_extent_rec);
2608         }
2609         if (free >= xattrsize)
2610                 return 1;
2611
2612         return 0;
2613 }
2614
2615 /*
2616  * ocfs2_xattr_ibody_find()
2617  *
2618  * Find extended attribute in inode block and
2619  * fill search info into struct ocfs2_xattr_search.
2620  */
2621 static int ocfs2_xattr_ibody_find(struct inode *inode,
2622                                   int name_index,
2623                                   const char *name,
2624                                   struct ocfs2_xattr_search *xs)
2625 {
2626         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2627         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2628         int ret;
2629         int has_space = 0;
2630
2631         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2632                 return 0;
2633
2634         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2635                 down_read(&oi->ip_alloc_sem);
2636                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2637                 up_read(&oi->ip_alloc_sem);
2638                 if (!has_space)
2639                         return 0;
2640         }
2641
2642         xs->xattr_bh = xs->inode_bh;
2643         xs->end = (void *)di + inode->i_sb->s_blocksize;
2644         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2645                 xs->header = (struct ocfs2_xattr_header *)
2646                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2647         else
2648                 xs->header = (struct ocfs2_xattr_header *)
2649                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2650         xs->base = (void *)xs->header;
2651         xs->here = xs->header->xh_entries;
2652
2653         /* Find the named attribute. */
2654         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2655                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2656                 if (ret && ret != -ENODATA)
2657                         return ret;
2658                 xs->not_found = ret;
2659         }
2660
2661         return 0;
2662 }
2663
2664 /*
2665  * ocfs2_xattr_ibody_set()
2666  *
2667  * Set, replace or remove an extended attribute into inode block.
2668  *
2669  */
2670 static int ocfs2_xattr_ibody_set(struct inode *inode,
2671                                  struct ocfs2_xattr_info *xi,
2672                                  struct ocfs2_xattr_search *xs,
2673                                  struct ocfs2_xattr_set_ctxt *ctxt)
2674 {
2675         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2676         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2677         int ret;
2678
2679         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2680                 return -ENOSPC;
2681
2682         down_write(&oi->ip_alloc_sem);
2683         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2684                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2685                         ret = -ENOSPC;
2686                         goto out;
2687                 }
2688         }
2689
2690         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2691                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2692 out:
2693         up_write(&oi->ip_alloc_sem);
2694
2695         return ret;
2696 }
2697
2698 /*
2699  * ocfs2_xattr_block_find()
2700  *
2701  * Find extended attribute in external block and
2702  * fill search info into struct ocfs2_xattr_search.
2703  */
2704 static int ocfs2_xattr_block_find(struct inode *inode,
2705                                   int name_index,
2706                                   const char *name,
2707                                   struct ocfs2_xattr_search *xs)
2708 {
2709         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2710         struct buffer_head *blk_bh = NULL;
2711         struct ocfs2_xattr_block *xb;
2712         int ret = 0;
2713
2714         if (!di->i_xattr_loc)
2715                 return ret;
2716
2717         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2718                                      &blk_bh);
2719         if (ret < 0) {
2720                 mlog_errno(ret);
2721                 return ret;
2722         }
2723
2724         xs->xattr_bh = blk_bh;
2725         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2726
2727         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2728                 xs->header = &xb->xb_attrs.xb_header;
2729                 xs->base = (void *)xs->header;
2730                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2731                 xs->here = xs->header->xh_entries;
2732
2733                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2734         } else
2735                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2736                                                    name_index,
2737                                                    name, xs);
2738
2739         if (ret && ret != -ENODATA) {
2740                 xs->xattr_bh = NULL;
2741                 goto cleanup;
2742         }
2743         xs->not_found = ret;
2744         return 0;
2745 cleanup:
2746         brelse(blk_bh);
2747
2748         return ret;
2749 }
2750
2751 static int ocfs2_create_xattr_block(handle_t *handle,
2752                                     struct inode *inode,
2753                                     struct buffer_head *inode_bh,
2754                                     struct ocfs2_alloc_context *meta_ac,
2755                                     struct buffer_head **ret_bh,
2756                                     int indexed)
2757 {
2758         int ret;
2759         u16 suballoc_bit_start;
2760         u32 num_got;
2761         u64 first_blkno;
2762         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2763         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2764         struct buffer_head *new_bh = NULL;
2765         struct ocfs2_xattr_block *xblk;
2766
2767         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2768                                       OCFS2_JOURNAL_ACCESS_CREATE);
2769         if (ret < 0) {
2770                 mlog_errno(ret);
2771                 goto end;
2772         }
2773
2774         ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2775                                    &suballoc_bit_start, &num_got,
2776                                    &first_blkno);
2777         if (ret < 0) {
2778                 mlog_errno(ret);
2779                 goto end;
2780         }
2781
2782         new_bh = sb_getblk(inode->i_sb, first_blkno);
2783         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2784
2785         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2786                                       new_bh,
2787                                       OCFS2_JOURNAL_ACCESS_CREATE);
2788         if (ret < 0) {
2789                 mlog_errno(ret);
2790                 goto end;
2791         }
2792
2793         /* Initialize ocfs2_xattr_block */
2794         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2795         memset(xblk, 0, inode->i_sb->s_blocksize);
2796         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2797         xblk->xb_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2798         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2799         xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2800         xblk->xb_blkno = cpu_to_le64(first_blkno);
2801
2802         if (indexed) {
2803                 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2804                 xr->xt_clusters = cpu_to_le32(1);
2805                 xr->xt_last_eb_blk = 0;
2806                 xr->xt_list.l_tree_depth = 0;
2807                 xr->xt_list.l_count = cpu_to_le16(
2808                                         ocfs2_xattr_recs_per_xb(inode->i_sb));
2809                 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2810                 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2811         }
2812
2813         ret = ocfs2_journal_dirty(handle, new_bh);
2814         if (ret < 0) {
2815                 mlog_errno(ret);
2816                 goto end;
2817         }
2818         di->i_xattr_loc = cpu_to_le64(first_blkno);
2819         ocfs2_journal_dirty(handle, inode_bh);
2820
2821         *ret_bh = new_bh;
2822         new_bh = NULL;
2823
2824 end:
2825         brelse(new_bh);
2826         return ret;
2827 }
2828
2829 /*
2830  * ocfs2_xattr_block_set()
2831  *
2832  * Set, replace or remove an extended attribute into external block.
2833  *
2834  */
2835 static int ocfs2_xattr_block_set(struct inode *inode,
2836                                  struct ocfs2_xattr_info *xi,
2837                                  struct ocfs2_xattr_search *xs,
2838                                  struct ocfs2_xattr_set_ctxt *ctxt)
2839 {
2840         struct buffer_head *new_bh = NULL;
2841         handle_t *handle = ctxt->handle;
2842         struct ocfs2_xattr_block *xblk = NULL;
2843         int ret;
2844
2845         if (!xs->xattr_bh) {
2846                 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2847                                                ctxt->meta_ac, &new_bh, 0);
2848                 if (ret) {
2849                         mlog_errno(ret);
2850                         goto end;
2851                 }
2852
2853                 xs->xattr_bh = new_bh;
2854                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2855                 xs->header = &xblk->xb_attrs.xb_header;
2856                 xs->base = (void *)xs->header;
2857                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2858                 xs->here = xs->header->xh_entries;
2859         } else
2860                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2861
2862         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2863                 /* Set extended attribute into external block */
2864                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2865                                             OCFS2_HAS_XATTR_FL);
2866                 if (!ret || ret != -ENOSPC)
2867                         goto end;
2868
2869                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2870                 if (ret)
2871                         goto end;
2872         }
2873
2874         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2875
2876 end:
2877
2878         return ret;
2879 }
2880
2881 /* Check whether the new xattr can be inserted into the inode. */
2882 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2883                                        struct ocfs2_xattr_info *xi,
2884                                        struct ocfs2_xattr_search *xs)
2885 {
2886         struct ocfs2_xattr_entry *last;
2887         int free, i;
2888         size_t min_offs = xs->end - xs->base;
2889
2890         if (!xs->header)
2891                 return 0;
2892
2893         last = xs->header->xh_entries;
2894
2895         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2896                 size_t offs = le16_to_cpu(last->xe_name_offset);
2897                 if (offs < min_offs)
2898                         min_offs = offs;
2899                 last += 1;
2900         }
2901
2902         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2903         if (free < 0)
2904                 return 0;
2905
2906         BUG_ON(!xs->not_found);
2907
2908         if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
2909                 return 1;
2910
2911         return 0;
2912 }
2913
2914 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2915                                      struct ocfs2_dinode *di,
2916                                      struct ocfs2_xattr_info *xi,
2917                                      struct ocfs2_xattr_search *xis,
2918                                      struct ocfs2_xattr_search *xbs,
2919                                      int *clusters_need,
2920                                      int *meta_need,
2921                                      int *credits_need)
2922 {
2923         int ret = 0, old_in_xb = 0;
2924         int clusters_add = 0, meta_add = 0, credits = 0;
2925         struct buffer_head *bh = NULL;
2926         struct ocfs2_xattr_block *xb = NULL;
2927         struct ocfs2_xattr_entry *xe = NULL;
2928         struct ocfs2_xattr_value_root *xv = NULL;
2929         char *base = NULL;
2930         int name_offset, name_len = 0;
2931         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2932                                                     xi->xi_value_len);
2933         u64 value_size;
2934
2935         /*
2936          * Calculate the clusters we need to write.
2937          * No matter whether we replace an old one or add a new one,
2938          * we need this for writing.
2939          */
2940         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2941                 credits += new_clusters *
2942                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2943
2944         if (xis->not_found && xbs->not_found) {
2945                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2946
2947                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2948                         clusters_add += new_clusters;
2949                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2950                                                         &def_xv.xv.xr_list,
2951                                                         new_clusters);
2952                 }
2953
2954                 goto meta_guess;
2955         }
2956
2957         if (!xis->not_found) {
2958                 xe = xis->here;
2959                 name_offset = le16_to_cpu(xe->xe_name_offset);
2960                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2961                 base = xis->base;
2962                 credits += OCFS2_INODE_UPDATE_CREDITS;
2963         } else {
2964                 int i, block_off = 0;
2965                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2966                 xe = xbs->here;
2967                 name_offset = le16_to_cpu(xe->xe_name_offset);
2968                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2969                 i = xbs->here - xbs->header->xh_entries;
2970                 old_in_xb = 1;
2971
2972                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2973                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2974                                                         bucket_xh(xbs->bucket),
2975                                                         i, &block_off,
2976                                                         &name_offset);
2977                         base = bucket_block(xbs->bucket, block_off);
2978                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2979                 } else {
2980                         base = xbs->base;
2981                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2982                 }
2983         }
2984
2985         /*
2986          * delete a xattr doesn't need metadata and cluster allocation.
2987          * so just calculate the credits and return.
2988          *
2989          * The credits for removing the value tree will be extended
2990          * by ocfs2_remove_extent itself.
2991          */
2992         if (!xi->xi_value) {
2993                 if (!ocfs2_xattr_is_local(xe))
2994                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2995
2996                 goto out;
2997         }
2998
2999         /* do cluster allocation guess first. */
3000         value_size = le64_to_cpu(xe->xe_value_size);
3001
3002         if (old_in_xb) {
3003                 /*
3004                  * In xattr set, we always try to set the xe in inode first,
3005                  * so if it can be inserted into inode successfully, the old
3006                  * one will be removed from the xattr block, and this xattr
3007                  * will be inserted into inode as a new xattr in inode.
3008                  */
3009                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3010                         clusters_add += new_clusters;
3011                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
3012                                     OCFS2_INODE_UPDATE_CREDITS;
3013                         if (!ocfs2_xattr_is_local(xe))
3014                                 credits += ocfs2_calc_extend_credits(
3015                                                         inode->i_sb,
3016                                                         &def_xv.xv.xr_list,
3017                                                         new_clusters);
3018                         goto out;
3019                 }
3020         }
3021
3022         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3023                 /* the new values will be stored outside. */
3024                 u32 old_clusters = 0;
3025
3026                 if (!ocfs2_xattr_is_local(xe)) {
3027                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3028                                                                  value_size);
3029                         xv = (struct ocfs2_xattr_value_root *)
3030                              (base + name_offset + name_len);
3031                         value_size = OCFS2_XATTR_ROOT_SIZE;
3032                 } else
3033                         xv = &def_xv.xv;
3034
3035                 if (old_clusters >= new_clusters) {
3036                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3037                         goto out;
3038                 } else {
3039                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3040                         clusters_add += new_clusters - old_clusters;
3041                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3042                                                              &xv->xr_list,
3043                                                              new_clusters -
3044                                                              old_clusters);
3045                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3046                                 goto out;
3047                 }
3048         } else {
3049                 /*
3050                  * Now the new value will be stored inside. So if the new
3051                  * value is smaller than the size of value root or the old
3052                  * value, we don't need any allocation, otherwise we have
3053                  * to guess metadata allocation.
3054                  */
3055                 if ((ocfs2_xattr_is_local(xe) &&
3056                      (value_size >= xi->xi_value_len)) ||
3057                     (!ocfs2_xattr_is_local(xe) &&
3058                      OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3059                         goto out;
3060         }
3061
3062 meta_guess:
3063         /* calculate metadata allocation. */
3064         if (di->i_xattr_loc) {
3065                 if (!xbs->xattr_bh) {
3066                         ret = ocfs2_read_xattr_block(inode,
3067                                                      le64_to_cpu(di->i_xattr_loc),
3068                                                      &bh);
3069                         if (ret) {
3070                                 mlog_errno(ret);
3071                                 goto out;
3072                         }
3073
3074                         xb = (struct ocfs2_xattr_block *)bh->b_data;
3075                 } else
3076                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3077
3078                 /*
3079                  * If there is already an xattr tree, good, we can calculate
3080                  * like other b-trees. Otherwise we may have the chance of
3081                  * create a tree, the credit calculation is borrowed from
3082                  * ocfs2_calc_extend_credits with root_el = NULL. And the
3083                  * new tree will be cluster based, so no meta is needed.
3084                  */
3085                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3086                         struct ocfs2_extent_list *el =
3087                                  &xb->xb_attrs.xb_root.xt_list;
3088                         meta_add += ocfs2_extend_meta_needed(el);
3089                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3090                                                              el, 1);
3091                 } else
3092                         credits += OCFS2_SUBALLOC_ALLOC + 1;
3093
3094                 /*
3095                  * This cluster will be used either for new bucket or for
3096                  * new xattr block.
3097                  * If the cluster size is the same as the bucket size, one
3098                  * more is needed since we may need to extend the bucket
3099                  * also.
3100                  */
3101                 clusters_add += 1;
3102                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3103                 if (OCFS2_XATTR_BUCKET_SIZE ==
3104                         OCFS2_SB(inode->i_sb)->s_clustersize) {
3105                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3106                         clusters_add += 1;
3107                 }
3108         } else {
3109                 meta_add += 1;
3110                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3111         }
3112 out:
3113         if (clusters_need)
3114                 *clusters_need = clusters_add;
3115         if (meta_need)
3116                 *meta_need = meta_add;
3117         if (credits_need)
3118                 *credits_need = credits;
3119         brelse(bh);
3120         return ret;
3121 }
3122
3123 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3124                                      struct ocfs2_dinode *di,
3125                                      struct ocfs2_xattr_info *xi,
3126                                      struct ocfs2_xattr_search *xis,
3127                                      struct ocfs2_xattr_search *xbs,
3128                                      struct ocfs2_xattr_set_ctxt *ctxt,
3129                                      int extra_meta,
3130                                      int *credits)
3131 {
3132         int clusters_add, meta_add, ret;
3133         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3134
3135         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3136
3137         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3138
3139         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3140                                         &clusters_add, &meta_add, credits);
3141         if (ret) {
3142                 mlog_errno(ret);
3143                 return ret;
3144         }
3145
3146         meta_add += extra_meta;
3147         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3148              "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3149
3150         if (meta_add) {
3151                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3152                                                         &ctxt->meta_ac);
3153                 if (ret) {
3154                         mlog_errno(ret);
3155                         goto out;
3156                 }
3157         }
3158
3159         if (clusters_add) {
3160                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3161                 if (ret)
3162                         mlog_errno(ret);
3163         }
3164 out:
3165         if (ret) {
3166                 if (ctxt->meta_ac) {
3167                         ocfs2_free_alloc_context(ctxt->meta_ac);
3168                         ctxt->meta_ac = NULL;
3169                 }
3170
3171                 /*
3172                  * We cannot have an error and a non null ctxt->data_ac.
3173                  */
3174         }
3175
3176         return ret;
3177 }
3178
3179 static int __ocfs2_xattr_set_handle(struct inode *inode,
3180                                     struct ocfs2_dinode *di,
3181                                     struct ocfs2_xattr_info *xi,
3182                                     struct ocfs2_xattr_search *xis,
3183                                     struct ocfs2_xattr_search *xbs,
3184                                     struct ocfs2_xattr_set_ctxt *ctxt)
3185 {
3186         int ret = 0, credits, old_found;
3187
3188         if (!xi->xi_value) {
3189                 /* Remove existing extended attribute */
3190                 if (!xis->not_found)
3191                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3192                 else if (!xbs->not_found)
3193                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3194         } else {
3195                 /* We always try to set extended attribute into inode first*/
3196                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3197                 if (!ret && !xbs->not_found) {
3198                         /*
3199                          * If succeed and that extended attribute existing in
3200                          * external block, then we will remove it.
3201                          */
3202                         xi->xi_value = NULL;
3203                         xi->xi_value_len = 0;
3204
3205                         old_found = xis->not_found;
3206                         xis->not_found = -ENODATA;
3207                         ret = ocfs2_calc_xattr_set_need(inode,
3208                                                         di,
3209                                                         xi,
3210                                                         xis,
3211                                                         xbs,
3212                                                         NULL,
3213                                                         NULL,
3214                                                         &credits);
3215                         xis->not_found = old_found;
3216                         if (ret) {
3217                                 mlog_errno(ret);
3218                                 goto out;
3219                         }
3220
3221                         ret = ocfs2_extend_trans(ctxt->handle, credits +
3222                                         ctxt->handle->h_buffer_credits);
3223                         if (ret) {
3224                                 mlog_errno(ret);
3225                                 goto out;
3226                         }
3227                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3228                 } else if (ret == -ENOSPC) {
3229                         if (di->i_xattr_loc && !xbs->xattr_bh) {
3230                                 ret = ocfs2_xattr_block_find(inode,
3231                                                              xi->xi_name_index,
3232                                                              xi->xi_name, xbs);
3233                                 if (ret)
3234                                         goto out;
3235
3236                                 old_found = xis->not_found;
3237                                 xis->not_found = -ENODATA;
3238                                 ret = ocfs2_calc_xattr_set_need(inode,
3239                                                                 di,
3240                                                                 xi,
3241                                                                 xis,
3242                                                                 xbs,
3243                                                                 NULL,
3244                                                                 NULL,
3245                                                                 &credits);
3246                                 xis->not_found = old_found;
3247                                 if (ret) {
3248                                         mlog_errno(ret);
3249                                         goto out;
3250                                 }
3251
3252                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3253                                         ctxt->handle->h_buffer_credits);
3254                                 if (ret) {
3255                                         mlog_errno(ret);
3256                                         goto out;
3257                                 }
3258                         }
3259                         /*
3260                          * If no space in inode, we will set extended attribute
3261                          * into external block.
3262                          */
3263                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3264                         if (ret)
3265                                 goto out;
3266                         if (!xis->not_found) {
3267                                 /*
3268                                  * If succeed and that extended attribute
3269                                  * existing in inode, we will remove it.
3270                                  */
3271                                 xi->xi_value = NULL;
3272                                 xi->xi_value_len = 0;
3273                                 xbs->not_found = -ENODATA;
3274                                 ret = ocfs2_calc_xattr_set_need(inode,
3275                                                                 di,
3276                                                                 xi,
3277                                                                 xis,
3278                                                                 xbs,
3279                                                                 NULL,
3280                                                                 NULL,
3281                                                                 &credits);
3282                                 if (ret) {
3283                                         mlog_errno(ret);
3284                                         goto out;
3285                                 }
3286
3287                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3288                                                 ctxt->handle->h_buffer_credits);
3289                                 if (ret) {
3290                                         mlog_errno(ret);
3291                                         goto out;
3292                                 }
3293                                 ret = ocfs2_xattr_ibody_set(inode, xi,
3294                                                             xis, ctxt);
3295                         }
3296                 }
3297         }
3298
3299         if (!ret) {
3300                 /* Update inode ctime. */
3301                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3302                                               xis->inode_bh,
3303                                               OCFS2_JOURNAL_ACCESS_WRITE);
3304                 if (ret) {
3305                         mlog_errno(ret);
3306                         goto out;
3307                 }
3308
3309                 inode->i_ctime = CURRENT_TIME;
3310                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3311                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3312                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3313         }
3314 out:
3315         return ret;
3316 }
3317
3318 /*
3319  * This function only called duing creating inode
3320  * for init security/acl xattrs of the new inode.
3321  * All transanction credits have been reserved in mknod.
3322  */
3323 int ocfs2_xattr_set_handle(handle_t *handle,
3324                            struct inode *inode,
3325                            struct buffer_head *di_bh,
3326                            int name_index,
3327                            const char *name,
3328                            const void *value,
3329                            size_t value_len,
3330                            int flags,
3331                            struct ocfs2_alloc_context *meta_ac,
3332                            struct ocfs2_alloc_context *data_ac)
3333 {
3334         struct ocfs2_dinode *di;
3335         int ret;
3336
3337         struct ocfs2_xattr_info xi = {
3338                 .xi_name_index = name_index,
3339                 .xi_name = name,
3340                 .xi_name_len = strlen(name),
3341                 .xi_value = value,
3342                 .xi_value_len = value_len,
3343         };
3344
3345         struct ocfs2_xattr_search xis = {
3346                 .not_found = -ENODATA,
3347         };
3348
3349         struct ocfs2_xattr_search xbs = {
3350                 .not_found = -ENODATA,
3351         };
3352
3353         struct ocfs2_xattr_set_ctxt ctxt = {
3354                 .handle = handle,
3355                 .meta_ac = meta_ac,
3356                 .data_ac = data_ac,
3357         };
3358
3359         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3360                 return -EOPNOTSUPP;
3361
3362         /*
3363          * In extreme situation, may need xattr bucket when
3364          * block size is too small. And we have already reserved
3365          * the credits for bucket in mknod.
3366          */
3367         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3368                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
3369                 if (!xbs.bucket) {
3370                         mlog_errno(-ENOMEM);
3371                         return -ENOMEM;
3372                 }
3373         }
3374
3375         xis.inode_bh = xbs.inode_bh = di_bh;
3376         di = (struct ocfs2_dinode *)di_bh->b_data;
3377
3378         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3379
3380         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3381         if (ret)
3382                 goto cleanup;
3383         if (xis.not_found) {
3384                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3385                 if (ret)
3386                         goto cleanup;
3387         }
3388
3389         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3390
3391 cleanup:
3392         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3393         brelse(xbs.xattr_bh);
3394         ocfs2_xattr_bucket_free(xbs.bucket);
3395
3396         return ret;
3397 }
3398
3399 /*
3400  * ocfs2_xattr_set()
3401  *
3402  * Set, replace or remove an extended attribute for this inode.
3403  * value is NULL to remove an existing extended attribute, else either
3404  * create or replace an extended attribute.
3405  */
3406 int ocfs2_xattr_set(struct inode *inode,
3407                     int name_index,
3408                     const char *name,
3409                     const void *value,
3410                     size_t value_len,
3411                     int flags)
3412 {
3413         struct buffer_head *di_bh = NULL;
3414         struct ocfs2_dinode *di;
3415         int ret, credits, ref_meta = 0, ref_credits = 0;
3416         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3417         struct inode *tl_inode = osb->osb_tl_inode;
3418         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3419         struct ocfs2_refcount_tree *ref_tree = NULL;
3420
3421         struct ocfs2_xattr_info xi = {
3422                 .xi_name_index = name_index,
3423                 .xi_name = name,
3424                 .xi_name_len = strlen(name),
3425                 .xi_value = value,
3426                 .xi_value_len = value_len,
3427         };
3428
3429         struct ocfs2_xattr_search xis = {
3430                 .not_found = -ENODATA,
3431         };
3432
3433         struct ocfs2_xattr_search xbs = {
3434                 .not_found = -ENODATA,
3435         };
3436
3437         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3438                 return -EOPNOTSUPP;
3439
3440         /*
3441          * Only xbs will be used on indexed trees.  xis doesn't need a
3442          * bucket.
3443          */
3444         xbs.bucket = ocfs2_xattr_bucket_new(inode);
3445         if (!xbs.bucket) {
3446                 mlog_errno(-ENOMEM);
3447                 return -ENOMEM;
3448         }
3449
3450         ret = ocfs2_inode_lock(inode, &di_bh, 1);
3451         if (ret < 0) {
3452                 mlog_errno(ret);
3453                 goto cleanup_nolock;
3454         }
3455         xis.inode_bh = xbs.inode_bh = di_bh;
3456         di = (struct ocfs2_dinode *)di_bh->b_data;
3457
3458         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3459         /*
3460          * Scan inode and external block to find the same name
3461          * extended attribute and collect search infomation.
3462          */
3463         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3464         if (ret)
3465                 goto cleanup;
3466         if (xis.not_found) {
3467                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3468                 if (ret)
3469                         goto cleanup;
3470         }
3471
3472         if (xis.not_found && xbs.not_found) {
3473                 ret = -ENODATA;
3474                 if (flags & XATTR_REPLACE)
3475                         goto cleanup;
3476                 ret = 0;
3477                 if (!value)
3478                         goto cleanup;
3479         } else {
3480                 ret = -EEXIST;
3481                 if (flags & XATTR_CREATE)
3482                         goto cleanup;
3483         }
3484
3485         /* Check whether the value is refcounted and do some prepartion. */
3486         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3487             (!xis.not_found || !xbs.not_found)) {
3488                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3489                                                    &xis, &xbs, &ref_tree,
3490                                                    &ref_meta, &ref_credits);
3491                 if (ret) {
3492                         mlog_errno(ret);
3493                         goto cleanup;
3494                 }
3495         }
3496
3497         mutex_lock(&tl_inode->i_mutex);
3498
3499         if (ocfs2_truncate_log_needs_flush(osb)) {
3500                 ret = __ocfs2_flush_truncate_log(osb);
3501                 if (ret < 0) {
3502                         mutex_unlock(&tl_inode->i_mutex);
3503                         mlog_errno(ret);
3504                         goto cleanup;
3505                 }
3506         }
3507         mutex_unlock(&tl_inode->i_mutex);
3508
3509         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3510                                         &xbs, &ctxt, ref_meta, &credits);
3511         if (ret) {
3512                 mlog_errno(ret);
3513                 goto cleanup;
3514         }
3515
3516         /* we need to update inode's ctime field, so add credit for it. */
3517         credits += OCFS2_INODE_UPDATE_CREDITS;
3518         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3519         if (IS_ERR(ctxt.handle)) {
3520                 ret = PTR_ERR(ctxt.handle);
3521                 mlog_errno(ret);
3522                 goto cleanup;
3523         }
3524
3525         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3526
3527         ocfs2_commit_trans(osb, ctxt.handle);
3528
3529         if (ctxt.data_ac)
3530                 ocfs2_free_alloc_context(ctxt.data_ac);
3531         if (ctxt.meta_ac)
3532                 ocfs2_free_alloc_context(ctxt.meta_ac);
3533         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3534                 ocfs2_schedule_truncate_log_flush(osb, 1);
3535         ocfs2_run_deallocs(osb, &ctxt.dealloc);
3536
3537 cleanup:
3538         if (ref_tree)
3539                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3540         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3541         if (!value && !ret) {
3542                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3543                 if (ret)
3544                         mlog_errno(ret);
3545         }
3546         ocfs2_inode_unlock(inode, 1);
3547 cleanup_nolock:
3548         brelse(di_bh);
3549         brelse(xbs.xattr_bh);
3550         ocfs2_xattr_bucket_free(xbs.bucket);
3551
3552         return ret;
3553 }
3554
3555 /*
3556  * Find the xattr extent rec which may contains name_hash.
3557  * e_cpos will be the first name hash of the xattr rec.
3558  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3559  */
3560 static int ocfs2_xattr_get_rec(struct inode *inode,
3561                                u32 name_hash,
3562                                u64 *p_blkno,
3563                                u32 *e_cpos,
3564                                u32 *num_clusters,
3565                                struct ocfs2_extent_list *el)
3566 {
3567         int ret = 0, i;
3568         struct buffer_head *eb_bh = NULL;
3569         struct ocfs2_extent_block *eb;
3570         struct ocfs2_extent_rec *rec = NULL;
3571         u64 e_blkno = 0;
3572
3573         if (el->l_tree_depth) {
3574                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3575                                       &eb_bh);
3576                 if (ret) {
3577                         mlog_errno(ret);
3578                         goto out;
3579                 }
3580
3581                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3582                 el = &eb->h_list;
3583
3584                 if (el->l_tree_depth) {
3585                         ocfs2_error(inode->i_sb,
3586                                     "Inode %lu has non zero tree depth in "
3587                                     "xattr tree block %llu\n", inode->i_ino,
3588                                     (unsigned long long)eb_bh->b_blocknr);
3589                         ret = -EROFS;
3590                         goto out;
3591                 }
3592         }
3593
3594         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3595                 rec = &el->l_recs[i];
3596
3597                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3598                         e_blkno = le64_to_cpu(rec->e_blkno);
3599                         break;
3600                 }
3601         }
3602
3603         if (!e_blkno) {
3604                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3605                             "record (%u, %u, 0) in xattr", inode->i_ino,
3606                             le32_to_cpu(rec->e_cpos),
3607                             ocfs2_rec_clusters(el, rec));
3608                 ret = -EROFS;
3609                 goto out;
3610         }
3611
3612         *p_blkno = le64_to_cpu(rec->e_blkno);
3613         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3614         if (e_cpos)
3615                 *e_cpos = le32_to_cpu(rec->e_cpos);
3616 out:
3617         brelse(eb_bh);
3618         return ret;
3619 }
3620
/*
 * Per-bucket callback type.  ocfs2_iterate_xattr_buckets() invokes it for
 * each bucket it reads and hands its own 'para' argument through
 * unchanged; a non-zero return value ends that iteration.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
3624
3625 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3626                                    struct ocfs2_xattr_bucket *bucket,
3627                                    int name_index,
3628                                    const char *name,
3629                                    u32 name_hash,
3630                                    u16 *xe_index,
3631                                    int *found)
3632 {
3633         int i, ret = 0, cmp = 1, block_off, new_offset;
3634         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3635         size_t name_len = strlen(name);
3636         struct ocfs2_xattr_entry *xe = NULL;
3637         char *xe_name;
3638
3639         /*
3640          * We don't use binary search in the bucket because there
3641          * may be multiple entries with the same name hash.
3642          */
3643         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3644                 xe = &xh->xh_entries[i];
3645
3646                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3647                         continue;
3648                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3649                         break;
3650
3651                 cmp = name_index - ocfs2_xattr_get_type(xe);
3652                 if (!cmp)
3653                         cmp = name_len - xe->xe_name_len;
3654                 if (cmp)
3655                         continue;
3656
3657                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3658                                                         xh,
3659                                                         i,
3660                                                         &block_off,
3661                                                         &new_offset);
3662                 if (ret) {
3663                         mlog_errno(ret);
3664                         break;
3665                 }
3666
3667
3668                 xe_name = bucket_block(bucket, block_off) + new_offset;
3669                 if (!memcmp(name, xe_name, name_len)) {
3670                         *xe_index = i;
3671                         *found = 1;
3672                         ret = 0;
3673                         break;
3674                 }
3675         }
3676
3677         return ret;
3678 }
3679
3680 /*
3681  * Find the specified xattr entry in a series of buckets.
3682  * This series start from p_blkno and last for num_clusters.
3683  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3684  * the num of the valid buckets.
3685  *
3686  * Return the buffer_head this xattr should reside in. And if the xattr's
3687  * hash is in the gap of 2 buckets, return the lower bucket.
3688  */
3689 static int ocfs2_xattr_bucket_find(struct inode *inode,
3690                                    int name_index,
3691                                    const char *name,
3692                                    u32 name_hash,
3693                                    u64 p_blkno,
3694                                    u32 first_hash,
3695                                    u32 num_clusters,
3696                                    struct ocfs2_xattr_search *xs)
3697 {
3698         int ret, found = 0;
3699         struct ocfs2_xattr_header *xh = NULL;
3700         struct ocfs2_xattr_entry *xe = NULL;
3701         u16 index = 0;
3702         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3703         int low_bucket = 0, bucket, high_bucket;
3704         struct ocfs2_xattr_bucket *search;
3705         u32 last_hash;
3706         u64 blkno, lower_blkno = 0;
3707
3708         search = ocfs2_xattr_bucket_new(inode);
3709         if (!search) {
3710                 ret = -ENOMEM;
3711                 mlog_errno(ret);
3712                 goto out;
3713         }
3714
3715         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3716         if (ret) {
3717                 mlog_errno(ret);
3718                 goto out;
3719         }
3720
3721         xh = bucket_xh(search);
3722         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3723         while (low_bucket <= high_bucket) {
3724                 ocfs2_xattr_bucket_relse(search);
3725
3726                 bucket = (low_bucket + high_bucket) / 2;
3727                 blkno = p_blkno + bucket * blk_per_bucket;
3728                 ret = ocfs2_read_xattr_bucket(search, blkno);
3729                 if (ret) {
3730                         mlog_errno(ret);
3731                         goto out;
3732                 }
3733
3734                 xh = bucket_xh(search);
3735                 xe = &xh->xh_entries[0];
3736                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3737                         high_bucket = bucket - 1;
3738                         continue;
3739                 }
3740
3741                 /*
3742                  * Check whether the hash of the last entry in our
3743                  * bucket is larger than the search one. for an empty
3744                  * bucket, the last one is also the first one.
3745                  */
3746                 if (xh->xh_count)
3747                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3748
3749                 last_hash = le32_to_cpu(xe->xe_name_hash);
3750
3751                 /* record lower_blkno which may be the insert place. */
3752                 lower_blkno = blkno;
3753
3754                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3755                         low_bucket = bucket + 1;
3756                         continue;
3757                 }
3758
3759                 /* the searched xattr should reside in this bucket if exists. */
3760                 ret = ocfs2_find_xe_in_bucket(inode, search,
3761                                               name_index, name, name_hash,
3762                                               &index, &found);
3763                 if (ret) {
3764                         mlog_errno(ret);
3765                         goto out;
3766                 }
3767                 break;
3768         }
3769
3770         /*
3771          * Record the bucket we have found.
3772          * When the xattr's hash value is in the gap of 2 buckets, we will
3773          * always set it to the previous bucket.
3774          */
3775         if (!lower_blkno)
3776                 lower_blkno = p_blkno;
3777
3778         /* This should be in cache - we just read it during the search */
3779         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3780         if (ret) {
3781                 mlog_errno(ret);
3782                 goto out;
3783         }
3784
3785         xs->header = bucket_xh(xs->bucket);
3786         xs->base = bucket_block(xs->bucket, 0);
3787         xs->end = xs->base + inode->i_sb->s_blocksize;
3788
3789         if (found) {
3790                 xs->here = &xs->header->xh_entries[index];
3791                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3792                      (unsigned long long)bucket_blkno(xs->bucket), index);
3793         } else
3794                 ret = -ENODATA;
3795
3796 out:
3797         ocfs2_xattr_bucket_free(search);
3798         return ret;
3799 }
3800
3801 static int ocfs2_xattr_index_block_find(struct inode *inode,
3802                                         struct buffer_head *root_bh,
3803                                         int name_index,
3804                                         const char *name,
3805                                         struct ocfs2_xattr_search *xs)
3806 {
3807         int ret;
3808         struct ocfs2_xattr_block *xb =
3809                         (struct ocfs2_xattr_block *)root_bh->b_data;
3810         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3811         struct ocfs2_extent_list *el = &xb_root->xt_list;
3812         u64 p_blkno = 0;
3813         u32 first_hash, num_clusters = 0;
3814         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3815
3816         if (le16_to_cpu(el->l_next_free_rec) == 0)
3817                 return -ENODATA;
3818
3819         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3820              name, name_hash, name_index);
3821
3822         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3823                                   &num_clusters, el);
3824         if (ret) {
3825                 mlog_errno(ret);
3826                 goto out;
3827         }
3828
3829         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3830
3831         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3832              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3833              first_hash);
3834
3835         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3836                                       p_blkno, first_hash, num_clusters, xs);
3837
3838 out:
3839         return ret;
3840 }
3841
3842 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3843                                        u64 blkno,
3844                                        u32 clusters,
3845                                        xattr_bucket_func *func,
3846                                        void *para)
3847 {
3848         int i, ret = 0;
3849         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3850         u32 num_buckets = clusters * bpc;
3851         struct ocfs2_xattr_bucket *bucket;
3852
3853         bucket = ocfs2_xattr_bucket_new(inode);
3854         if (!bucket) {
3855                 mlog_errno(-ENOMEM);
3856                 return -ENOMEM;
3857         }
3858
3859         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3860              clusters, (unsigned long long)blkno);
3861
3862         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3863                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3864                 if (ret) {
3865                         mlog_errno(ret);
3866                         break;
3867                 }
3868
3869                 /*
3870                  * The real bucket num in this series of blocks is stored
3871                  * in the 1st bucket.
3872                  */
3873                 if (i == 0)
3874                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3875
3876                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3877                      (unsigned long long)blkno,
3878                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3879                 if (func) {
3880                         ret = func(inode, bucket, para);
3881                         if (ret && ret != -ERANGE)
3882                                 mlog_errno(ret);
3883                         /* Fall through to bucket_relse() */
3884                 }
3885
3886                 ocfs2_xattr_bucket_relse(bucket);
3887                 if (ret)
3888                         break;
3889         }
3890
3891         ocfs2_xattr_bucket_free(bucket);
3892         return ret;
3893 }
3894
/*
 * State handed (as the opaque "para" pointer) to the bucket callbacks
 * while the names stored in an xattr tree are being listed.
 */
struct ocfs2_xattr_tree_list {
        /* output buffer, forwarded to ocfs2_xattr_list_entry() */
        char *buffer;
        /* size of buffer, forwarded together with it */
        size_t buffer_size;
        /*
         * updated through its address by ocfs2_xattr_list_entry() and
         * returned as the result of the listing; presumably the number
         * of bytes produced so far -- confirm against that helper
         */
        size_t result;
};
3900
3901 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3902                                              struct ocfs2_xattr_header *xh,
3903                                              int index,
3904                                              int *block_off,
3905                                              int *new_offset)
3906 {
3907         u16 name_offset;
3908
3909         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3910                 return -EINVAL;
3911
3912         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3913
3914         *block_off = name_offset >> sb->s_blocksize_bits;
3915         *new_offset = name_offset % sb->s_blocksize;
3916
3917         return 0;
3918 }
3919
3920 static int ocfs2_list_xattr_bucket(struct inode *inode,
3921                                    struct ocfs2_xattr_bucket *bucket,
3922                                    void *para)
3923 {
3924         int ret = 0, type;
3925         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3926         int i, block_off, new_offset;
3927         const char *prefix, *name;
3928
3929         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3930                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3931                 type = ocfs2_xattr_get_type(entry);
3932                 prefix = ocfs2_xattr_prefix(type);
3933
3934                 if (prefix) {
3935                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3936                                                                 bucket_xh(bucket),
3937                                                                 i,
3938                                                                 &block_off,
3939                                                                 &new_offset);
3940                         if (ret)
3941                                 break;
3942
3943                         name = (const char *)bucket_block(bucket, block_off) +
3944                                 new_offset;
3945                         ret = ocfs2_xattr_list_entry(xl->buffer,
3946                                                      xl->buffer_size,
3947                                                      &xl->result,
3948                                                      prefix, name,
3949                                                      entry->xe_name_len);
3950                         if (ret)
3951                                 break;
3952                 }
3953         }
3954
3955         return ret;
3956 }
3957
3958 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3959                                            struct buffer_head *blk_bh,
3960                                            xattr_tree_rec_func *rec_func,
3961                                            void *para)
3962 {
3963         struct ocfs2_xattr_block *xb =
3964                         (struct ocfs2_xattr_block *)blk_bh->b_data;
3965         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3966         int ret = 0;
3967         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3968         u64 p_blkno = 0;
3969
3970         if (!el->l_next_free_rec || !rec_func)
3971                 return 0;
3972
3973         while (name_hash > 0) {
3974                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3975                                           &e_cpos, &num_clusters, el);
3976                 if (ret) {
3977                         mlog_errno(ret);
3978                         break;
3979                 }
3980
3981                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3982                                num_clusters, para);
3983                 if (ret) {
3984                         if (ret != -ERANGE)
3985                                 mlog_errno(ret);
3986                         break;
3987                 }
3988
3989                 if (e_cpos == 0)
3990                         break;
3991
3992                 name_hash = e_cpos - 1;
3993         }
3994
3995         return ret;
3996
3997 }
3998
3999 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4000                                      struct buffer_head *root_bh,
4001                                      u64 blkno, u32 cpos, u32 len, void *para)
4002 {
4003         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4004                                            ocfs2_list_xattr_bucket, para);
4005 }
4006
4007 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4008                                              struct buffer_head *blk_bh,
4009                                              char *buffer,
4010                                              size_t buffer_size)
4011 {
4012         int ret;
4013         struct ocfs2_xattr_tree_list xl = {
4014                 .buffer = buffer,
4015                 .buffer_size = buffer_size,
4016                 .result = 0,
4017         };
4018
4019         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4020                                               ocfs2_list_xattr_tree_rec, &xl);
4021         if (ret) {
4022                 mlog_errno(ret);
4023                 goto out;
4024         }
4025
4026         ret = xl.result;
4027 out:
4028         return ret;
4029 }
4030
4031 static int cmp_xe(const void *a, const void *b)
4032 {
4033         const struct ocfs2_xattr_entry *l = a, *r = b;
4034         u32 l_hash = le32_to_cpu(l->xe_name_hash);
4035         u32 r_hash = le32_to_cpu(r->xe_name_hash);
4036
4037         if (l_hash > r_hash)
4038                 return 1;
4039         if (l_hash < r_hash)
4040                 return -1;
4041         return 0;
4042 }
4043
4044 static void swap_xe(void *a, void *b, int size)
4045 {
4046         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4047
4048         tmp = *l;
4049         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4050         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4051 }
4052
4053 /*
4054  * When the ocfs2_xattr_block is filled up, new bucket will be created
4055  * and all the xattr entries will be moved to the new bucket.
4056  * The header goes at the start of the bucket, and the names+values are
4057  * filled from the end.  This is why *target starts as the last buffer.
4058  * Note: we need to sort the entries since they are not saved in order
4059  * in the ocfs2_xattr_block.
4060  */
4061 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4062                                            struct buffer_head *xb_bh,
4063                                            struct ocfs2_xattr_bucket *bucket)
4064 {
4065         int i, blocksize = inode->i_sb->s_blocksize;
4066         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4067         u16 offset, size, off_change;
4068         struct ocfs2_xattr_entry *xe;
4069         struct ocfs2_xattr_block *xb =
4070                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
4071         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4072         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4073         u16 count = le16_to_cpu(xb_xh->xh_count);
4074         char *src = xb_bh->b_data;
4075         char *target = bucket_block(bucket, blks - 1);
4076
4077         mlog(0, "cp xattr from block %llu to bucket %llu\n",
4078              (unsigned long long)xb_bh->b_blocknr,
4079              (unsigned long long)bucket_blkno(bucket));
4080
4081         for (i = 0; i < blks; i++)
4082                 memset(bucket_block(bucket, i), 0, blocksize);
4083
4084         /*
4085          * Since the xe_name_offset is based on ocfs2_xattr_header,
4086          * there is a offset change corresponding to the change of
4087          * ocfs2_xattr_header's position.
4088          */
4089         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4090         xe = &xb_xh->xh_entries[count - 1];
4091         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4092         size = blocksize - offset;
4093
4094         /* copy all the names and values. */
4095         memcpy(target + offset, src + offset, size);
4096
4097         /* Init new header now. */
4098         xh->xh_count = xb_xh->xh_count;
4099         xh->xh_num_buckets = cpu_to_le16(1);
4100         xh->xh_name_value_len = cpu_to_le16(size);
4101         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4102
4103         /* copy all the entries. */
4104         target = bucket_block(bucket, 0);
4105         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4106         size = count * sizeof(struct ocfs2_xattr_entry);
4107         memcpy(target + offset, (char *)xb_xh + offset, size);
4108
4109         /* Change the xe offset for all the xe because of the move. */
4110         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4111                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4112         for (i = 0; i < count; i++)
4113                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4114
4115         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4116              offset, size, off_change);
4117
4118         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4119              cmp_xe, swap_xe);
4120 }
4121
4122 /*
4123  * After we move xattr from block to index btree, we have to
4124  * update ocfs2_xattr_search to the new xe and base.
4125  *
4126  * When the entry is in xattr block, xattr_bh indicates the storage place.
4127  * While if the entry is in index b-tree, "bucket" indicates the
4128  * real place of the xattr.
4129  */
4130 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4131                                             struct ocfs2_xattr_search *xs,
4132                                             struct buffer_head *old_bh)
4133 {
4134         char *buf = old_bh->b_data;
4135         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4136         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4137         int i;
4138
4139         xs->header = bucket_xh(xs->bucket);
4140         xs->base = bucket_block(xs->bucket, 0);
4141         xs->end = xs->base + inode->i_sb->s_blocksize;
4142
4143         if (xs->not_found)
4144                 return;
4145
4146         i = xs->here - old_xh->xh_entries;
4147         xs->here = &xs->header->xh_entries[i];
4148 }
4149
/*
 * Convert the xattr block in xs->xattr_bh from the flat header layout
 * to an indexed one: claim one cluster, copy the existing xattrs into a
 * bucket at its start (xs->bucket), and rewrite the block as a tree
 * root with a single extent record pointing at that cluster.
 *
 * xs is updated to refer to the new bucket.  The block must not already
 * be indexed and xs->bucket must be set (both are BUG_ON-checked).
 * The work is done with oi->ip_alloc_sem held for writing.
 *
 * Returns 0 on success or the error from the journal access, cluster
 * claim or bucket initialisation.
 */
static int ocfs2_xattr_create_index_block(struct inode *inode,
                                          struct ocfs2_xattr_search *xs,
                                          struct ocfs2_xattr_set_ctxt *ctxt)
{
        int ret;
        u32 bit_off, len;
        u64 blkno;
        handle_t *handle = ctxt->handle;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct buffer_head *xb_bh = xs->xattr_bh;
        struct ocfs2_xattr_block *xb =
                        (struct ocfs2_xattr_block *)xb_bh->b_data;
        struct ocfs2_xattr_tree_root *xr;
        u16 xb_flags = le16_to_cpu(xb->xb_flags);

        mlog(0, "create xattr index block for %llu\n",
             (unsigned long long)xb_bh->b_blocknr);

        BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
        BUG_ON(!xs->bucket);

        /*
         * XXX:
         * We can use this lock for now, and maybe move to a dedicated mutex
         * if performance becomes a problem later.
         */
        down_write(&oi->ip_alloc_sem);

        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* Claim exactly one cluster for the first bucket(s). */
        ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
                                     1, 1, &bit_off, &len);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * The bucket may spread in many blocks, and
         * we will only touch the 1st block and the last block
         * in the whole bucket(one for entry and one for data).
         */
        blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

        mlog(0, "allocate 1 cluster from %llu to xattr block\n",
             (unsigned long long)blkno);

        ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
                                                OCFS2_JOURNAL_ACCESS_CREATE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* Copy the xattrs out before the block is wiped below. */
        ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);

        /* Must run before the memset: it reads the old header address. */
        ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);

        /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
        memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
               offsetof(struct ocfs2_xattr_block, xb_attrs));

        /* Depth-0 tree with one record covering the new cluster. */
        xr = &xb->xb_attrs.xb_root;
        xr->xt_clusters = cpu_to_le32(1);
        xr->xt_last_eb_blk = 0;
        xr->xt_list.l_tree_depth = 0;
        xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
        xr->xt_list.l_next_free_rec = cpu_to_le16(1);

        xr->xt_list.l_recs[0].e_cpos = 0;
        xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
        xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);

        xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);

        ocfs2_journal_dirty(handle, xb_bh);

out:
        up_write(&oi->ip_alloc_sem);

        return ret;
}
4245
4246 static int cmp_xe_offset(const void *a, const void *b)
4247 {
4248         const struct ocfs2_xattr_entry *l = a, *r = b;
4249         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4250         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4251
4252         if (l_name_offset < r_name_offset)
4253                 return 1;
4254         if (l_name_offset > r_name_offset)
4255                 return -1;
4256         return 0;
4257 }
4258
4259 /*
4260  * defrag a xattr bucket if we find that the bucket has some
4261  * holes beteen name/value pairs.
4262  * We will move all the name/value pairs to the end of the bucket
4263  * so that we can spare some space for insertion.
4264  */
4265 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4266                                      handle_t *handle,
4267                                      struct ocfs2_xattr_bucket *bucket)
4268 {
4269         int ret, i;
4270         size_t end, offset, len;
4271         struct ocfs2_xattr_header *xh;
4272         char *entries, *buf, *bucket_buf = NULL;
4273         u64 blkno = bucket_blkno(bucket);
4274         u16 xh_free_start;
4275         size_t blocksize = inode->i_sb->s_blocksize;
4276         struct ocfs2_xattr_entry *xe;
4277
4278         /*
4279          * In order to make the operation more efficient and generic,
4280          * we copy all the blocks into a contiguous memory and do the
4281          * defragment there, so if anything is error, we will not touch
4282          * the real block.
4283          */
4284         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4285         if (!bucket_buf) {
4286                 ret = -EIO;
4287                 goto out;
4288         }
4289
4290         buf = bucket_buf;
4291         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4292                 memcpy(buf, bucket_block(bucket, i), blocksize);
4293
4294         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4295                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4296         if (ret < 0) {
4297                 mlog_errno(ret);
4298                 goto out;
4299         }
4300
4301         xh = (struct ocfs2_xattr_header *)bucket_buf;
4302         entries = (char *)xh->xh_entries;
4303         xh_free_start = le16_to_cpu(xh->xh_free_start);
4304
4305         mlog(0, "adjust xattr bucket in %llu, count = %u, "
4306              "xh_free_start = %u, xh_name_value_len = %u.\n",
4307              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4308              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4309
4310         /*
4311          * sort all the entries by their offset.
4312          * the largest will be the first, so that we can
4313          * move them to the end one by one.
4314          */
4315         sort(entries, le16_to_cpu(xh->xh_count),
4316              sizeof(struct ocfs2_xattr_entry),
4317              cmp_xe_offset, swap_xe);
4318
4319         /* Move all name/values to the end of the bucket. */
4320         xe = xh->xh_entries;
4321         end = OCFS2_XATTR_BUCKET_SIZE;
4322         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4323                 offset = le16_to_cpu(xe->xe_name_offset);
4324                 len = namevalue_size_xe(xe);
4325
4326                 /*
4327                  * We must make sure that the name/value pair
4328                  * exist in the same block. So adjust end to
4329                  * the previous block end if needed.
4330                  */
4331                 if (((end - len) / blocksize !=
4332                         (end - 1) / blocksize))
4333                         end = end - end % blocksize;
4334
4335                 if (end > offset + len) {
4336                         memmove(bucket_buf + end - len,
4337                                 bucket_buf + offset, len);
4338                         xe->xe_name_offset = cpu_to_le16(end - len);
4339                 }
4340
4341                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4342                                 "bucket %llu\n", (unsigned long long)blkno);
4343
4344                 end -= len;
4345         }
4346
4347         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4348                         "bucket %llu\n", (unsigned long long)blkno);
4349
4350         if (xh_free_start == end)
4351                 goto out;
4352
4353         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4354         xh->xh_free_start = cpu_to_le16(end);
4355
4356         /* sort the entries by their name_hash. */
4357         sort(entries, le16_to_cpu(xh->xh_count),
4358              sizeof(struct ocfs2_xattr_entry),
4359              cmp_xe, swap_xe);
4360
4361         buf = bucket_buf;
4362         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4363                 memcpy(bucket_block(bucket, i), buf, blocksize);
4364         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4365
4366 out:
4367         kfree(bucket_buf);
4368         return ret;
4369 }
4370
/*
 * The bucket "first" sits at the start of an existing extent.  new_blkno
 * points to a newly allocated extent.  Because we know each of our
 * clusters contains more than one bucket, we can easily split one
 * cluster at a bucket boundary.  So we take the last cluster of the
 * existing extent and split it down the middle.  We move the last half
 * of the buckets in the last cluster of the existing extent over to the
 * new extent.
 *
 * first is the bucket at the start of the existing extent; it is what
 * lets the existing extent's bucket count be updated.  target is the
 * bucket where we were hoping to insert our xattr.  If the bucket move
 * places the target in the new extent, first and target are released
 * and re-read so that first refers to new_blkno and target to the
 * bucket's new location.
 *
 * first_hash is handed to ocfs2_mv_xattr_buckets(), which will set it
 * to the 1st xe's name_hash in the new extent.
 *
 * Returns 0 on success or the error from ocfs2_mv_xattr_buckets() or
 * from re-reading the buckets.
 */
static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
                                               handle_t *handle,
                                               struct ocfs2_xattr_bucket *first,
                                               struct ocfs2_xattr_bucket *target,
                                               u64 new_blkno,
                                               u32 num_clusters,
                                               u32 *first_hash)
{
        int ret;
        struct super_block *sb = inode->i_sb;
        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
        int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
        int to_move = num_buckets / 2;
        u64 src_blkno;
        /* First block of the last cluster in the existing extent. */
        u64 last_cluster_blkno = bucket_blkno(first) +
                ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));

        BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
        /* Splitting needs more than one bucket per cluster. */
        BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);

        mlog(0, "move half of xattrs in cluster %llu to %llu\n",
             (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);

        ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
                                     last_cluster_blkno, new_blkno,
                                     to_move, first_hash);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* This is the first bucket that got moved */
        src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);

        /*
         * If the target bucket was part of the moved buckets, we need to
         * update first and target.
         */
        if (bucket_blkno(target) >= src_blkno) {
                /* Find the block for the new target bucket */
                src_blkno = new_blkno +
                        (bucket_blkno(target) - src_blkno);

                ocfs2_xattr_bucket_relse(first);
                ocfs2_xattr_bucket_relse(target);

                /*
                 * These shouldn't fail - the buffers are in the
                 * journal from ocfs2_cp_xattr_bucket().
                 */
                ret = ocfs2_read_xattr_bucket(first, new_blkno);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
                ret = ocfs2_read_xattr_bucket(target, src_blkno);
                if (ret)
                        mlog_errno(ret);

        }

out:
        return ret;
}
4452
4453 /*
4454  * Find the suitable pos when we divide a bucket into 2.
4455  * We have to make sure the xattrs with the same hash value exist
4456  * in the same bucket.
4457  *
4458  * If this ocfs2_xattr_header covers more than one hash value, find a
4459  * place where the hash value changes.  Try to find the most even split.
4460  * The most common case is that all entries have different hash values,
4461  * and the first check we make will find a place to split.
4462  */
4463 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4464 {
4465         struct ocfs2_xattr_entry *entries = xh->xh_entries;
4466         int count = le16_to_cpu(xh->xh_count);
4467         int delta, middle = count / 2;
4468
4469         /*
4470          * We start at the middle.  Each step gets farther away in both
4471          * directions.  We therefore hit the change in hash value
4472          * nearest to the middle.  Note that this loop does not execute for
4473          * count < 2.
4474          */
4475         for (delta = 0; delta < middle; delta++) {
4476                 /* Let's check delta earlier than middle */
4477                 if (cmp_xe(&entries[middle - delta - 1],
4478                            &entries[middle - delta]))
4479                         return middle - delta;
4480
4481                 /* For even counts, don't walk off the end */
4482                 if ((middle + delta + 1) == count)
4483                         continue;
4484
4485                 /* Now try delta past middle */
4486                 if (cmp_xe(&entries[middle + delta],
4487                            &entries[middle + delta + 1]))
4488                         return middle + delta + 1;
4489         }
4490
4491         /* Every entry had the same hash */
4492         return count;
4493 }
4494
/*
 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
 * first_hash will record the 1st hash of the new bucket; it may be
 * NULL if the caller does not need it.
 *
 * Normally half of the xattrs will be moved.  But we have to make
 * sure that the xattrs with the same hash value are stored in the
 * same bucket. If all the xattrs in this bucket have the same hash
 * value, the new bucket will be initialized as an empty one and the
 * first_hash will be initialized as (hash_value+1).
 *
 * new_bucket_head selects how the new bucket is set up: when non-zero
 * it is journalled with ACCESS_CREATE and gets xh_num_buckets = 1,
 * otherwise it is journalled with ACCESS_WRITE and gets
 * xh_num_buckets = 0.
 *
 * Returns 0 on success, -ENOMEM if a bucket structure cannot be
 * allocated, or the error from reading, initialising or journalling
 * the buckets.
 */
static int ocfs2_divide_xattr_bucket(struct inode *inode,
                                    handle_t *handle,
                                    u64 blk,
                                    u64 new_blk,
                                    u32 *first_hash,
                                    int new_bucket_head)
{
        int ret, i;
        int count, start, len, name_value_len = 0, name_offset = 0;
        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
        struct ocfs2_xattr_header *xh;
        struct ocfs2_xattr_entry *xe;
        int blocksize = inode->i_sb->s_blocksize;

        mlog(0, "move some of xattrs from bucket %llu to %llu\n",
             (unsigned long long)blk, (unsigned long long)new_blk);

        /* s_bucket is the source (old) bucket, t_bucket the target. */
        s_bucket = ocfs2_xattr_bucket_new(inode);
        t_bucket = ocfs2_xattr_bucket_new(inode);
        if (!s_bucket || !t_bucket) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_read_xattr_bucket(s_bucket, blk);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
                                                OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
         * there's no need to read it.
         */
        ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * Hey, if we're overwriting t_bucket, what difference does
         * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
         * same part of ocfs2_cp_xattr_bucket().
         */
        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
                                                new_bucket_head ?
                                                OCFS2_JOURNAL_ACCESS_CREATE :
                                                OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* start is the index of the first entry that moves to t_bucket. */
        xh = bucket_xh(s_bucket);
        count = le16_to_cpu(xh->xh_count);
        start = ocfs2_xattr_find_divide_pos(xh);

        if (start == count) {
                /*
                 * Nothing can be split off.
                 * NOTE(review): indexing start-1 assumes the source
                 * bucket holds at least one entry -- confirm callers.
                 */
                xe = &xh->xh_entries[start-1];

                /*
                 * initialized a new empty bucket here.
                 * The hash value is set as one larger than
                 * that of the last entry in the previous bucket.
                 */
                for (i = 0; i < t_bucket->bu_blocks; i++)
                        memset(bucket_block(t_bucket, i), 0, blocksize);

                xh = bucket_xh(t_bucket);
                xh->xh_free_start = cpu_to_le16(blocksize);
                xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
                le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);

                goto set_num_buckets;
        }

        /* copy the whole bucket to the new first. */
        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);

        /* update the new bucket. */
        xh = bucket_xh(t_bucket);

        /*
         * Calculate the total name/value len and xh_free_start for
         * the old bucket first.  t_bucket is still a full copy of the
         * old bucket at this point, so its first "start" entries are
         * the ones that stay behind.
         */
        name_offset = OCFS2_XATTR_BUCKET_SIZE;
        name_value_len = 0;
        for (i = 0; i < start; i++) {
                xe = &xh->xh_entries[i];
                name_value_len += namevalue_size_xe(xe);
                if (le16_to_cpu(xe->xe_name_offset) < name_offset)
                        name_offset = le16_to_cpu(xe->xe_name_offset);
        }

        /*
         * Now begin the modification to the new bucket.
         *
         * In the new bucket, We just move the xattr entry to the beginning
         * and don't touch the name/value. So there will be some holes in the
         * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
         * called.
         */
        xe = &xh->xh_entries[start];
        len = sizeof(struct ocfs2_xattr_entry) * (count - start);
        mlog(0, "mv xattr entry len %d from %d to %d\n", len,
             (int)((char *)xe - (char *)xh),
             (int)((char *)xh->xh_entries - (char *)xh));
        memmove((char *)xh->xh_entries, (char *)xe, len);
        /* Zero the now-unused slots after the moved entries. */
        xe = &xh->xh_entries[count - start];
        len = sizeof(struct ocfs2_xattr_entry) * start;
        memset((char *)xe, 0, len);

        /* Drop the entries that stayed in the old bucket from the totals. */
        le16_add_cpu(&xh->xh_count, -start);
        le16_add_cpu(&xh->xh_name_value_len, -name_value_len);

        /* Calculate xh_free_start for the new bucket. */
        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
                xe = &xh->xh_entries[i];
                if (le16_to_cpu(xe->xe_name_offset) <
                    le16_to_cpu(xh->xh_free_start))
                        xh->xh_free_start = xe->xe_name_offset;
        }

set_num_buckets:
        /* set xh->xh_num_buckets for the new xh. */
        if (new_bucket_head)
                xh->xh_num_buckets = cpu_to_le16(1);
        else
                xh->xh_num_buckets = 0;

        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);

        /* store the first_hash of the new bucket. */
        if (first_hash)
                *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);

        /*
         * Now only update the 1st block of the old bucket.  If we
         * just added a new empty bucket, there is no need to modify
         * it.
         */
        if (start == count)
                goto out;

        /* Trim the old bucket down to its first "start" entries. */
        xh = bucket_xh(s_bucket);
        memset(&xh->xh_entries[start], 0,
               sizeof(struct ocfs2_xattr_entry) * (count - start));
        xh->xh_count = cpu_to_le16(start);
        xh->xh_free_start = cpu_to_le16(name_offset);
        xh->xh_name_value_len = cpu_to_le16(name_value_len);

        ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);

out:
        ocfs2_xattr_bucket_free(s_bucket);
        ocfs2_xattr_bucket_free(t_bucket);

        return ret;
}
4675
4676 /*
4677  * Copy xattr from one bucket to another bucket.
4678  *
4679  * The caller must make sure that the journal transaction
4680  * has enough space for journaling.
4681  */
4682 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4683                                  handle_t *handle,
4684                                  u64 s_blkno,
4685                                  u64 t_blkno,
4686                                  int t_is_new)
4687 {
4688         int ret;
4689         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4690
4691         BUG_ON(s_blkno == t_blkno);
4692
4693         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4694              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4695              t_is_new);
4696
4697         s_bucket = ocfs2_xattr_bucket_new(inode);
4698         t_bucket = ocfs2_xattr_bucket_new(inode);
4699         if (!s_bucket || !t_bucket) {
4700                 ret = -ENOMEM;
4701                 mlog_errno(ret);
4702                 goto out;
4703         }
4704
4705         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4706         if (ret)
4707                 goto out;
4708
4709         /*
4710          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4711          * there's no need to read it.
4712          */
4713         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4714         if (ret)
4715                 goto out;
4716
4717         /*
4718          * Hey, if we're overwriting t_bucket, what difference does
4719          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4720          * cluster to fill, we came here from
4721          * ocfs2_mv_xattr_buckets(), and it is really new -
4722          * ACCESS_CREATE is required.  But we also might have moved data
4723          * out of t_bucket before extending back into it.
4724          * ocfs2_add_new_xattr_bucket() can do this - its call to
4725          * ocfs2_add_new_xattr_cluster() may have created a new extent
4726          * and copied out the end of the old extent.  Then it re-extends
4727          * the old extent back to create space for new xattrs.  That's
4728          * how we get here, and the bucket isn't really new.
4729          */
4730         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4731                                                 t_is_new ?
4732                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4733                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4734         if (ret)
4735                 goto out;
4736
4737         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4738         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4739
4740 out:
4741         ocfs2_xattr_bucket_free(t_bucket);
4742         ocfs2_xattr_bucket_free(s_bucket);
4743
4744         return ret;
4745 }
4746
4747 /*
4748  * src_blk points to the start of an existing extent.  last_blk points to
4749  * last cluster in that extent.  to_blk points to a newly allocated
4750  * extent.  We copy the buckets from the cluster at last_blk to the new
4751  * extent.  If start_bucket is non-zero, we skip that many buckets before
4752  * we start copying.  The new extent's xh_num_buckets gets set to the
4753  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4754  * by the same amount.
4755  */
4756 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4757                                   u64 src_blk, u64 last_blk, u64 to_blk,
4758                                   unsigned int start_bucket,
4759                                   u32 *first_hash)
4760 {
4761         int i, ret, credits;
4762         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4763         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4764         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4765         struct ocfs2_xattr_bucket *old_first, *new_first;
4766
4767         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4768              (unsigned long long)last_blk, (unsigned long long)to_blk);
4769
4770         BUG_ON(start_bucket >= num_buckets);
4771         if (start_bucket) {
4772                 num_buckets -= start_bucket;
4773                 last_blk += (start_bucket * blks_per_bucket);
4774         }
4775
4776         /* The first bucket of the original extent */
4777         old_first = ocfs2_xattr_bucket_new(inode);
4778         /* The first bucket of the new extent */
4779         new_first = ocfs2_xattr_bucket_new(inode);
4780         if (!old_first || !new_first) {
4781                 ret = -ENOMEM;
4782                 mlog_errno(ret);
4783                 goto out;
4784         }
4785
4786         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4787         if (ret) {
4788                 mlog_errno(ret);
4789                 goto out;
4790         }
4791
4792         /*
4793          * We need to update the first bucket of the old extent and all
4794          * the buckets going to the new extent.
4795          */
4796         credits = ((num_buckets + 1) * blks_per_bucket) +
4797                 handle->h_buffer_credits;
4798         ret = ocfs2_extend_trans(handle, credits);
4799         if (ret) {
4800                 mlog_errno(ret);
4801                 goto out;
4802         }
4803
4804         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4805                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4806         if (ret) {
4807                 mlog_errno(ret);
4808                 goto out;
4809         }
4810
4811         for (i = 0; i < num_buckets; i++) {
4812                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4813                                             last_blk + (i * blks_per_bucket),
4814                                             to_blk + (i * blks_per_bucket),
4815                                             1);
4816                 if (ret) {
4817                         mlog_errno(ret);
4818                         goto out;
4819                 }
4820         }
4821
4822         /*
4823          * Get the new bucket ready before we dirty anything
4824          * (This actually shouldn't fail, because we already dirtied
4825          * it once in ocfs2_cp_xattr_bucket()).
4826          */
4827         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4828         if (ret) {
4829                 mlog_errno(ret);
4830                 goto out;
4831         }
4832         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4833                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4834         if (ret) {
4835                 mlog_errno(ret);
4836                 goto out;
4837         }
4838
4839         /* Now update the headers */
4840         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4841         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4842
4843         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4844         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4845
4846         if (first_hash)
4847                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4848
4849 out:
4850         ocfs2_xattr_bucket_free(new_first);
4851         ocfs2_xattr_bucket_free(old_first);
4852         return ret;
4853 }
4854
4855 /*
4856  * Move some xattrs in this cluster to the new cluster.
4857  * This function should only be called when bucket size == cluster size.
4858  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4859  */
4860 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4861                                       handle_t *handle,
4862                                       u64 prev_blk,
4863                                       u64 new_blk,
4864                                       u32 *first_hash)
4865 {
4866         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4867         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4868
4869         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4870
4871         ret = ocfs2_extend_trans(handle, credits);
4872         if (ret) {
4873                 mlog_errno(ret);
4874                 return ret;
4875         }
4876
4877         /* Move half of the xattr in start_blk to the next bucket. */
4878         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4879                                           new_blk, first_hash, 1);
4880 }
4881
4882 /*
4883  * Move some xattrs from the old cluster to the new one since they are not
4884  * contiguous in ocfs2 xattr tree.
4885  *
4886  * new_blk starts a new separate cluster, and we will move some xattrs from
4887  * prev_blk to it. v_start will be set as the first name hash value in this
4888  * new cluster so that it can be used as e_cpos during tree insertion and
4889  * don't collide with our original b-tree operations. first_bh and header_bh
4890  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4891  * to extend the insert bucket.
4892  *
4893  * The problem is how much xattr should we move to the new one and when should
4894  * we update first_bh and header_bh?
4895  * 1. If cluster size > bucket size, that means the previous cluster has more
4896  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4897  *    update the first_bh and header_bh if the insert bucket has been moved
4898  *    to the new cluster.
4899  * 2. If cluster_size == bucket_size:
4900  *    a) If the previous extent rec has more than one cluster and the insert
4901  *       place isn't in the last cluster, copy the entire last cluster to the
4902  *       new one. This time, we don't need to upate the first_bh and header_bh
4903  *       since they will not be moved into the new cluster.
4904  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4905  *       the new one. And we set the extend flag to zero if the insert place is
4906  *       moved into the new allocated cluster since no extend is needed.
4907  */
4908 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4909                                             handle_t *handle,
4910                                             struct ocfs2_xattr_bucket *first,
4911                                             struct ocfs2_xattr_bucket *target,
4912                                             u64 new_blk,
4913                                             u32 prev_clusters,
4914                                             u32 *v_start,
4915                                             int *extend)
4916 {
4917         int ret;
4918
4919         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4920              (unsigned long long)bucket_blkno(first), prev_clusters,
4921              (unsigned long long)new_blk);
4922
4923         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4924                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4925                                                           handle,
4926                                                           first, target,
4927                                                           new_blk,
4928                                                           prev_clusters,
4929                                                           v_start);
4930                 if (ret)
4931                         mlog_errno(ret);
4932         } else {
4933                 /* The start of the last cluster in the first extent */
4934                 u64 last_blk = bucket_blkno(first) +
4935                         ((prev_clusters - 1) *
4936                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4937
4938                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4939                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4940                                                      bucket_blkno(first),
4941                                                      last_blk, new_blk, 0,
4942                                                      v_start);
4943                         if (ret)
4944                                 mlog_errno(ret);
4945                 } else {
4946                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4947                                                          last_blk, new_blk,
4948                                                          v_start);
4949                         if (ret)
4950                                 mlog_errno(ret);
4951
4952                         if ((bucket_blkno(target) == last_blk) && extend)
4953                                 *extend = 0;
4954                 }
4955         }
4956
4957         return ret;
4958 }
4959
4960 /*
4961  * Add a new cluster for xattr storage.
4962  *
4963  * If the new cluster is contiguous with the previous one, it will be
4964  * appended to the same extent record, and num_clusters will be updated.
4965  * If not, we will insert a new extent for it and move some xattrs in
4966  * the last cluster into the new allocated one.
4967  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4968  * lose the benefits of hashing because we'll have to search large leaves.
4969  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4970  * if it's bigger).
4971  *
4972  * first_bh is the first block of the previous extent rec and header_bh
4973  * indicates the bucket we will insert the new xattrs. They will be updated
4974  * when the header_bh is moved into the new cluster.
4975  */
4976 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4977                                        struct buffer_head *root_bh,
4978                                        struct ocfs2_xattr_bucket *first,
4979                                        struct ocfs2_xattr_bucket *target,
4980                                        u32 *num_clusters,
4981                                        u32 prev_cpos,
4982                                        int *extend,
4983                                        struct ocfs2_xattr_set_ctxt *ctxt)
4984 {
4985         int ret;
4986         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4987         u32 prev_clusters = *num_clusters;
4988         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4989         u64 block;
4990         handle_t *handle = ctxt->handle;
4991         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4992         struct ocfs2_extent_tree et;
4993
4994         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4995              "previous xattr blkno = %llu\n",
4996              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4997              prev_cpos, (unsigned long long)bucket_blkno(first));
4998
4999         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5000
5001         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5002                                       OCFS2_JOURNAL_ACCESS_WRITE);
5003         if (ret < 0) {
5004                 mlog_errno(ret);
5005                 goto leave;
5006         }
5007
5008         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
5009                                      clusters_to_add, &bit_off, &num_bits);
5010         if (ret < 0) {
5011                 if (ret != -ENOSPC)
5012                         mlog_errno(ret);
5013                 goto leave;
5014         }
5015
5016         BUG_ON(num_bits > clusters_to_add);
5017
5018         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5019         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
5020              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5021
5022         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5023             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5024              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5025                 /*
5026                  * If this cluster is contiguous with the old one and
5027                  * adding this new cluster, we don't surpass the limit of
5028                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5029                  * initialized and used like other buckets in the previous
5030                  * cluster.
5031                  * So add it as a contiguous one. The caller will handle
5032                  * its init process.
5033                  */
5034                 v_start = prev_cpos + prev_clusters;
5035                 *num_clusters = prev_clusters + num_bits;
5036                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5037                      num_bits);
5038         } else {
5039                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5040                                                        handle,
5041                                                        first,
5042                                                        target,
5043                                                        block,
5044                                                        prev_clusters,
5045                                                        &v_start,
5046                                                        extend);
5047                 if (ret) {
5048                         mlog_errno(ret);
5049                         goto leave;
5050                 }
5051         }
5052
5053         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
5054              num_bits, (unsigned long long)block, v_start);
5055         ret = ocfs2_insert_extent(handle, &et, v_start, block,
5056                                   num_bits, 0, ctxt->meta_ac);
5057         if (ret < 0) {
5058                 mlog_errno(ret);
5059                 goto leave;
5060         }
5061
5062         ret = ocfs2_journal_dirty(handle, root_bh);
5063         if (ret < 0)
5064                 mlog_errno(ret);
5065
5066 leave:
5067         return ret;
5068 }
5069
5070 /*
5071  * We are given an extent.  'first' is the bucket at the very front of
5072  * the extent.  The extent has space for an additional bucket past
5073  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5074  * of the target bucket.  We wish to shift every bucket past the target
5075  * down one, filling in that additional space.  When we get back to the
5076  * target, we split the target between itself and the now-empty bucket
5077  * at target+1 (aka, target_blkno + blks_per_bucket).
5078  */
5079 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5080                                      handle_t *handle,
5081                                      struct ocfs2_xattr_bucket *first,
5082                                      u64 target_blk,
5083                                      u32 num_clusters)
5084 {
5085         int ret, credits;
5086         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5087         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5088         u64 end_blk;
5089         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5090
5091         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5092              "from %llu, len = %u\n", (unsigned long long)target_blk,
5093              (unsigned long long)bucket_blkno(first), num_clusters);
5094
5095         /* The extent must have room for an additional bucket */
5096         BUG_ON(new_bucket >=
5097                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5098
5099         /* end_blk points to the last existing bucket */
5100         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5101
5102         /*
5103          * end_blk is the start of the last existing bucket.
5104          * Thus, (end_blk - target_blk) covers the target bucket and
5105          * every bucket after it up to, but not including, the last
5106          * existing bucket.  Then we add the last existing bucket, the
5107          * new bucket, and the first bucket (3 * blk_per_bucket).
5108          */
5109         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
5110                   handle->h_buffer_credits;
5111         ret = ocfs2_extend_trans(handle, credits);
5112         if (ret) {
5113                 mlog_errno(ret);
5114                 goto out;
5115         }
5116
5117         ret = ocfs2_xattr_bucket_journal_access(handle, first,
5118                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5119         if (ret) {
5120                 mlog_errno(ret);
5121                 goto out;
5122         }
5123
5124         while (end_blk != target_blk) {
5125                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5126                                             end_blk + blk_per_bucket, 0);
5127                 if (ret)
5128                         goto out;
5129                 end_blk -= blk_per_bucket;
5130         }
5131
5132         /* Move half of the xattr in target_blkno to the next bucket. */
5133         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5134                                         target_blk + blk_per_bucket, NULL, 0);
5135
5136         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5137         ocfs2_xattr_bucket_journal_dirty(handle, first);
5138
5139 out:
5140         return ret;
5141 }
5142
5143 /*
5144  * Add new xattr bucket in an extent record and adjust the buckets
5145  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5146  * bucket we want to insert into.
5147  *
5148  * In the easy case, we will move all the buckets after target down by
5149  * one. Half of target's xattrs will be moved to the next bucket.
5150  *
5151  * If current cluster is full, we'll allocate a new one.  This may not
5152  * be contiguous.  The underlying calls will make sure that there is
5153  * space for the insert, shifting buckets around if necessary.
5154  * 'target' may be moved by those calls.
5155  */
5156 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5157                                       struct buffer_head *xb_bh,
5158                                       struct ocfs2_xattr_bucket *target,
5159                                       struct ocfs2_xattr_set_ctxt *ctxt)
5160 {
5161         struct ocfs2_xattr_block *xb =
5162                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5163         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5164         struct ocfs2_extent_list *el = &xb_root->xt_list;
5165         u32 name_hash =
5166                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5167         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5168         int ret, num_buckets, extend = 1;
5169         u64 p_blkno;
5170         u32 e_cpos, num_clusters;
5171         /* The bucket at the front of the extent */
5172         struct ocfs2_xattr_bucket *first;
5173
5174         mlog(0, "Add new xattr bucket starting from %llu\n",
5175              (unsigned long long)bucket_blkno(target));
5176
5177         /* The first bucket of the original extent */
5178         first = ocfs2_xattr_bucket_new(inode);
5179         if (!first) {
5180                 ret = -ENOMEM;
5181                 mlog_errno(ret);
5182                 goto out;
5183         }
5184
5185         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5186                                   &num_clusters, el);
5187         if (ret) {
5188                 mlog_errno(ret);
5189                 goto out;
5190         }
5191
5192         ret = ocfs2_read_xattr_bucket(first, p_blkno);
5193         if (ret) {
5194                 mlog_errno(ret);
5195                 goto out;
5196         }
5197
5198         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5199         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5200                 /*
5201                  * This can move first+target if the target bucket moves
5202                  * to the new extent.
5203                  */
5204                 ret = ocfs2_add_new_xattr_cluster(inode,
5205                                                   xb_bh,
5206                                                   first,
5207                                                   target,
5208                                                   &num_clusters,
5209                                                   e_cpos,
5210                                                   &extend,
5211                                                   ctxt);
5212                 if (ret) {
5213                         mlog_errno(ret);
5214                         goto out;
5215                 }
5216         }
5217
5218         if (extend) {
5219                 ret = ocfs2_extend_xattr_bucket(inode,
5220                                                 ctxt->handle,
5221                                                 first,
5222                                                 bucket_blkno(target),
5223                                                 num_clusters);
5224                 if (ret)
5225                         mlog_errno(ret);
5226         }
5227
5228 out:
5229         ocfs2_xattr_bucket_free(first);
5230
5231         return ret;
5232 }
5233
5234 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5235                                         struct ocfs2_xattr_bucket *bucket,
5236                                         int offs)
5237 {
5238         int block_off = offs >> inode->i_sb->s_blocksize_bits;
5239
5240         offs = offs % inode->i_sb->s_blocksize;
5241         return bucket_block(bucket, block_off) + offs;
5242 }
5243
5244 /*
5245  * Truncate the specified xe_off entry in xattr bucket.
5246  * bucket is indicated by header_bh and len is the new length.
5247  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5248  *
5249  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5250  */
5251 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5252                                              struct ocfs2_xattr_bucket *bucket,
5253                                              int xe_off,
5254                                              int len,
5255                                              struct ocfs2_xattr_set_ctxt *ctxt)
5256 {
5257         int ret, offset;
5258         u64 value_blk;
5259         struct ocfs2_xattr_entry *xe;
5260         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5261         size_t blocksize = inode->i_sb->s_blocksize;
5262         struct ocfs2_xattr_value_buf vb = {
5263                 .vb_access = ocfs2_journal_access,
5264         };
5265
5266         xe = &xh->xh_entries[xe_off];
5267
5268         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5269
5270         offset = le16_to_cpu(xe->xe_name_offset) +
5271                  OCFS2_XATTR_SIZE(xe->xe_name_len);
5272
5273         value_blk = offset / blocksize;
5274
5275         /* We don't allow ocfs2_xattr_value to be stored in different block. */
5276         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5277
5278         vb.vb_bh = bucket->bu_bhs[value_blk];
5279         BUG_ON(!vb.vb_bh);
5280
5281         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5282                 (vb.vb_bh->b_data + offset % blocksize);
5283
5284         /*
5285          * From here on out we have to dirty the bucket.  The generic
5286          * value calls only modify one of the bucket's bhs, but we need
5287          * to send the bucket at once.  So if they error, they *could* have
5288          * modified something.  We have to assume they did, and dirty
5289          * the whole bucket.  This leaves us in a consistent state.
5290          */
5291         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5292              xe_off, (unsigned long long)bucket_blkno(bucket), len);
5293         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5294         if (ret) {
5295                 mlog_errno(ret);
5296                 goto out;
5297         }
5298
5299         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5300                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5301         if (ret) {
5302                 mlog_errno(ret);
5303                 goto out;
5304         }
5305
5306         xe->xe_value_size = cpu_to_le64(len);
5307
5308         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5309
5310 out:
5311         return ret;
5312 }
5313
5314 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5315                                   struct buffer_head *root_bh,
5316                                   u64 blkno,
5317                                   u32 cpos,
5318                                   u32 len,
5319                                   void *para)
5320 {
5321         int ret;
5322         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5323         struct inode *tl_inode = osb->osb_tl_inode;
5324         handle_t *handle;
5325         struct ocfs2_xattr_block *xb =
5326                         (struct ocfs2_xattr_block *)root_bh->b_data;
5327         struct ocfs2_alloc_context *meta_ac = NULL;
5328         struct ocfs2_cached_dealloc_ctxt dealloc;
5329         struct ocfs2_extent_tree et;
5330
5331         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5332                                           ocfs2_delete_xattr_in_bucket, para);
5333         if (ret) {
5334                 mlog_errno(ret);
5335                 return ret;
5336         }
5337
5338         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5339
5340         ocfs2_init_dealloc_ctxt(&dealloc);
5341
5342         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5343              cpos, len, (unsigned long long)blkno);
5344
5345         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5346                                                len);
5347
5348         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5349         if (ret) {
5350                 mlog_errno(ret);
5351                 return ret;
5352         }
5353
5354         mutex_lock(&tl_inode->i_mutex);
5355
5356         if (ocfs2_truncate_log_needs_flush(osb)) {
5357                 ret = __ocfs2_flush_truncate_log(osb);
5358                 if (ret < 0) {
5359                         mlog_errno(ret);
5360                         goto out;
5361                 }
5362         }
5363
5364         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5365         if (IS_ERR(handle)) {
5366                 ret = -ENOMEM;
5367                 mlog_errno(ret);
5368                 goto out;
5369         }
5370
5371         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5372                                       OCFS2_JOURNAL_ACCESS_WRITE);
5373         if (ret) {
5374                 mlog_errno(ret);
5375                 goto out_commit;
5376         }
5377
5378         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5379                                   &dealloc);
5380         if (ret) {
5381                 mlog_errno(ret);
5382                 goto out_commit;
5383         }
5384
5385         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5386
5387         ret = ocfs2_journal_dirty(handle, root_bh);
5388         if (ret) {
5389                 mlog_errno(ret);
5390                 goto out_commit;
5391         }
5392
5393         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5394         if (ret)
5395                 mlog_errno(ret);
5396
5397 out_commit:
5398         ocfs2_commit_trans(osb, handle);
5399 out:
5400         ocfs2_schedule_truncate_log_flush(osb, 1);
5401
5402         mutex_unlock(&tl_inode->i_mutex);
5403
5404         if (meta_ac)
5405                 ocfs2_free_alloc_context(meta_ac);
5406
5407         ocfs2_run_deallocs(osb, &dealloc);
5408
5409         return ret;
5410 }
5411
5412 /*
5413  * check whether the xattr bucket is filled up with the same hash value.
5414  * If we want to insert the xattr with the same hash, return -ENOSPC.
5415  * If we want to insert a xattr with different hash value, go ahead
5416  * and ocfs2_divide_xattr_bucket will handle this.
5417  */
5418 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5419                                               struct ocfs2_xattr_bucket *bucket,
5420                                               const char *name)
5421 {
5422         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5423         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5424
5425         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5426                 return 0;
5427
5428         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5429             xh->xh_entries[0].xe_name_hash) {
5430                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5431                      "hash = %u\n",
5432                      (unsigned long long)bucket_blkno(bucket),
5433                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5434                 return -ENOSPC;
5435         }
5436
5437         return 0;
5438 }
5439
5440 /*
5441  * Try to set the entry in the current bucket.  If we fail, the caller
5442  * will handle getting us another bucket.
5443  */
5444 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5445                                         struct ocfs2_xattr_info *xi,
5446                                         struct ocfs2_xattr_search *xs,
5447                                         struct ocfs2_xattr_set_ctxt *ctxt)
5448 {
5449         int ret;
5450         struct ocfs2_xa_loc loc;
5451
5452         mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name);
5453
5454         ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5455                                        xs->not_found ? NULL : xs->here);
5456         ret = ocfs2_xa_set(&loc, xi, ctxt);
5457         if (!ret) {
5458                 xs->here = loc.xl_entry;
5459                 goto out;
5460         }
5461         if (ret != -ENOSPC) {
5462                 mlog_errno(ret);
5463                 goto out;
5464         }
5465
5466         /* Ok, we need space.  Let's try defragmenting the bucket. */
5467         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5468                                         xs->bucket);
5469         if (ret) {
5470                 mlog_errno(ret);
5471                 goto out;
5472         }
5473
5474         ret = ocfs2_xa_set(&loc, xi, ctxt);
5475         if (!ret) {
5476                 xs->here = loc.xl_entry;
5477                 goto out;
5478         }
5479         if (ret != -ENOSPC)
5480                 mlog_errno(ret);
5481
5482
5483 out:
5484         mlog_exit(ret);
5485         return ret;
5486 }
5487
5488 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5489                                              struct ocfs2_xattr_info *xi,
5490                                              struct ocfs2_xattr_search *xs,
5491                                              struct ocfs2_xattr_set_ctxt *ctxt)
5492 {
5493         int ret;
5494
5495         mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5496
5497         ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5498         if (!ret)
5499                 goto out;
5500         if (ret != -ENOSPC) {
5501                 mlog_errno(ret);
5502                 goto out;
5503         }
5504
5505         /* Ack, need more space.  Let's try to get another bucket! */
5506
5507         /*
5508          * We do not allow for overlapping ranges between buckets. And
5509          * the maximum number of collisions we will allow for then is
5510          * one bucket's worth, so check it here whether we need to
5511          * add a new bucket for the insert.
5512          */
5513         ret = ocfs2_check_xattr_bucket_collision(inode,
5514                                                  xs->bucket,
5515                                                  xi->xi_name);
5516         if (ret) {
5517                 mlog_errno(ret);
5518                 goto out;
5519         }
5520
5521         ret = ocfs2_add_new_xattr_bucket(inode,
5522                                          xs->xattr_bh,
5523                                          xs->bucket,
5524                                          ctxt);
5525         if (ret) {
5526                 mlog_errno(ret);
5527                 goto out;
5528         }
5529
5530         /*
5531          * ocfs2_add_new_xattr_bucket() will have updated
5532          * xs->bucket if it moved, but it will not have updated
5533          * any of the other search fields.  Thus, we drop it and
5534          * re-search.  Everything should be cached, so it'll be
5535          * quick.
5536          */
5537         ocfs2_xattr_bucket_relse(xs->bucket);
5538         ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5539                                            xi->xi_name_index,
5540                                            xi->xi_name, xs);
5541         if (ret && ret != -ENODATA)
5542                 goto out;
5543         xs->not_found = ret;
5544
5545         /* Ok, we have a new bucket, let's try again */
5546         ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5547         if (ret && (ret != -ENOSPC))
5548                 mlog_errno(ret);
5549
5550 out:
5551         mlog_exit(ret);
5552         return ret;
5553 }
5554
5555 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5556                                         struct ocfs2_xattr_bucket *bucket,
5557                                         void *para)
5558 {
5559         int ret = 0, ref_credits;
5560         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5561         u16 i;
5562         struct ocfs2_xattr_entry *xe;
5563         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5564         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5565         int credits = ocfs2_remove_extent_credits(osb->sb) +
5566                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5567         struct ocfs2_xattr_value_root *xv;
5568         struct ocfs2_rm_xattr_bucket_para *args =
5569                         (struct ocfs2_rm_xattr_bucket_para *)para;
5570
5571         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5572
5573         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5574                 xe = &xh->xh_entries[i];
5575                 if (ocfs2_xattr_is_local(xe))
5576                         continue;
5577
5578                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5579                                                       i, &xv, NULL);
5580
5581                 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5582                                                          args->ref_ci,
5583                                                          args->ref_root_bh,
5584                                                          &ctxt.meta_ac,
5585                                                          &ref_credits);
5586
5587                 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5588                 if (IS_ERR(ctxt.handle)) {
5589                         ret = PTR_ERR(ctxt.handle);
5590                         mlog_errno(ret);
5591                         break;
5592                 }
5593
5594                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5595                                                         i, 0, &ctxt);
5596
5597                 ocfs2_commit_trans(osb, ctxt.handle);
5598                 if (ctxt.meta_ac) {
5599                         ocfs2_free_alloc_context(ctxt.meta_ac);
5600                         ctxt.meta_ac = NULL;
5601                 }
5602                 if (ret) {
5603                         mlog_errno(ret);
5604                         break;
5605                 }
5606         }
5607
5608         if (ctxt.meta_ac)
5609                 ocfs2_free_alloc_context(ctxt.meta_ac);
5610         ocfs2_schedule_truncate_log_flush(osb, 1);
5611         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5612         return ret;
5613 }
5614
5615 /*
5616  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5617  * or change the extent record flag), we need to recalculate
5618  * the metaecc for the whole bucket. So it is done here.
5619  *
5620  * Note:
5621  * We have to give the extra credits for the caller.
5622  */
5623 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5624                                             handle_t *handle,
5625                                             void *para)
5626 {
5627         int ret;
5628         struct ocfs2_xattr_bucket *bucket =
5629                         (struct ocfs2_xattr_bucket *)para;
5630
5631         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5632                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5633         if (ret) {
5634                 mlog_errno(ret);
5635                 return ret;
5636         }
5637
5638         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5639
5640         return 0;
5641 }
5642
5643 /*
5644  * Special action we need if the xattr value is refcounted.
5645  *
5646  * 1. If the xattr is refcounted, lock the tree.
5647  * 2. CoW the xattr if we are setting the new value and the value
5648  *    will be stored outside.
5649  * 3. In other case, decrease_refcount will work for us, so just
5650  *    lock the refcount tree, calculate the meta and credits is OK.
5651  *
5652  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5653  * currently CoW is a completed transaction, while this function
5654  * will also lock the allocators and let us deadlock. So we will
5655  * CoW the whole xattr value.
5656  */
5657 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5658                                         struct ocfs2_dinode *di,
5659                                         struct ocfs2_xattr_info *xi,
5660                                         struct ocfs2_xattr_search *xis,
5661                                         struct ocfs2_xattr_search *xbs,
5662                                         struct ocfs2_refcount_tree **ref_tree,
5663                                         int *meta_add,
5664                                         int *credits)
5665 {
5666         int ret = 0;
5667         struct ocfs2_xattr_block *xb;
5668         struct ocfs2_xattr_entry *xe;
5669         char *base;
5670         u32 p_cluster, num_clusters;
5671         unsigned int ext_flags;
5672         int name_offset, name_len;
5673         struct ocfs2_xattr_value_buf vb;
5674         struct ocfs2_xattr_bucket *bucket = NULL;
5675         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5676         struct ocfs2_post_refcount refcount;
5677         struct ocfs2_post_refcount *p = NULL;
5678         struct buffer_head *ref_root_bh = NULL;
5679
5680         if (!xis->not_found) {
5681                 xe = xis->here;
5682                 name_offset = le16_to_cpu(xe->xe_name_offset);
5683                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5684                 base = xis->base;
5685                 vb.vb_bh = xis->inode_bh;
5686                 vb.vb_access = ocfs2_journal_access_di;
5687         } else {
5688                 int i, block_off = 0;
5689                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5690                 xe = xbs->here;
5691                 name_offset = le16_to_cpu(xe->xe_name_offset);
5692                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5693                 i = xbs->here - xbs->header->xh_entries;
5694
5695                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5696                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5697                                                         bucket_xh(xbs->bucket),
5698                                                         i, &block_off,
5699                                                         &name_offset);
5700                         if (ret) {
5701                                 mlog_errno(ret);
5702                                 goto out;
5703                         }
5704                         base = bucket_block(xbs->bucket, block_off);
5705                         vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5706                         vb.vb_access = ocfs2_journal_access;
5707
5708                         if (ocfs2_meta_ecc(osb)) {
5709                                 /*create parameters for ocfs2_post_refcount. */
5710                                 bucket = xbs->bucket;
5711                                 refcount.credits = bucket->bu_blocks;
5712                                 refcount.para = bucket;
5713                                 refcount.func =
5714                                         ocfs2_xattr_bucket_post_refcount;
5715                                 p = &refcount;
5716                         }
5717                 } else {
5718                         base = xbs->base;
5719                         vb.vb_bh = xbs->xattr_bh;
5720                         vb.vb_access = ocfs2_journal_access_xb;
5721                 }
5722         }
5723
5724         if (ocfs2_xattr_is_local(xe))
5725                 goto out;
5726
5727         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5728                                 (base + name_offset + name_len);
5729
5730         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5731                                        &num_clusters, &vb.vb_xv->xr_list,
5732                                        &ext_flags);
5733         if (ret) {
5734                 mlog_errno(ret);
5735                 goto out;
5736         }
5737
5738         /*
5739          * We just need to check the 1st extent record, since we always
5740          * CoW the whole xattr. So there shouldn't be a xattr with
5741          * some REFCOUNT extent recs after the 1st one.
5742          */
5743         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5744                 goto out;
5745
5746         ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5747                                        1, ref_tree, &ref_root_bh);
5748         if (ret) {
5749                 mlog_errno(ret);
5750                 goto out;
5751         }
5752
5753         /*
5754          * If we are deleting the xattr or the new size will be stored inside,
5755          * cool, leave it there, the xattr truncate process will remove them
5756          * for us(it still needs the refcount tree lock and the meta, credits).
5757          * And the worse case is that every cluster truncate will split the
5758          * refcount tree, and make the original extent become 3. So we will need
5759          * 2 * cluster more extent recs at most.
5760          */
5761         if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5762
5763                 ret = ocfs2_refcounted_xattr_delete_need(inode,
5764                                                          &(*ref_tree)->rf_ci,
5765                                                          ref_root_bh, vb.vb_xv,
5766                                                          meta_add, credits);
5767                 if (ret)
5768                         mlog_errno(ret);
5769                 goto out;
5770         }
5771
5772         ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5773                                        *ref_tree, ref_root_bh, 0,
5774                                        le32_to_cpu(vb.vb_xv->xr_clusters), p);
5775         if (ret)
5776                 mlog_errno(ret);
5777
5778 out:
5779         brelse(ref_root_bh);
5780         return ret;
5781 }
5782
5783 /*
5784  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5785  * The physical clusters will be added to refcount tree.
5786  */
5787 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5788                                 struct ocfs2_xattr_value_root *xv,
5789                                 struct ocfs2_extent_tree *value_et,
5790                                 struct ocfs2_caching_info *ref_ci,
5791                                 struct buffer_head *ref_root_bh,
5792                                 struct ocfs2_cached_dealloc_ctxt *dealloc,
5793                                 struct ocfs2_post_refcount *refcount)
5794 {
5795         int ret = 0;
5796         u32 clusters = le32_to_cpu(xv->xr_clusters);
5797         u32 cpos, p_cluster, num_clusters;
5798         struct ocfs2_extent_list *el = &xv->xr_list;
5799         unsigned int ext_flags;
5800
5801         cpos = 0;
5802         while (cpos < clusters) {
5803                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5804                                                &num_clusters, el, &ext_flags);
5805
5806                 cpos += num_clusters;
5807                 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5808                         continue;
5809
5810                 BUG_ON(!p_cluster);
5811
5812                 ret = ocfs2_add_refcount_flag(inode, value_et,
5813                                               ref_ci, ref_root_bh,
5814                                               cpos - num_clusters,
5815                                               p_cluster, num_clusters,
5816                                               dealloc, refcount);
5817                 if (ret) {
5818                         mlog_errno(ret);
5819                         break;
5820                 }
5821         }
5822
5823         return ret;
5824 }
5825
5826 /*
5827  * Given a normal ocfs2_xattr_header, refcount all the entries which
5828  * have value stored outside.
5829  * Used for xattrs stored in inode and ocfs2_xattr_block.
5830  */
5831 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5832                                 struct ocfs2_xattr_value_buf *vb,
5833                                 struct ocfs2_xattr_header *header,
5834                                 struct ocfs2_caching_info *ref_ci,
5835                                 struct buffer_head *ref_root_bh,
5836                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5837 {
5838
5839         struct ocfs2_xattr_entry *xe;
5840         struct ocfs2_xattr_value_root *xv;
5841         struct ocfs2_extent_tree et;
5842         int i, ret = 0;
5843
5844         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5845                 xe = &header->xh_entries[i];
5846
5847                 if (ocfs2_xattr_is_local(xe))
5848                         continue;
5849
5850                 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5851                         le16_to_cpu(xe->xe_name_offset) +
5852                         OCFS2_XATTR_SIZE(xe->xe_name_len));
5853
5854                 vb->vb_xv = xv;
5855                 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5856
5857                 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5858                                                         ref_ci, ref_root_bh,
5859                                                         dealloc, NULL);
5860                 if (ret) {
5861                         mlog_errno(ret);
5862                         break;
5863                 }
5864         }
5865
5866         return ret;
5867 }
5868
5869 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5870                                 struct buffer_head *fe_bh,
5871                                 struct ocfs2_caching_info *ref_ci,
5872                                 struct buffer_head *ref_root_bh,
5873                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5874 {
5875         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5876         struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5877                                 (fe_bh->b_data + inode->i_sb->s_blocksize -
5878                                 le16_to_cpu(di->i_xattr_inline_size));
5879         struct ocfs2_xattr_value_buf vb = {
5880                 .vb_bh = fe_bh,
5881                 .vb_access = ocfs2_journal_access_di,
5882         };
5883
5884         return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5885                                                   ref_ci, ref_root_bh, dealloc);
5886 }
5887
/*
 * Argument bundle for refcounting an indexed xattr tree.  It is filled
 * in by ocfs2_xattr_block_attach_refcount() and read back from the
 * "para" pointer in ocfs2_xattr_bucket_value_refcount(), which forwards
 * each member to ocfs2_xattr_value_attach_refcount().
 */
5888 struct ocfs2_xattr_tree_value_refcount_para {
5889         struct ocfs2_caching_info *ref_ci;      /* forwarded as ref_ci */
5890         struct buffer_head *ref_root_bh;        /* forwarded as ref_root_bh */
5891         struct ocfs2_cached_dealloc_ctxt *dealloc; /* forwarded as dealloc */
5892 };
5893
5894 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5895                                            struct ocfs2_xattr_bucket *bucket,
5896                                            int offset,
5897                                            struct ocfs2_xattr_value_root **xv,
5898                                            struct buffer_head **bh)
5899 {
5900         int ret, block_off, name_offset;
5901         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5902         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5903         void *base;
5904
5905         ret = ocfs2_xattr_bucket_get_name_value(sb,
5906                                                 bucket_xh(bucket),
5907                                                 offset,
5908                                                 &block_off,
5909                                                 &name_offset);
5910         if (ret) {
5911                 mlog_errno(ret);
5912                 goto out;
5913         }
5914
5915         base = bucket_block(bucket, block_off);
5916
5917         *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
5918                          OCFS2_XATTR_SIZE(xe->xe_name_len));
5919
5920         if (bh)
5921                 *bh = bucket->bu_bhs[block_off];
5922 out:
5923         return ret;
5924 }
5925
5926 /*
5927  * For a given xattr bucket, refcount all the entries which
5928  * have value stored outside.
5929  */
5930 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
5931                                              struct ocfs2_xattr_bucket *bucket,
5932                                              void *para)
5933 {
5934         int i, ret = 0;
5935         struct ocfs2_extent_tree et;
5936         struct ocfs2_xattr_tree_value_refcount_para *ref =
5937                         (struct ocfs2_xattr_tree_value_refcount_para *)para;
5938         struct ocfs2_xattr_header *xh =
5939                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
5940         struct ocfs2_xattr_entry *xe;
5941         struct ocfs2_xattr_value_buf vb = {
5942                 .vb_access = ocfs2_journal_access,
5943         };
5944         struct ocfs2_post_refcount refcount = {
5945                 .credits = bucket->bu_blocks,
5946                 .para = bucket,
5947                 .func = ocfs2_xattr_bucket_post_refcount,
5948         };
5949         struct ocfs2_post_refcount *p = NULL;
5950
5951         /* We only need post_refcount if we support metaecc. */
5952         if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
5953                 p = &refcount;
5954
5955         mlog(0, "refcount bucket %llu, count = %u\n",
5956              (unsigned long long)bucket_blkno(bucket),
5957              le16_to_cpu(xh->xh_count));
5958         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5959                 xe = &xh->xh_entries[i];
5960
5961                 if (ocfs2_xattr_is_local(xe))
5962                         continue;
5963
5964                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
5965                                                       &vb.vb_xv, &vb.vb_bh);
5966                 if (ret) {
5967                         mlog_errno(ret);
5968                         break;
5969                 }
5970
5971                 ocfs2_init_xattr_value_extent_tree(&et,
5972                                                    INODE_CACHE(inode), &vb);
5973
5974                 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
5975                                                         &et, ref->ref_ci,
5976                                                         ref->ref_root_bh,
5977                                                         ref->dealloc, p);
5978                 if (ret) {
5979                         mlog_errno(ret);
5980                         break;
5981                 }
5982         }
5983
5984         return ret;
5985
5986 }
5987
5988 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
5989                                      struct buffer_head *root_bh,
5990                                      u64 blkno, u32 cpos, u32 len, void *para)
5991 {
5992         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
5993                                            ocfs2_xattr_bucket_value_refcount,
5994                                            para);
5995 }
5996
5997 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
5998                                 struct buffer_head *blk_bh,
5999                                 struct ocfs2_caching_info *ref_ci,
6000                                 struct buffer_head *ref_root_bh,
6001                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6002 {
6003         int ret = 0;
6004         struct ocfs2_xattr_block *xb =
6005                                 (struct ocfs2_xattr_block *)blk_bh->b_data;
6006
6007         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6008                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6009                 struct ocfs2_xattr_value_buf vb = {
6010                         .vb_bh = blk_bh,
6011                         .vb_access = ocfs2_journal_access_xb,
6012                 };
6013
6014                 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6015                                                          ref_ci, ref_root_bh,
6016                                                          dealloc);
6017         } else {
6018                 struct ocfs2_xattr_tree_value_refcount_para para = {
6019                         .ref_ci = ref_ci,
6020                         .ref_root_bh = ref_root_bh,
6021                         .dealloc = dealloc,
6022                 };
6023
6024                 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6025                                                 ocfs2_refcount_xattr_tree_rec,
6026                                                 &para);
6027         }
6028
6029         return ret;
6030 }
6031
6032 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6033                                      struct buffer_head *fe_bh,
6034                                      struct ocfs2_caching_info *ref_ci,
6035                                      struct buffer_head *ref_root_bh,
6036                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
6037 {
6038         int ret = 0;
6039         struct ocfs2_inode_info *oi = OCFS2_I(inode);
6040         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6041         struct buffer_head *blk_bh = NULL;
6042
6043         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6044                 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6045                                                          ref_ci, ref_root_bh,
6046                                                          dealloc);
6047                 if (ret) {
6048                         mlog_errno(ret);
6049                         goto out;
6050                 }
6051         }
6052
6053         if (!di->i_xattr_loc)
6054                 goto out;
6055
6056         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6057                                      &blk_bh);
6058         if (ret < 0) {
6059                 mlog_errno(ret);
6060                 goto out;
6061         }
6062
6063         ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6064                                                 ref_root_bh, dealloc);
6065         if (ret)
6066                 mlog_errno(ret);
6067
6068         brelse(blk_bh);
6069 out:
6070
6071         return ret;
6072 }
6073
/*
 * Callback type taking one xattr entry and returning an int; a pointer
 * to such a function is kept in struct ocfs2_xattr_reflink.
 * NOTE(review): presumably a non-zero return means the entry should be
 * reflinked; confirm against the callers, which are outside this chunk.
 */
6074 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
6075 /*
6076  * Store the information we need in xattr reflink.
6077  * old_bh and new_bh are inode bh for the old and new inode.
6078  */
6079 struct ocfs2_xattr_reflink {
6080         struct inode *old_inode;
6081         struct inode *new_inode;
6082         struct buffer_head *old_bh;     /* inode bh of old_inode */
6083         struct buffer_head *new_bh;     /* inode bh of new_inode */
6084         struct ocfs2_caching_info *ref_ci;      /* presumably the refcount tree's caching info - TODO confirm */
6085         struct buffer_head *ref_root_bh;        /* presumably the refcount tree root block - TODO confirm */
6086         struct ocfs2_cached_dealloc_ctxt *dealloc;
6087         should_xattr_reflinked *xattr_reflinked;        /* per-entry callback, see the typedef */
6088 };
6089
6090 /*
6091  * Given a xattr header and xe offset,
6092  * return the proper xv and the corresponding bh.
6093  * xattr in inode, block and xattr tree have different implementations.
6094  */
6095 typedef int (get_xattr_value_root)(struct super_block *sb,
6096                                    struct buffer_head *bh,
6097                                    struct ocfs2_xattr_header *xh,
6098                                    int offset,  /* index into xh->xh_entries */
6099                                    struct ocfs2_xattr_value_root **xv,
6100                                    struct buffer_head **ret_bh, /* callers may pass NULL */
6101                                    void *para);
6102
6103 /*
6104  * Calculate all the xattr value root metadata stored in this xattr header and
6105  * credits we need if we create them from the scratch.
6106  * We use get_xattr_value_root so that all types of xattr container can use it.
6107  */
6108 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6109                                              struct buffer_head *bh,
6110                                              struct ocfs2_xattr_header *xh,
6111                                              int *metas, int *credits,
6112                                              int *num_recs,
6113                                              get_xattr_value_root *func,
6114                                              void *para)
6115 {
6116         int i, ret = 0;
6117         struct ocfs2_xattr_value_root *xv;
6118         struct ocfs2_xattr_entry *xe;
6119
6120         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6121                 xe = &xh->xh_entries[i];
6122                 if (ocfs2_xattr_is_local(xe))
6123                         continue;
6124
6125                 ret = func(sb, bh, xh, i, &xv, NULL, para);
6126                 if (ret) {
6127                         mlog_errno(ret);
6128                         break;
6129                 }
6130
6131                 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6132                           le16_to_cpu(xv->xr_list.l_next_free_rec);
6133
6134                 *credits += ocfs2_calc_extend_credits(sb,
6135                                                 &def_xv.xv.xr_list,
6136                                                 le32_to_cpu(xv->xr_clusters));
6137
6138                 /*
6139                  * If the value is a tree with depth > 1, We don't go deep
6140                  * to the extent block, so just calculate a maximum record num.
6141                  */
6142                 if (!xv->xr_list.l_tree_depth)
6143                         *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6144                 else
6145                         *num_recs += ocfs2_clusters_for_bytes(sb,
6146                                                               XATTR_SIZE_MAX);
6147         }
6148
6149         return ret;
6150 }
6151
6152 /* Used by xattr inode and block to return the right xv and buffer_head. */
6153 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6154                                       struct buffer_head *bh,
6155                                       struct ocfs2_xattr_header *xh,
6156                                       int offset,
6157                                       struct ocfs2_xattr_value_root **xv,
6158                                       struct buffer_head **ret_bh,
6159                                       void *para)
6160 {
6161         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6162
6163         *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6164                 le16_to_cpu(xe->xe_name_offset) +
6165                 OCFS2_XATTR_SIZE(xe->xe_name_len));
6166
6167         if (ret_bh)
6168                 *ret_bh = bh;
6169
6170         return 0;
6171 }
6172
6173 /*
6174  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6175  * It is only used for inline xattr and xattr block.
6176  */
6177 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6178                                         struct ocfs2_xattr_header *xh,
6179                                         struct buffer_head *ref_root_bh,
6180                                         int *credits,
6181                                         struct ocfs2_alloc_context **meta_ac)
6182 {
6183         int ret, meta_add = 0, num_recs = 0;
6184         struct ocfs2_refcount_block *rb =
6185                         (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6186
6187         *credits = 0;
6188
6189         ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6190                                                 &meta_add, credits, &num_recs,
6191                                                 ocfs2_get_xattr_value_root,
6192                                                 NULL);
6193         if (ret) {
6194                 mlog_errno(ret);
6195                 goto out;
6196         }
6197
6198         /*
6199          * We need to add/modify num_recs in refcount tree, so just calculate
6200          * an approximate number we need for refcount tree change.
6201          * Sometimes we need to split the tree, and after split,  half recs
6202          * will be moved to the new block, and a new block can only provide
6203          * half number of recs. So we multiple new blocks by 2.
6204          */
6205         num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6206         meta_add += num_recs;
6207         *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6208         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6209                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6210                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6211         else
6212                 *credits += 1;
6213
6214         ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6215         if (ret)
6216                 mlog_errno(ret);
6217
6218 out:
6219         return ret;
6220 }
6221
/*
 * Given a xattr header, reflink all the xattrs in this container.
 * It can be used for inode, block and bucket.
 *
 * NOTE:
 * Before we call this function, the caller has memcpy the xattr in
 * old_xh to the new_xh.
 *
 * If args.xattr_reflinked is set, call it to decide whether the xe should
 * be reflinked or not. If not, remove it from the new xattr header.
 *
 * handle  - running transaction, passed to ocfs2_insert_extent() and
 *           ocfs2_increase_refcount().
 * old_bh/xh, new_bh/new_xh - source and destination containers; the bhs are
 *           handed to func to locate each value root.
 * vb      - value buf whose vb_xv/vb_bh are filled in here for every value
 *           that has a non-zero tree depth.
 * meta_ac - metadata reservation used by the extent insert and the refcount
 *           increase.
 * func/para - container specific lookup of the value root.
 *
 * Returns 0 on success or the first error returned by func,
 * ocfs2_xattr_get_clusters(), ocfs2_insert_extent() or
 * ocfs2_increase_refcount().
 */
static int ocfs2_reflink_xattr_header(handle_t *handle,
                                      struct ocfs2_xattr_reflink *args,
                                      struct buffer_head *old_bh,
                                      struct ocfs2_xattr_header *xh,
                                      struct buffer_head *new_bh,
                                      struct ocfs2_xattr_header *new_xh,
                                      struct ocfs2_xattr_value_buf *vb,
                                      struct ocfs2_alloc_context *meta_ac,
                                      get_xattr_value_root *func,
                                      void *para)
{
        int ret = 0, i, j;
        struct super_block *sb = args->old_inode->i_sb;
        struct buffer_head *value_bh;
        struct ocfs2_xattr_entry *xe, *last;
        struct ocfs2_xattr_value_root *xv, *new_xv;
        struct ocfs2_extent_tree data_et;
        u32 clusters, cpos, p_cluster, num_clusters;
        unsigned int ext_flags = 0;

        mlog(0, "reflink xattr in container %llu, count = %u\n",
             (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));

        /*
         * last is computed once from the initial count of the new header and
         * is not adjusted when entries are removed below.
         */
        last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
        /* i indexes the old header, j the matching slot in the new header. */
        for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
                xe = &xh->xh_entries[i];

                if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
                        xe = &new_xh->xh_entries[j];

                        le16_add_cpu(&new_xh->xh_count, -1);
                        /*
                         * NOTE(review): the move copies (last - xe) bytes
                         * starting at xe + 1, so it also reads the slot at
                         * last, and the memset clears that same slot -
                         * confirm this is the intended range.  Nothing is
                         * moved or cleared when the count drops to zero.
                         */
                        if (new_xh->xh_count) {
                                memmove(xe, xe + 1,
                                        (void *)last - (void *)xe);
                                memset(last, 0,
                                       sizeof(struct ocfs2_xattr_entry));
                        }

                        /*
                         * We don't want j to increase in the next round since
                         * it is already moved ahead.
                         */
                        j--;
                        continue;
                }

                /* Local entries need no extent or refcount work. */
                if (ocfs2_xattr_is_local(xe))
                        continue;

                ret = func(sb, old_bh, xh, i, &xv, NULL, para);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /*
                 * For the xattr which has l_tree_depth = 0, all the extent
                 * recs have already been copied to the new xh with the
                 * appropriate OCFS2_EXT_REFCOUNTED flag, so we just need to
                 * increase the refcount in the refcount tree.
                 *
                 * For the xattr which has l_tree_depth > 0, we need
                 * to initialize it to the empty default value root,
                 * and then insert the extents one by one.
                 */
                if (xv->xr_list.l_tree_depth) {
                        memcpy(new_xv, &def_xv, sizeof(def_xv));
                        vb->vb_xv = new_xv;
                        vb->vb_bh = value_bh;
                        ocfs2_init_xattr_value_extent_tree(&data_et,
                                        INODE_CACHE(args->new_inode), vb);
                }

                /* Walk the old value extent by extent. */
                clusters = le32_to_cpu(xv->xr_clusters);
                cpos = 0;
                while (cpos < clusters) {
                        ret = ocfs2_xattr_get_clusters(args->old_inode,
                                                       cpos,
                                                       &p_cluster,
                                                       &num_clusters,
                                                       &xv->xr_list,
                                                       &ext_flags);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }

                        BUG_ON(!p_cluster);

                        /*
                         * data_et is only initialized (above) for values with
                         * a non-zero tree depth, hence the same guard here.
                         */
                        if (xv->xr_list.l_tree_depth) {
                                ret = ocfs2_insert_extent(handle,
                                                &data_et, cpos,
                                                ocfs2_clusters_to_blocks(
                                                        args->old_inode->i_sb,
                                                        p_cluster),
                                                num_clusters, ext_flags,
                                                meta_ac);
                                if (ret) {
                                        mlog_errno(ret);
                                        goto out;
                                }
                        }

                        ret = ocfs2_increase_refcount(handle, args->ref_ci,
                                                      args->ref_root_bh,
                                                      p_cluster, num_clusters,
                                                      meta_ac, args->dealloc);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }

                        cpos += num_clusters;
                }
        }

out:
        return ret;
}
6358
6359 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6360 {
6361         int ret = 0, credits = 0;
6362         handle_t *handle;
6363         struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6364         struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6365         int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6366         int header_off = osb->sb->s_blocksize - inline_size;
6367         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6368                                         (args->old_bh->b_data + header_off);
6369         struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6370                                         (args->new_bh->b_data + header_off);
6371         struct ocfs2_alloc_context *meta_ac = NULL;
6372         struct ocfs2_inode_info *new_oi;
6373         struct ocfs2_dinode *new_di;
6374         struct ocfs2_xattr_value_buf vb = {
6375                 .vb_bh = args->new_bh,
6376                 .vb_access = ocfs2_journal_access_di,
6377         };
6378
6379         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6380                                                   &credits, &meta_ac);
6381         if (ret) {
6382                 mlog_errno(ret);
6383                 goto out;
6384         }
6385
6386         handle = ocfs2_start_trans(osb, credits);
6387         if (IS_ERR(handle)) {
6388                 ret = PTR_ERR(handle);
6389                 mlog_errno(ret);
6390                 goto out;
6391         }
6392
6393         ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6394                                       args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6395         if (ret) {
6396                 mlog_errno(ret);
6397                 goto out_commit;
6398         }
6399
6400         memcpy(args->new_bh->b_data + header_off,
6401                args->old_bh->b_data + header_off, inline_size);
6402
6403         new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6404         new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6405
6406         ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6407                                          args->new_bh, new_xh, &vb, meta_ac,
6408                                          ocfs2_get_xattr_value_root, NULL);
6409         if (ret) {
6410                 mlog_errno(ret);
6411                 goto out_commit;
6412         }
6413
6414         new_oi = OCFS2_I(args->new_inode);
6415         spin_lock(&new_oi->ip_lock);
6416         new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6417         new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6418         spin_unlock(&new_oi->ip_lock);
6419
6420         ocfs2_journal_dirty(handle, args->new_bh);
6421
6422 out_commit:
6423         ocfs2_commit_trans(osb, handle);
6424
6425 out:
6426         if (meta_ac)
6427                 ocfs2_free_alloc_context(meta_ac);
6428         return ret;
6429 }
6430
6431 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6432                                           struct buffer_head *fe_bh,
6433                                           struct buffer_head **ret_bh,
6434                                           int indexed)
6435 {
6436         int ret;
6437         handle_t *handle;
6438         struct ocfs2_alloc_context *meta_ac;
6439         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6440
6441         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6442         if (ret < 0) {
6443                 mlog_errno(ret);
6444                 return ret;
6445         }
6446
6447         handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6448         if (IS_ERR(handle)) {
6449                 ret = PTR_ERR(handle);
6450                 mlog_errno(ret);
6451                 goto out;
6452         }
6453
6454         mlog(0, "create new xattr block for inode %llu, index = %d\n",
6455              (unsigned long long)fe_bh->b_blocknr, indexed);
6456         ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6457                                        meta_ac, ret_bh, indexed);
6458         if (ret)
6459                 mlog_errno(ret);
6460
6461         ocfs2_commit_trans(osb, handle);
6462 out:
6463         ocfs2_free_alloc_context(meta_ac);
6464         return ret;
6465 }
6466
6467 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6468                                      struct buffer_head *blk_bh,
6469                                      struct buffer_head *new_blk_bh)
6470 {
6471         int ret = 0, credits = 0;
6472         handle_t *handle;
6473         struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6474         struct ocfs2_dinode *new_di;
6475         struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6476         int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6477         struct ocfs2_xattr_block *xb =
6478                         (struct ocfs2_xattr_block *)blk_bh->b_data;
6479         struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6480         struct ocfs2_xattr_block *new_xb =
6481                         (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6482         struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6483         struct ocfs2_alloc_context *meta_ac;
6484         struct ocfs2_xattr_value_buf vb = {
6485                 .vb_bh = new_blk_bh,
6486                 .vb_access = ocfs2_journal_access_xb,
6487         };
6488
6489         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6490                                                   &credits, &meta_ac);
6491         if (ret) {
6492                 mlog_errno(ret);
6493                 return ret;
6494         }
6495
6496         /* One more credits in case we need to add xattr flags in new inode. */
6497         handle = ocfs2_start_trans(osb, credits + 1);
6498         if (IS_ERR(handle)) {
6499                 ret = PTR_ERR(handle);
6500                 mlog_errno(ret);
6501                 goto out;
6502         }
6503
6504         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6505                 ret = ocfs2_journal_access_di(handle,
6506                                               INODE_CACHE(args->new_inode),
6507                                               args->new_bh,
6508                                               OCFS2_JOURNAL_ACCESS_WRITE);
6509                 if (ret) {
6510                         mlog_errno(ret);
6511                         goto out_commit;
6512                 }
6513         }
6514
6515         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6516                                       new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6517         if (ret) {
6518                 mlog_errno(ret);
6519                 goto out_commit;
6520         }
6521
6522         memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6523                osb->sb->s_blocksize - header_off);
6524
6525         ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6526                                          new_blk_bh, new_xh, &vb, meta_ac,
6527                                          ocfs2_get_xattr_value_root, NULL);
6528         if (ret) {
6529                 mlog_errno(ret);
6530                 goto out_commit;
6531         }
6532
6533         ocfs2_journal_dirty(handle, new_blk_bh);
6534
6535         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6536                 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6537                 spin_lock(&new_oi->ip_lock);
6538                 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6539                 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6540                 spin_unlock(&new_oi->ip_lock);
6541
6542                 ocfs2_journal_dirty(handle, args->new_bh);
6543         }
6544
6545 out_commit:
6546         ocfs2_commit_trans(osb, handle);
6547
6548 out:
6549         ocfs2_free_alloc_context(meta_ac);
6550         return ret;
6551 }
6552
/*
 * Context handed (as the void *para argument) through the xattr tree
 * reflink callbacks.
 */
struct ocfs2_reflink_xattr_tree_args {
        /* The generic reflink context shared with the inline/block paths. */
        struct ocfs2_xattr_reflink *reflink;
        /* NOTE(review): presumably the old inode's xattr block - confirm. */
        struct buffer_head *old_blk_bh;
        /* Root bh of the new inode's xattr tree extent tree. */
        struct buffer_head *new_blk_bh;
        /* Scratch bucket used to read each source bucket. */
        struct ocfs2_xattr_bucket *old_bucket;
        /* Scratch bucket used to build each destination bucket. */
        struct ocfs2_xattr_bucket *new_bucket;
};
6560
6561 /*
6562  * NOTE:
6563  * We have to handle the case that both old bucket and new bucket
6564  * will call this function to get the right ret_bh.
6565  * So The caller must give us the right bh.
6566  */
6567 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6568                                         struct buffer_head *bh,
6569                                         struct ocfs2_xattr_header *xh,
6570                                         int offset,
6571                                         struct ocfs2_xattr_value_root **xv,
6572                                         struct buffer_head **ret_bh,
6573                                         void *para)
6574 {
6575         struct ocfs2_reflink_xattr_tree_args *args =
6576                         (struct ocfs2_reflink_xattr_tree_args *)para;
6577         struct ocfs2_xattr_bucket *bucket;
6578
6579         if (bh == args->old_bucket->bu_bhs[0])
6580                 bucket = args->old_bucket;
6581         else
6582                 bucket = args->new_bucket;
6583
6584         return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6585                                                xv, ret_bh);
6586 }
6587
/*
 * Running totals gathered over all buckets of one xattr extent record by
 * ocfs2_calc_value_tree_metas().
 */
struct ocfs2_value_tree_metas {
        /* Metadata blocks to reserve. */
        int num_metas;
        /* Journal credits needed. */
        int credits;
        /* Extent records that will touch the refcount tree. */
        int num_recs;
};
6593
/*
 * get_xattr_value_root implementation for the metadata-counting pass.
 * para is the bucket being inspected; bh and xh are not used here.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        struct ocfs2_xattr_bucket *cur_bucket = para;

        return ocfs2_get_xattr_tree_value_root(sb, cur_bucket, offset,
                                               xv, ret_bh);
}
6608
6609 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6610                                       struct ocfs2_xattr_bucket *bucket,
6611                                       void *para)
6612 {
6613         struct ocfs2_value_tree_metas *metas =
6614                         (struct ocfs2_value_tree_metas *)para;
6615         struct ocfs2_xattr_header *xh =
6616                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6617
6618         /* Add the credits for this bucket first. */
6619         metas->credits += bucket->bu_blocks;
6620         return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6621                                         xh, &metas->num_metas,
6622                                         &metas->credits, &metas->num_recs,
6623                                         ocfs2_value_tree_metas_in_bucket,
6624                                         bucket);
6625 }
6626
6627 /*
6628  * Given a xattr extent rec starting from blkno and having len clusters,
6629  * iterate all the buckets calculate how much metadata we need for reflinking
6630  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6631  */
6632 static int ocfs2_lock_reflink_xattr_rec_allocators(
6633                                 struct ocfs2_reflink_xattr_tree_args *args,
6634                                 struct ocfs2_extent_tree *xt_et,
6635                                 u64 blkno, u32 len, int *credits,
6636                                 struct ocfs2_alloc_context **meta_ac,
6637                                 struct ocfs2_alloc_context **data_ac)
6638 {
6639         int ret, num_free_extents;
6640         struct ocfs2_value_tree_metas metas;
6641         struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6642         struct ocfs2_refcount_block *rb;
6643
6644         memset(&metas, 0, sizeof(metas));
6645
6646         ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6647                                           ocfs2_calc_value_tree_metas, &metas);
6648         if (ret) {
6649                 mlog_errno(ret);
6650                 goto out;
6651         }
6652
6653         *credits = metas.credits;
6654
6655         /*
6656          * Calculate we need for refcount tree change.
6657          *
6658          * We need to add/modify num_recs in refcount tree, so just calculate
6659          * an approximate number we need for refcount tree change.
6660          * Sometimes we need to split the tree, and after split,  half recs
6661          * will be moved to the new block, and a new block can only provide
6662          * half number of recs. So we multiple new blocks by 2.
6663          * In the end, we have to add credits for modifying the already
6664          * existed refcount block.
6665          */
6666         rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6667         metas.num_recs =
6668                 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6669                  ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6670         metas.num_metas += metas.num_recs;
6671         *credits += metas.num_recs +
6672                     metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6673         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6674                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6675                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6676         else
6677                 *credits += 1;
6678
6679         /* count in the xattr tree change. */
6680         num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6681         if (num_free_extents < 0) {
6682                 ret = num_free_extents;
6683                 mlog_errno(ret);
6684                 goto out;
6685         }
6686
6687         if (num_free_extents < len)
6688                 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6689
6690         *credits += ocfs2_calc_extend_credits(osb->sb,
6691                                               xt_et->et_root_el, len);
6692
6693         if (metas.num_metas) {
6694                 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6695                                                         meta_ac);
6696                 if (ret) {
6697                         mlog_errno(ret);
6698                         goto out;
6699                 }
6700         }
6701
6702         if (len) {
6703                 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6704                 if (ret)
6705                         mlog_errno(ret);
6706         }
6707 out:
6708         if (ret) {
6709                 if (*meta_ac) {
6710                         ocfs2_free_alloc_context(*meta_ac);
6711                         meta_ac = NULL;
6712                 }
6713         }
6714
6715         return ret;
6716 }
6717
/*
 * Copy a run of xattr buckets starting at blkno to new_blkno and reflink the
 * values they reference.
 *
 * clusters only gives an upper bound for the number of buckets; the real
 * count is read from the first old bucket.  Each bucket is copied block by
 * block into args->new_bucket and then passed to
 * ocfs2_reflink_xattr_header().  data_ac is not used in this function.
 *
 * Both scratch buckets in args are released before returning.
 * Returns 0 or the first error encountered.
 */
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                                u64 blkno, u64 new_blkno, u32 clusters,
                                struct ocfs2_alloc_context *meta_ac,
                                struct ocfs2_alloc_context *data_ac,
                                struct ocfs2_reflink_xattr_tree_args *args)
{
        int i, j, ret = 0;
        struct super_block *sb = args->reflink->old_inode->i_sb;
        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
        u32 num_buckets = clusters * bpc;
        int bpb = args->old_bucket->bu_blocks;
        struct ocfs2_xattr_value_buf vb = {
                .vb_access = ocfs2_journal_access,
        };

        /* Source and destination advance together, one bucket per round. */
        for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
                ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /*
                 * The real bucket num in this series of blocks is stored
                 * in the 1st bucket.
                 */
                if (i == 0)
                        num_buckets = le16_to_cpu(
                                bucket_xh(args->old_bucket)->xh_num_buckets);

                ret = ocfs2_xattr_bucket_journal_access(handle,
                                                args->new_bucket,
                                                OCFS2_JOURNAL_ACCESS_CREATE);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /* Duplicate every block of the bucket verbatim. */
                for (j = 0; j < bpb; j++)
                        memcpy(bucket_block(args->new_bucket, j),
                               bucket_block(args->old_bucket, j),
                               sb->s_blocksize);

                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
                                        args->old_bucket->bu_bhs[0],
                                        bucket_xh(args->old_bucket),
                                        args->new_bucket->bu_bhs[0],
                                        bucket_xh(args->new_bucket),
                                        &vb, meta_ac,
                                        ocfs2_get_reflink_xattr_value_root,
                                        args);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /*
                 * Re-access and dirty the bucket to calculate metaecc.
                 * Because we may extend the transaction in reflink_xattr_header
                 * which will let the already accessed block gone.
                 */
                ret = ocfs2_xattr_bucket_journal_access(handle,
                                                args->new_bucket,
                                                OCFS2_JOURNAL_ACCESS_WRITE);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
                ocfs2_xattr_bucket_relse(args->old_bucket);
                ocfs2_xattr_bucket_relse(args->new_bucket);
        }

        /*
         * Covers the early-exit paths above, where the buckets are still
         * held; after a completed round they have already been released.
         */
        ocfs2_xattr_bucket_relse(args->old_bucket);
        ocfs2_xattr_bucket_relse(args->new_bucket);
        return ret;
}
6804 /*
6805  * Create the same xattr extent record in the new inode's xattr tree.
6806  */
6807 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6808                                    struct buffer_head *root_bh,
6809                                    u64 blkno,
6810                                    u32 cpos,
6811                                    u32 len,
6812                                    void *para)
6813 {
6814         int ret, credits = 0;
6815         u32 p_cluster, num_clusters;
6816         u64 new_blkno;
6817         handle_t *handle;
6818         struct ocfs2_reflink_xattr_tree_args *args =
6819                         (struct ocfs2_reflink_xattr_tree_args *)para;
6820         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6821         struct ocfs2_alloc_context *meta_ac = NULL;
6822         struct ocfs2_alloc_context *data_ac = NULL;
6823         struct ocfs2_extent_tree et;
6824
6825         ocfs2_init_xattr_tree_extent_tree(&et,
6826                                           INODE_CACHE(args->reflink->new_inode),
6827                                           args->new_blk_bh);
6828
6829         ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6830                                                       len, &credits,
6831                                                       &meta_ac, &data_ac);
6832         if (ret) {
6833                 mlog_errno(ret);
6834                 goto out;
6835         }
6836
6837         handle = ocfs2_start_trans(osb, credits);
6838         if (IS_ERR(handle)) {
6839                 ret = PTR_ERR(handle);
6840                 mlog_errno(ret);
6841                 goto out;
6842         }
6843
6844         ret = ocfs2_claim_clusters(osb, handle, data_ac,
6845                                    len, &p_cluster, &num_clusters);
6846         if (ret) {
6847                 mlog_errno(ret);
6848                 goto out_commit;
6849         }
6850
6851         new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6852
6853         mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6854              (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6855         ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6856                                           meta_ac, data_ac, args);
6857         if (ret) {
6858                 mlog_errno(ret);
6859                 goto out_commit;
6860         }
6861
6862         mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6863              (unsigned long long)new_blkno, len, cpos);
6864         ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6865                                   len, 0, meta_ac);
6866         if (ret)
6867                 mlog_errno(ret);
6868
6869 out_commit:
6870         ocfs2_commit_trans(osb, handle);
6871
6872 out:
6873         if (meta_ac)
6874                 ocfs2_free_alloc_context(meta_ac);
6875         if (data_ac)
6876                 ocfs2_free_alloc_context(data_ac);
6877         return ret;
6878 }
6879
6880 /*
6881  * Create reflinked xattr buckets.
6882  * We will add bucket one by one, and refcount all the xattrs in the bucket
6883  * if they are stored outside.
6884  */
6885 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6886                                     struct buffer_head *blk_bh,
6887                                     struct buffer_head *new_blk_bh)
6888 {
6889         int ret;
6890         struct ocfs2_reflink_xattr_tree_args para;
6891
6892         memset(&para, 0, sizeof(para));
6893         para.reflink = args;
6894         para.old_blk_bh = blk_bh;
6895         para.new_blk_bh = new_blk_bh;
6896
6897         para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6898         if (!para.old_bucket) {
6899                 mlog_errno(-ENOMEM);
6900                 return -ENOMEM;
6901         }
6902
6903         para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6904         if (!para.new_bucket) {
6905                 ret = -ENOMEM;
6906                 mlog_errno(ret);
6907                 goto out;
6908         }
6909
6910         ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
6911                                               ocfs2_reflink_xattr_rec,
6912                                               &para);
6913         if (ret)
6914                 mlog_errno(ret);
6915
6916 out:
6917         ocfs2_xattr_bucket_free(para.old_bucket);
6918         ocfs2_xattr_bucket_free(para.new_bucket);
6919         return ret;
6920 }
6921
6922 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
6923                                         struct buffer_head *blk_bh)
6924 {
6925         int ret, indexed = 0;
6926         struct buffer_head *new_blk_bh = NULL;
6927         struct ocfs2_xattr_block *xb =
6928                         (struct ocfs2_xattr_block *)blk_bh->b_data;
6929
6930
6931         if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
6932                 indexed = 1;
6933
6934         ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
6935                                              &new_blk_bh, indexed);
6936         if (ret) {
6937                 mlog_errno(ret);
6938                 goto out;
6939         }
6940
6941         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
6942                 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
6943         else
6944                 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
6945         if (ret)
6946                 mlog_errno(ret);
6947
6948 out:
6949         brelse(new_blk_bh);
6950         return ret;
6951 }
6952
6953 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
6954 {
6955         int type = ocfs2_xattr_get_type(xe);
6956
6957         return type != OCFS2_XATTR_INDEX_SECURITY &&
6958                type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
6959                type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
6960 }
6961
6962 int ocfs2_reflink_xattrs(struct inode *old_inode,
6963                          struct buffer_head *old_bh,
6964                          struct inode *new_inode,
6965                          struct buffer_head *new_bh,
6966                          bool preserve_security)
6967 {
6968         int ret;
6969         struct ocfs2_xattr_reflink args;
6970         struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
6971         struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
6972         struct buffer_head *blk_bh = NULL;
6973         struct ocfs2_cached_dealloc_ctxt dealloc;
6974         struct ocfs2_refcount_tree *ref_tree;
6975         struct buffer_head *ref_root_bh = NULL;
6976
6977         ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6978                                        le64_to_cpu(di->i_refcount_loc),
6979                                        1, &ref_tree, &ref_root_bh);
6980         if (ret) {
6981                 mlog_errno(ret);
6982                 goto out;
6983         }
6984
6985         ocfs2_init_dealloc_ctxt(&dealloc);
6986
6987         args.old_inode = old_inode;
6988         args.new_inode = new_inode;
6989         args.old_bh = old_bh;
6990         args.new_bh = new_bh;
6991         args.ref_ci = &ref_tree->rf_ci;
6992         args.ref_root_bh = ref_root_bh;
6993         args.dealloc = &dealloc;
6994         if (preserve_security)
6995                 args.xattr_reflinked = NULL;
6996         else
6997                 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
6998
6999         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7000                 ret = ocfs2_reflink_xattr_inline(&args);
7001                 if (ret) {
7002                         mlog_errno(ret);
7003                         goto out_unlock;
7004                 }
7005         }
7006
7007         if (!di->i_xattr_loc)
7008                 goto out_unlock;
7009
7010         ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7011                                      &blk_bh);
7012         if (ret < 0) {
7013                 mlog_errno(ret);
7014                 goto out_unlock;
7015         }
7016
7017         ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7018         if (ret)
7019                 mlog_errno(ret);
7020
7021         brelse(blk_bh);
7022
7023 out_unlock:
7024         ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7025                                    ref_tree, 1);
7026         brelse(ref_root_bh);
7027
7028         if (ocfs2_dealloc_has_cluster(&dealloc)) {
7029                 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7030                 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7031         }
7032
7033 out:
7034         return ret;
7035 }
7036
7037 /*
7038  * Initialize security and acl for a already created inode.
7039  * Used for reflink a non-preserve-security file.
7040  *
7041  * It uses common api like ocfs2_xattr_set, so the caller
7042  * must not hold any lock expect i_mutex.
7043  */
7044 int ocfs2_init_security_and_acl(struct inode *dir,
7045                                 struct inode *inode)
7046 {
7047         int ret = 0;
7048         struct buffer_head *dir_bh = NULL;
7049         struct ocfs2_security_xattr_info si = {
7050                 .enable = 1,
7051         };
7052
7053         ret = ocfs2_init_security_get(inode, dir, &si);
7054         if (!ret) {
7055                 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7056                                       si.name, si.value, si.value_len,
7057                                       XATTR_CREATE);
7058                 if (ret) {
7059                         mlog_errno(ret);
7060                         goto leave;
7061                 }
7062         } else if (ret != -EOPNOTSUPP) {
7063                 mlog_errno(ret);
7064                 goto leave;
7065         }
7066
7067         ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7068         if (ret) {
7069                 mlog_errno(ret);
7070                 goto leave;
7071         }
7072
7073         ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7074         if (ret)
7075                 mlog_errno(ret);
7076
7077         ocfs2_inode_unlock(dir, 0);
7078         brelse(dir_bh);
7079 leave:
7080         return ret;
7081 }
7082 /*
7083  * 'security' attributes support
7084  */
7085 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7086                                         size_t list_size, const char *name,
7087                                         size_t name_len, int type)
7088 {
7089         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7090         const size_t total_len = prefix_len + name_len + 1;
7091
7092         if (list && total_len <= list_size) {
7093                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7094                 memcpy(list + prefix_len, name, name_len);
7095                 list[prefix_len + name_len] = '\0';
7096         }
7097         return total_len;
7098 }
7099
7100 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7101                                     void *buffer, size_t size, int type)
7102 {
7103         if (strcmp(name, "") == 0)
7104                 return -EINVAL;
7105         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7106                                name, buffer, size);
7107 }
7108
7109 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7110                 const void *value, size_t size, int flags, int type)
7111 {
7112         if (strcmp(name, "") == 0)
7113                 return -EINVAL;
7114
7115         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7116                                name, value, size, flags);
7117 }
7118
7119 int ocfs2_init_security_get(struct inode *inode,
7120                             struct inode *dir,
7121                             struct ocfs2_security_xattr_info *si)
7122 {
7123         /* check whether ocfs2 support feature xattr */
7124         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7125                 return -EOPNOTSUPP;
7126         return security_inode_init_security(inode, dir, &si->name, &si->value,
7127                                             &si->value_len);
7128 }
7129
7130 int ocfs2_init_security_set(handle_t *handle,
7131                             struct inode *inode,
7132                             struct buffer_head *di_bh,
7133                             struct ocfs2_security_xattr_info *si,
7134                             struct ocfs2_alloc_context *xattr_ac,
7135                             struct ocfs2_alloc_context *data_ac)
7136 {
7137         return ocfs2_xattr_set_handle(handle, inode, di_bh,
7138                                      OCFS2_XATTR_INDEX_SECURITY,
7139                                      si->name, si->value, si->value_len, 0,
7140                                      xattr_ac, data_ac);
7141 }
7142
7143 struct xattr_handler ocfs2_xattr_security_handler = {
7144         .prefix = XATTR_SECURITY_PREFIX,
7145         .list   = ocfs2_xattr_security_list,
7146         .get    = ocfs2_xattr_security_get,
7147         .set    = ocfs2_xattr_security_set,
7148 };
7149
7150 /*
7151  * 'trusted' attributes support
7152  */
7153 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7154                                        size_t list_size, const char *name,
7155                                        size_t name_len, int type)
7156 {
7157         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7158         const size_t total_len = prefix_len + name_len + 1;
7159
7160         if (list && total_len <= list_size) {
7161                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7162                 memcpy(list + prefix_len, name, name_len);
7163                 list[prefix_len + name_len] = '\0';
7164         }
7165         return total_len;
7166 }
7167
7168 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7169                 void *buffer, size_t size, int type)
7170 {
7171         if (strcmp(name, "") == 0)
7172                 return -EINVAL;
7173         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7174                                name, buffer, size);
7175 }
7176
7177 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7178                 const void *value, size_t size, int flags, int type)
7179 {
7180         if (strcmp(name, "") == 0)
7181                 return -EINVAL;
7182
7183         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7184                                name, value, size, flags);
7185 }
7186
7187 struct xattr_handler ocfs2_xattr_trusted_handler = {
7188         .prefix = XATTR_TRUSTED_PREFIX,
7189         .list   = ocfs2_xattr_trusted_list,
7190         .get    = ocfs2_xattr_trusted_get,
7191         .set    = ocfs2_xattr_trusted_set,
7192 };
7193
7194 /*
7195  * 'user' attributes support
7196  */
7197 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7198                                     size_t list_size, const char *name,
7199                                     size_t name_len, int type)
7200 {
7201         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7202         const size_t total_len = prefix_len + name_len + 1;
7203         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7204
7205         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7206                 return 0;
7207
7208         if (list && total_len <= list_size) {
7209                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7210                 memcpy(list + prefix_len, name, name_len);
7211                 list[prefix_len + name_len] = '\0';
7212         }
7213         return total_len;
7214 }
7215
7216 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7217                 void *buffer, size_t size, int type)
7218 {
7219         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7220
7221         if (strcmp(name, "") == 0)
7222                 return -EINVAL;
7223         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7224                 return -EOPNOTSUPP;
7225         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7226                                buffer, size);
7227 }
7228
7229 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7230                 const void *value, size_t size, int flags, int type)
7231 {
7232         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7233
7234         if (strcmp(name, "") == 0)
7235                 return -EINVAL;
7236         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7237                 return -EOPNOTSUPP;
7238
7239         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7240                                name, value, size, flags);
7241 }
7242
7243 struct xattr_handler ocfs2_xattr_user_handler = {
7244         .prefix = XATTR_USER_PREFIX,
7245         .list   = ocfs2_xattr_user_list,
7246         .get    = ocfs2_xattr_user_get,
7247         .set    = ocfs2_xattr_user_set,
7248 };