ocfs2: Free block to the right block group.
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60
61 struct ocfs2_xattr_def_value_root {
62         struct ocfs2_xattr_value_root   xv;
63         struct ocfs2_extent_rec         er;
64 };
65
66 struct ocfs2_xattr_bucket {
67         /* The inode these xattrs are associated with */
68         struct inode *bu_inode;
69
70         /* The actual buffers that make up the bucket */
71         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
72
73         /* How many blocks make up one bucket for this filesystem */
74         int bu_blocks;
75 };
76
77 struct ocfs2_xattr_set_ctxt {
78         handle_t *handle;
79         struct ocfs2_alloc_context *meta_ac;
80         struct ocfs2_alloc_context *data_ac;
81         struct ocfs2_cached_dealloc_ctxt dealloc;
82 };
83
/* Bytes occupied by a value root together with its one extent record */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))

/*
 * Values longer than this are accounted as a value root rather than as
 * the value bytes themselves (see namevalue_size()).
 */
#define OCFS2_XATTR_INLINE_SIZE	80

/* Gap, in bytes, subtracted from the free-space figures below */
#define OCFS2_XATTR_HEADER_GAP	4

/* Space left for entries in the minimum-size inline xattr area */
#define OCFS2_XATTR_FREE_IN_IBODY				\
	(OCFS2_MIN_XATTR_INLINE_SIZE				\
	 - sizeof(struct ocfs2_xattr_header)			\
	 - OCFS2_XATTR_HEADER_GAP)

/* Space left for entries in one xattr block of _inode's filesystem */
#define OCFS2_XATTR_FREE_IN_BLOCK(_inode)			\
	((_inode)->i_sb->s_blocksize				\
	 - sizeof(struct ocfs2_xattr_block)			\
	 - sizeof(struct ocfs2_xattr_header)			\
	 - OCFS2_XATTR_HEADER_GAP)
94
95 static struct ocfs2_xattr_def_value_root def_xv = {
96         .xv.xr_list.l_count = cpu_to_le16(1),
97 };
98
99 struct xattr_handler *ocfs2_xattr_handlers[] = {
100         &ocfs2_xattr_user_handler,
101         &ocfs2_xattr_acl_access_handler,
102         &ocfs2_xattr_acl_default_handler,
103         &ocfs2_xattr_trusted_handler,
104         &ocfs2_xattr_security_handler,
105         NULL
106 };
107
108 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
109         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
110         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
111                                         = &ocfs2_xattr_acl_access_handler,
112         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
113                                         = &ocfs2_xattr_acl_default_handler,
114         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
115         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
116 };
117
118 struct ocfs2_xattr_info {
119         int             xi_name_index;
120         const char      *xi_name;
121         int             xi_name_len;
122         const void      *xi_value;
123         size_t          xi_value_len;
124 };
125
/* Working state for locating an xattr entry. */
struct ocfs2_xattr_search {
	/* Buffer head of the inode block */
	struct buffer_head *inode_bh;

	/*
	 * Buffer head of the block holding the extended attributes.
	 * When the attributes live inside the inode, this is the same
	 * buffer as inode_bh.
	 */
	struct buffer_head *xattr_bh;

	/* xattr header, bucket and entry currently being examined */
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket *bucket;
	struct ocfs2_xattr_entry *here;

	/* Bounds of the storage region being searched */
	void *base;
	void *end;

	/* Search result flag; NOTE(review): exact encoding set by the finders */
	int not_found;
};
140
141 /* Operations on struct ocfs2_xa_entry */
142 struct ocfs2_xa_loc;
143 struct ocfs2_xa_loc_operations {
144         /*
145          * Journal functions
146          */
147         int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
148                                   int type);
149         void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);
150
151         /*
152          * Return a pointer to the appropriate buffer in loc->xl_storage
153          * at the given offset from loc->xl_header.
154          */
155         void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
156
157         /* Can we reuse the existing entry for the new value? */
158         int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
159                              struct ocfs2_xattr_info *xi);
160
161         /* How much space is needed for the new value? */
162         int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
163                                struct ocfs2_xattr_info *xi);
164
165         /*
166          * Return the offset of the first name+value pair.  This is
167          * the start of our downward-filling free space.
168          */
169         int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
170
171         /*
172          * Remove the name+value at this location.  Do whatever is
173          * appropriate with the remaining name+value pairs.
174          */
175         void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
176
177         /* Fill xl_entry with a new entry */
178         void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
179
180         /* Add name+value storage to an entry */
181         void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
182
183         /*
184          * Initialize the value buf's access and bh fields for this entry.
185          * ocfs2_xa_fill_value_buf() will handle the xv pointer.
186          */
187         void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
188                                    struct ocfs2_xattr_value_buf *vb);
189 };
190
/*
 * Location of one xattr entry.  This lives in memory only and tracks
 * the corresponding on-disk structure.
 */
struct ocfs2_xa_loc {
	/* Inode that owns the xattr */
	struct inode *xl_inode;

	/* ocfs2_xattr_header within the on-disk storage; never NULL */
	struct ocfs2_xattr_header *xl_header;

	/* Byte count from xl_header to the end of the storage */
	int xl_size;

	/*
	 * The ocfs2_xattr_entry described by this location.  When NULL,
	 * the location describes the on-disk structure in which the entry
	 * would have been found.
	 */
	struct ocfs2_xattr_entry *xl_entry;

	/*
	 * The remaining fields are internal housekeeping.
	 */

	/* Buffer(s) that contain the entry */
	void *xl_storage;

	/* Callbacks matching the storage behind this location */
	const struct ocfs2_xa_loc_operations *xl_ops;
};
222
223 /*
224  * Convenience functions to calculate how much space is needed for a
225  * given name+value pair
226  */
227 static int namevalue_size(int name_len, uint64_t value_len)
228 {
229         if (value_len > OCFS2_XATTR_INLINE_SIZE)
230                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
231         else
232                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
233 }
234
235 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
236 {
237         return namevalue_size(xi->xi_name_len, xi->xi_value_len);
238 }
239
240 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
241 {
242         u64 value_len = le64_to_cpu(xe->xe_value_size);
243
244         BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
245                ocfs2_xattr_is_local(xe));
246         return namevalue_size(xe->xe_name_len, value_len);
247 }
248
249
250 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
251                                              struct ocfs2_xattr_header *xh,
252                                              int index,
253                                              int *block_off,
254                                              int *new_offset);
255
256 static int ocfs2_xattr_block_find(struct inode *inode,
257                                   int name_index,
258                                   const char *name,
259                                   struct ocfs2_xattr_search *xs);
260 static int ocfs2_xattr_index_block_find(struct inode *inode,
261                                         struct buffer_head *root_bh,
262                                         int name_index,
263                                         const char *name,
264                                         struct ocfs2_xattr_search *xs);
265
266 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
267                                         struct buffer_head *blk_bh,
268                                         char *buffer,
269                                         size_t buffer_size);
270
271 static int ocfs2_xattr_create_index_block(struct inode *inode,
272                                           struct ocfs2_xattr_search *xs,
273                                           struct ocfs2_xattr_set_ctxt *ctxt);
274
275 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
276                                              struct ocfs2_xattr_info *xi,
277                                              struct ocfs2_xattr_search *xs,
278                                              struct ocfs2_xattr_set_ctxt *ctxt);
279
280 typedef int (xattr_tree_rec_func)(struct inode *inode,
281                                   struct buffer_head *root_bh,
282                                   u64 blkno, u32 cpos, u32 len, void *para);
283 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
284                                            struct buffer_head *root_bh,
285                                            xattr_tree_rec_func *rec_func,
286                                            void *para);
287 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
288                                         struct ocfs2_xattr_bucket *bucket,
289                                         void *para);
290 static int ocfs2_rm_xattr_cluster(struct inode *inode,
291                                   struct buffer_head *root_bh,
292                                   u64 blkno,
293                                   u32 cpos,
294                                   u32 len,
295                                   void *para);
296
297 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
298                                   u64 src_blk, u64 last_blk, u64 to_blk,
299                                   unsigned int start_bucket,
300                                   u32 *first_hash);
301 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
302                                         struct ocfs2_dinode *di,
303                                         struct ocfs2_xattr_info *xi,
304                                         struct ocfs2_xattr_search *xis,
305                                         struct ocfs2_xattr_search *xbs,
306                                         struct ocfs2_refcount_tree **ref_tree,
307                                         int *meta_need,
308                                         int *credits);
309 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
310                                            struct ocfs2_xattr_bucket *bucket,
311                                            int offset,
312                                            struct ocfs2_xattr_value_root **xv,
313                                            struct buffer_head **bh);
314
315 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
316 {
317         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
318 }
319
320 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
321 {
322         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
323 }
324
/* Block number of the bucket's first buffer */
#define bucket_blkno(_bkt)	((_bkt)->bu_bhs[0]->b_blocknr)
/* Data of the bucket's _idx'th buffer */
#define bucket_block(_bkt, _idx)	((_bkt)->bu_bhs[(_idx)]->b_data)
/* The bucket's first block viewed as an xattr header */
#define bucket_xh(_bkt)	\
	((struct ocfs2_xattr_header *)bucket_block((_bkt), 0))
328
329 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
330 {
331         struct ocfs2_xattr_bucket *bucket;
332         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
333
334         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
335
336         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
337         if (bucket) {
338                 bucket->bu_inode = inode;
339                 bucket->bu_blocks = blks;
340         }
341
342         return bucket;
343 }
344
345 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
346 {
347         int i;
348
349         for (i = 0; i < bucket->bu_blocks; i++) {
350                 brelse(bucket->bu_bhs[i]);
351                 bucket->bu_bhs[i] = NULL;
352         }
353 }
354
355 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
356 {
357         if (bucket) {
358                 ocfs2_xattr_bucket_relse(bucket);
359                 bucket->bu_inode = NULL;
360                 kfree(bucket);
361         }
362 }
363
364 /*
365  * A bucket that has never been written to disk doesn't need to be
366  * read.  We just need the buffer_heads.  Don't call this for
367  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
368  * them fully.
369  */
370 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
371                                    u64 xb_blkno)
372 {
373         int i, rc = 0;
374
375         for (i = 0; i < bucket->bu_blocks; i++) {
376                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
377                                               xb_blkno + i);
378                 if (!bucket->bu_bhs[i]) {
379                         rc = -EIO;
380                         mlog_errno(rc);
381                         break;
382                 }
383
384                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
385                                            bucket->bu_bhs[i]))
386                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
387                                                       bucket->bu_bhs[i]);
388         }
389
390         if (rc)
391                 ocfs2_xattr_bucket_relse(bucket);
392         return rc;
393 }
394
395 /* Read the xattr bucket at xb_blkno */
396 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
397                                    u64 xb_blkno)
398 {
399         int rc;
400
401         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
402                                bucket->bu_blocks, bucket->bu_bhs, 0,
403                                NULL);
404         if (!rc) {
405                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
406                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
407                                                  bucket->bu_bhs,
408                                                  bucket->bu_blocks,
409                                                  &bucket_xh(bucket)->xh_check);
410                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
411                 if (rc)
412                         mlog_errno(rc);
413         }
414
415         if (rc)
416                 ocfs2_xattr_bucket_relse(bucket);
417         return rc;
418 }
419
420 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
421                                              struct ocfs2_xattr_bucket *bucket,
422                                              int type)
423 {
424         int i, rc = 0;
425
426         for (i = 0; i < bucket->bu_blocks; i++) {
427                 rc = ocfs2_journal_access(handle,
428                                           INODE_CACHE(bucket->bu_inode),
429                                           bucket->bu_bhs[i], type);
430                 if (rc) {
431                         mlog_errno(rc);
432                         break;
433                 }
434         }
435
436         return rc;
437 }
438
439 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
440                                              struct ocfs2_xattr_bucket *bucket)
441 {
442         int i;
443
444         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
445         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
446                                    bucket->bu_bhs, bucket->bu_blocks,
447                                    &bucket_xh(bucket)->xh_check);
448         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
449
450         for (i = 0; i < bucket->bu_blocks; i++)
451                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
452 }
453
454 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
455                                          struct ocfs2_xattr_bucket *src)
456 {
457         int i;
458         int blocksize = src->bu_inode->i_sb->s_blocksize;
459
460         BUG_ON(dest->bu_blocks != src->bu_blocks);
461         BUG_ON(dest->bu_inode != src->bu_inode);
462
463         for (i = 0; i < src->bu_blocks; i++) {
464                 memcpy(bucket_block(dest, i), bucket_block(src, i),
465                        blocksize);
466         }
467 }
468
469 static int ocfs2_validate_xattr_block(struct super_block *sb,
470                                       struct buffer_head *bh)
471 {
472         int rc;
473         struct ocfs2_xattr_block *xb =
474                 (struct ocfs2_xattr_block *)bh->b_data;
475
476         mlog(0, "Validating xattr block %llu\n",
477              (unsigned long long)bh->b_blocknr);
478
479         BUG_ON(!buffer_uptodate(bh));
480
481         /*
482          * If the ecc fails, we return the error but otherwise
483          * leave the filesystem running.  We know any error is
484          * local to this block.
485          */
486         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
487         if (rc)
488                 return rc;
489
490         /*
491          * Errors after here are fatal
492          */
493
494         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
495                 ocfs2_error(sb,
496                             "Extended attribute block #%llu has bad "
497                             "signature %.*s",
498                             (unsigned long long)bh->b_blocknr, 7,
499                             xb->xb_signature);
500                 return -EINVAL;
501         }
502
503         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
504                 ocfs2_error(sb,
505                             "Extended attribute block #%llu has an "
506                             "invalid xb_blkno of %llu",
507                             (unsigned long long)bh->b_blocknr,
508                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
509                 return -EINVAL;
510         }
511
512         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
513                 ocfs2_error(sb,
514                             "Extended attribute block #%llu has an invalid "
515                             "xb_fs_generation of #%u",
516                             (unsigned long long)bh->b_blocknr,
517                             le32_to_cpu(xb->xb_fs_generation));
518                 return -EINVAL;
519         }
520
521         return 0;
522 }
523
524 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
525                                   struct buffer_head **bh)
526 {
527         int rc;
528         struct buffer_head *tmp = *bh;
529
530         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
531                               ocfs2_validate_xattr_block);
532
533         /* If ocfs2_read_block() got us a new bh, pass it up. */
534         if (!rc && !*bh)
535                 *bh = tmp;
536
537         return rc;
538 }
539
540 static inline const char *ocfs2_xattr_prefix(int name_index)
541 {
542         struct xattr_handler *handler = NULL;
543
544         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
545                 handler = ocfs2_xattr_handler_map[name_index];
546
547         return handler ? handler->prefix : NULL;
548 }
549
550 static u32 ocfs2_xattr_name_hash(struct inode *inode,
551                                  const char *name,
552                                  int name_len)
553 {
554         /* Get hash value of uuid from super block */
555         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
556         int i;
557
558         /* hash extended attribute name */
559         for (i = 0; i < name_len; i++) {
560                 hash = (hash << OCFS2_HASH_SHIFT) ^
561                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
562                        *name++;
563         }
564
565         return hash;
566 }
567
568 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
569 {
570         return namevalue_size(name_len, value_len) +
571                 sizeof(struct ocfs2_xattr_entry);
572 }
573
574 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
575 {
576         return namevalue_size_xi(xi) +
577                 sizeof(struct ocfs2_xattr_entry);
578 }
579
580 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
581 {
582         return namevalue_size_xe(xe) +
583                 sizeof(struct ocfs2_xattr_entry);
584 }
585
586 int ocfs2_calc_security_init(struct inode *dir,
587                              struct ocfs2_security_xattr_info *si,
588                              int *want_clusters,
589                              int *xattr_credits,
590                              struct ocfs2_alloc_context **xattr_ac)
591 {
592         int ret = 0;
593         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
594         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
595                                                  si->value_len);
596
597         /*
598          * The max space of security xattr taken inline is
599          * 256(name) + 80(value) + 16(entry) = 352 bytes,
600          * So reserve one metadata block for it is ok.
601          */
602         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
603             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
604                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
605                 if (ret) {
606                         mlog_errno(ret);
607                         return ret;
608                 }
609                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
610         }
611
612         /* reserve clusters for xattr value which will be set in B tree*/
613         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
614                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
615                                                             si->value_len);
616
617                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
618                                                            new_clusters);
619                 *want_clusters += new_clusters;
620         }
621         return ret;
622 }
623
624 int ocfs2_calc_xattr_init(struct inode *dir,
625                           struct buffer_head *dir_bh,
626                           int mode,
627                           struct ocfs2_security_xattr_info *si,
628                           int *want_clusters,
629                           int *xattr_credits,
630                           int *want_meta)
631 {
632         int ret = 0;
633         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
634         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
635
636         if (si->enable)
637                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
638                                                      si->value_len);
639
640         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
641                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
642                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
643                                         "", NULL, 0);
644                 if (acl_len > 0) {
645                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
646                         if (S_ISDIR(mode))
647                                 a_size <<= 1;
648                 } else if (acl_len != 0 && acl_len != -ENODATA) {
649                         mlog_errno(ret);
650                         return ret;
651                 }
652         }
653
654         if (!(s_size + a_size))
655                 return ret;
656
657         /*
658          * The max space of security xattr taken inline is
659          * 256(name) + 80(value) + 16(entry) = 352 bytes,
660          * The max space of acl xattr taken inline is
661          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
662          * when blocksize = 512, may reserve one more cluser for
663          * xattr bucket, otherwise reserve one metadata block
664          * for them is ok.
665          * If this is a new directory with inline data,
666          * we choose to reserve the entire inline area for
667          * directory contents and force an external xattr block.
668          */
669         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
670             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
671             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
672                 *want_meta = *want_meta + 1;
673                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
674         }
675
676         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
677             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
678                 *want_clusters += 1;
679                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
680         }
681
682         /*
683          * reserve credits and clusters for xattrs which has large value
684          * and have to be set outside
685          */
686         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
687                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
688                                                         si->value_len);
689                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
690                                                            new_clusters);
691                 *want_clusters += new_clusters;
692         }
693         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
694             acl_len > OCFS2_XATTR_INLINE_SIZE) {
695                 /* for directory, it has DEFAULT and ACCESS two types of acls */
696                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
697                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
698                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
699                                                            new_clusters);
700                 *want_clusters += new_clusters;
701         }
702
703         return ret;
704 }
705
706 static int ocfs2_xattr_extend_allocation(struct inode *inode,
707                                          u32 clusters_to_add,
708                                          struct ocfs2_xattr_value_buf *vb,
709                                          struct ocfs2_xattr_set_ctxt *ctxt)
710 {
711         int status = 0;
712         handle_t *handle = ctxt->handle;
713         enum ocfs2_alloc_restarted why;
714         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
715         struct ocfs2_extent_tree et;
716
717         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
718
719         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
720
721         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
722                               OCFS2_JOURNAL_ACCESS_WRITE);
723         if (status < 0) {
724                 mlog_errno(status);
725                 goto leave;
726         }
727
728         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
729         status = ocfs2_add_clusters_in_btree(handle,
730                                              &et,
731                                              &logical_start,
732                                              clusters_to_add,
733                                              0,
734                                              ctxt->data_ac,
735                                              ctxt->meta_ac,
736                                              &why);
737         if (status < 0) {
738                 mlog_errno(status);
739                 goto leave;
740         }
741
742         ocfs2_journal_dirty(handle, vb->vb_bh);
743
744         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
745
746         /*
747          * We should have already allocated enough space before the transaction,
748          * so no need to restart.
749          */
750         BUG_ON(why != RESTART_NONE || clusters_to_add);
751
752 leave:
753
754         return status;
755 }
756
757 static int __ocfs2_remove_xattr_range(struct inode *inode,
758                                       struct ocfs2_xattr_value_buf *vb,
759                                       u32 cpos, u32 phys_cpos, u32 len,
760                                       unsigned int ext_flags,
761                                       struct ocfs2_xattr_set_ctxt *ctxt)
762 {
763         int ret;
764         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
765         handle_t *handle = ctxt->handle;
766         struct ocfs2_extent_tree et;
767
768         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
769
770         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
771                             OCFS2_JOURNAL_ACCESS_WRITE);
772         if (ret) {
773                 mlog_errno(ret);
774                 goto out;
775         }
776
777         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
778                                   &ctxt->dealloc);
779         if (ret) {
780                 mlog_errno(ret);
781                 goto out;
782         }
783
784         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
785         ocfs2_journal_dirty(handle, vb->vb_bh);
786
787         if (ext_flags & OCFS2_EXT_REFCOUNTED)
788                 ret = ocfs2_decrease_refcount(inode, handle,
789                                         ocfs2_blocks_to_clusters(inode->i_sb,
790                                                                  phys_blkno),
791                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
792         else
793                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
794                                                   phys_blkno, len);
795         if (ret)
796                 mlog_errno(ret);
797
798 out:
799         return ret;
800 }
801
802 static int ocfs2_xattr_shrink_size(struct inode *inode,
803                                    u32 old_clusters,
804                                    u32 new_clusters,
805                                    struct ocfs2_xattr_value_buf *vb,
806                                    struct ocfs2_xattr_set_ctxt *ctxt)
807 {
808         int ret = 0;
809         unsigned int ext_flags;
810         u32 trunc_len, cpos, phys_cpos, alloc_size;
811         u64 block;
812
813         if (old_clusters <= new_clusters)
814                 return 0;
815
816         cpos = new_clusters;
817         trunc_len = old_clusters - new_clusters;
818         while (trunc_len) {
819                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
820                                                &alloc_size,
821                                                &vb->vb_xv->xr_list, &ext_flags);
822                 if (ret) {
823                         mlog_errno(ret);
824                         goto out;
825                 }
826
827                 if (alloc_size > trunc_len)
828                         alloc_size = trunc_len;
829
830                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
831                                                  phys_cpos, alloc_size,
832                                                  ext_flags, ctxt);
833                 if (ret) {
834                         mlog_errno(ret);
835                         goto out;
836                 }
837
838                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
839                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
840                                                        block, alloc_size);
841                 cpos += alloc_size;
842                 trunc_len -= alloc_size;
843         }
844
845 out:
846         return ret;
847 }
848
849 static int ocfs2_xattr_value_truncate(struct inode *inode,
850                                       struct ocfs2_xattr_value_buf *vb,
851                                       int len,
852                                       struct ocfs2_xattr_set_ctxt *ctxt)
853 {
854         int ret;
855         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
856         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
857
858         if (new_clusters == old_clusters)
859                 return 0;
860
861         if (new_clusters > old_clusters)
862                 ret = ocfs2_xattr_extend_allocation(inode,
863                                                     new_clusters - old_clusters,
864                                                     vb, ctxt);
865         else
866                 ret = ocfs2_xattr_shrink_size(inode,
867                                               old_clusters, new_clusters,
868                                               vb, ctxt);
869
870         return ret;
871 }
872
/*
 * Account for, and optionally emit, one "<prefix><name>\0" list entry.
 *
 * @buffer:   output buffer, or NULL when the caller only wants to know
 *            how much space is needed
 * @size:     capacity of @buffer in bytes
 * @result:   running byte count; always advanced by the entry length
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name, not necessarily NUL-terminated
 * @name_len: length of @name in bytes
 *
 * Returns 0 on success or -ERANGE when the entry does not fit in @buffer.
 *
 * The "size query" mode is keyed on @buffer being NULL rather than on
 * @size being zero: a caller that has already filled its buffer completely
 * passes a non-NULL @buffer with @size == 0 and must get -ERANGE, not a
 * length that exceeds what it allocated.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	char *p;
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;

	*result += total_len;

	/* we are just looking for how big our buffer needs to be */
	if (!buffer)
		return 0;

	if (*result > size)
		return -ERANGE;

	/* Only form the destination pointer once we know buffer is valid. */
	p = buffer + (*result - total_len);
	memcpy(p, prefix, prefix_len);
	memcpy(p + prefix_len, name, name_len);
	p[prefix_len + name_len] = '\0';

	return 0;
}
896
897 static int ocfs2_xattr_list_entries(struct inode *inode,
898                                     struct ocfs2_xattr_header *header,
899                                     char *buffer, size_t buffer_size)
900 {
901         size_t result = 0;
902         int i, type, ret;
903         const char *prefix, *name;
904
905         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
906                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
907                 type = ocfs2_xattr_get_type(entry);
908                 prefix = ocfs2_xattr_prefix(type);
909
910                 if (prefix) {
911                         name = (const char *)header +
912                                 le16_to_cpu(entry->xe_name_offset);
913
914                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
915                                                      &result, prefix, name,
916                                                      entry->xe_name_len);
917                         if (ret)
918                                 return ret;
919                 }
920         }
921
922         return result;
923 }
924
925 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
926                                          struct ocfs2_dinode *di)
927 {
928         struct ocfs2_xattr_header *xh;
929         int i;
930
931         xh = (struct ocfs2_xattr_header *)
932                  ((void *)di + inode->i_sb->s_blocksize -
933                  le16_to_cpu(di->i_xattr_inline_size));
934
935         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
936                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
937                         return 1;
938
939         return 0;
940 }
941
942 static int ocfs2_xattr_ibody_list(struct inode *inode,
943                                   struct ocfs2_dinode *di,
944                                   char *buffer,
945                                   size_t buffer_size)
946 {
947         struct ocfs2_xattr_header *header = NULL;
948         struct ocfs2_inode_info *oi = OCFS2_I(inode);
949         int ret = 0;
950
951         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
952                 return ret;
953
954         header = (struct ocfs2_xattr_header *)
955                  ((void *)di + inode->i_sb->s_blocksize -
956                  le16_to_cpu(di->i_xattr_inline_size));
957
958         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
959
960         return ret;
961 }
962
963 static int ocfs2_xattr_block_list(struct inode *inode,
964                                   struct ocfs2_dinode *di,
965                                   char *buffer,
966                                   size_t buffer_size)
967 {
968         struct buffer_head *blk_bh = NULL;
969         struct ocfs2_xattr_block *xb;
970         int ret = 0;
971
972         if (!di->i_xattr_loc)
973                 return ret;
974
975         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
976                                      &blk_bh);
977         if (ret < 0) {
978                 mlog_errno(ret);
979                 return ret;
980         }
981
982         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
983         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
984                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
985                 ret = ocfs2_xattr_list_entries(inode, header,
986                                                buffer, buffer_size);
987         } else
988                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
989                                                    buffer, buffer_size);
990
991         brelse(blk_bh);
992
993         return ret;
994 }
995
996 ssize_t ocfs2_listxattr(struct dentry *dentry,
997                         char *buffer,
998                         size_t size)
999 {
1000         int ret = 0, i_ret = 0, b_ret = 0;
1001         struct buffer_head *di_bh = NULL;
1002         struct ocfs2_dinode *di = NULL;
1003         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
1004
1005         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1006                 return -EOPNOTSUPP;
1007
1008         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1009                 return ret;
1010
1011         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
1012         if (ret < 0) {
1013                 mlog_errno(ret);
1014                 return ret;
1015         }
1016
1017         di = (struct ocfs2_dinode *)di_bh->b_data;
1018
1019         down_read(&oi->ip_xattr_sem);
1020         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
1021         if (i_ret < 0)
1022                 b_ret = 0;
1023         else {
1024                 if (buffer) {
1025                         buffer += i_ret;
1026                         size -= i_ret;
1027                 }
1028                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
1029                                                buffer, size);
1030                 if (b_ret < 0)
1031                         i_ret = 0;
1032         }
1033         up_read(&oi->ip_xattr_sem);
1034         ocfs2_inode_unlock(dentry->d_inode, 0);
1035
1036         brelse(di_bh);
1037
1038         return i_ret + b_ret;
1039 }
1040
1041 static int ocfs2_xattr_find_entry(int name_index,
1042                                   const char *name,
1043                                   struct ocfs2_xattr_search *xs)
1044 {
1045         struct ocfs2_xattr_entry *entry;
1046         size_t name_len;
1047         int i, cmp = 1;
1048
1049         if (name == NULL)
1050                 return -EINVAL;
1051
1052         name_len = strlen(name);
1053         entry = xs->here;
1054         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1055                 cmp = name_index - ocfs2_xattr_get_type(entry);
1056                 if (!cmp)
1057                         cmp = name_len - entry->xe_name_len;
1058                 if (!cmp)
1059                         cmp = memcmp(name, (xs->base +
1060                                      le16_to_cpu(entry->xe_name_offset)),
1061                                      name_len);
1062                 if (cmp == 0)
1063                         break;
1064                 entry += 1;
1065         }
1066         xs->here = entry;
1067
1068         return cmp ? -ENODATA : 0;
1069 }
1070
1071 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1072                                          struct ocfs2_xattr_value_root *xv,
1073                                          void *buffer,
1074                                          size_t len)
1075 {
1076         u32 cpos, p_cluster, num_clusters, bpc, clusters;
1077         u64 blkno;
1078         int i, ret = 0;
1079         size_t cplen, blocksize;
1080         struct buffer_head *bh = NULL;
1081         struct ocfs2_extent_list *el;
1082
1083         el = &xv->xr_list;
1084         clusters = le32_to_cpu(xv->xr_clusters);
1085         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1086         blocksize = inode->i_sb->s_blocksize;
1087
1088         cpos = 0;
1089         while (cpos < clusters) {
1090                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1091                                                &num_clusters, el, NULL);
1092                 if (ret) {
1093                         mlog_errno(ret);
1094                         goto out;
1095                 }
1096
1097                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1098                 /* Copy ocfs2_xattr_value */
1099                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1100                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1101                                                &bh, NULL);
1102                         if (ret) {
1103                                 mlog_errno(ret);
1104                                 goto out;
1105                         }
1106
1107                         cplen = len >= blocksize ? blocksize : len;
1108                         memcpy(buffer, bh->b_data, cplen);
1109                         len -= cplen;
1110                         buffer += cplen;
1111
1112                         brelse(bh);
1113                         bh = NULL;
1114                         if (len == 0)
1115                                 break;
1116                 }
1117                 cpos += num_clusters;
1118         }
1119 out:
1120         return ret;
1121 }
1122
1123 static int ocfs2_xattr_ibody_get(struct inode *inode,
1124                                  int name_index,
1125                                  const char *name,
1126                                  void *buffer,
1127                                  size_t buffer_size,
1128                                  struct ocfs2_xattr_search *xs)
1129 {
1130         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1131         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1132         struct ocfs2_xattr_value_root *xv;
1133         size_t size;
1134         int ret = 0;
1135
1136         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1137                 return -ENODATA;
1138
1139         xs->end = (void *)di + inode->i_sb->s_blocksize;
1140         xs->header = (struct ocfs2_xattr_header *)
1141                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1142         xs->base = (void *)xs->header;
1143         xs->here = xs->header->xh_entries;
1144
1145         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1146         if (ret)
1147                 return ret;
1148         size = le64_to_cpu(xs->here->xe_value_size);
1149         if (buffer) {
1150                 if (size > buffer_size)
1151                         return -ERANGE;
1152                 if (ocfs2_xattr_is_local(xs->here)) {
1153                         memcpy(buffer, (void *)xs->base +
1154                                le16_to_cpu(xs->here->xe_name_offset) +
1155                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1156                 } else {
1157                         xv = (struct ocfs2_xattr_value_root *)
1158                                 (xs->base + le16_to_cpu(
1159                                  xs->here->xe_name_offset) +
1160                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1161                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1162                                                             buffer, size);
1163                         if (ret < 0) {
1164                                 mlog_errno(ret);
1165                                 return ret;
1166                         }
1167                 }
1168         }
1169
1170         return size;
1171 }
1172
1173 static int ocfs2_xattr_block_get(struct inode *inode,
1174                                  int name_index,
1175                                  const char *name,
1176                                  void *buffer,
1177                                  size_t buffer_size,
1178                                  struct ocfs2_xattr_search *xs)
1179 {
1180         struct ocfs2_xattr_block *xb;
1181         struct ocfs2_xattr_value_root *xv;
1182         size_t size;
1183         int ret = -ENODATA, name_offset, name_len, i;
1184         int uninitialized_var(block_off);
1185
1186         xs->bucket = ocfs2_xattr_bucket_new(inode);
1187         if (!xs->bucket) {
1188                 ret = -ENOMEM;
1189                 mlog_errno(ret);
1190                 goto cleanup;
1191         }
1192
1193         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1194         if (ret) {
1195                 mlog_errno(ret);
1196                 goto cleanup;
1197         }
1198
1199         if (xs->not_found) {
1200                 ret = -ENODATA;
1201                 goto cleanup;
1202         }
1203
1204         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1205         size = le64_to_cpu(xs->here->xe_value_size);
1206         if (buffer) {
1207                 ret = -ERANGE;
1208                 if (size > buffer_size)
1209                         goto cleanup;
1210
1211                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1212                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1213                 i = xs->here - xs->header->xh_entries;
1214
1215                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1216                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1217                                                                 bucket_xh(xs->bucket),
1218                                                                 i,
1219                                                                 &block_off,
1220                                                                 &name_offset);
1221                         xs->base = bucket_block(xs->bucket, block_off);
1222                 }
1223                 if (ocfs2_xattr_is_local(xs->here)) {
1224                         memcpy(buffer, (void *)xs->base +
1225                                name_offset + name_len, size);
1226                 } else {
1227                         xv = (struct ocfs2_xattr_value_root *)
1228                                 (xs->base + name_offset + name_len);
1229                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1230                                                             buffer, size);
1231                         if (ret < 0) {
1232                                 mlog_errno(ret);
1233                                 goto cleanup;
1234                         }
1235                 }
1236         }
1237         ret = size;
1238 cleanup:
1239         ocfs2_xattr_bucket_free(xs->bucket);
1240
1241         brelse(xs->xattr_bh);
1242         xs->xattr_bh = NULL;
1243         return ret;
1244 }
1245
1246 int ocfs2_xattr_get_nolock(struct inode *inode,
1247                            struct buffer_head *di_bh,
1248                            int name_index,
1249                            const char *name,
1250                            void *buffer,
1251                            size_t buffer_size)
1252 {
1253         int ret;
1254         struct ocfs2_dinode *di = NULL;
1255         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1256         struct ocfs2_xattr_search xis = {
1257                 .not_found = -ENODATA,
1258         };
1259         struct ocfs2_xattr_search xbs = {
1260                 .not_found = -ENODATA,
1261         };
1262
1263         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1264                 return -EOPNOTSUPP;
1265
1266         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1267                 ret = -ENODATA;
1268
1269         xis.inode_bh = xbs.inode_bh = di_bh;
1270         di = (struct ocfs2_dinode *)di_bh->b_data;
1271
1272         down_read(&oi->ip_xattr_sem);
1273         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1274                                     buffer_size, &xis);
1275         if (ret == -ENODATA && di->i_xattr_loc)
1276                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1277                                             buffer_size, &xbs);
1278         up_read(&oi->ip_xattr_sem);
1279
1280         return ret;
1281 }
1282
1283 /* ocfs2_xattr_get()
1284  *
1285  * Copy an extended attribute into the buffer provided.
1286  * Buffer is NULL to compute the size of buffer required.
1287  */
1288 static int ocfs2_xattr_get(struct inode *inode,
1289                            int name_index,
1290                            const char *name,
1291                            void *buffer,
1292                            size_t buffer_size)
1293 {
1294         int ret;
1295         struct buffer_head *di_bh = NULL;
1296
1297         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1298         if (ret < 0) {
1299                 mlog_errno(ret);
1300                 return ret;
1301         }
1302         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1303                                      name, buffer, buffer_size);
1304
1305         ocfs2_inode_unlock(inode, 0);
1306
1307         brelse(di_bh);
1308
1309         return ret;
1310 }
1311
1312 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1313                                            handle_t *handle,
1314                                            struct ocfs2_xattr_value_buf *vb,
1315                                            const void *value,
1316                                            int value_len)
1317 {
1318         int ret = 0, i, cp_len;
1319         u16 blocksize = inode->i_sb->s_blocksize;
1320         u32 p_cluster, num_clusters;
1321         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1322         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1323         u64 blkno;
1324         struct buffer_head *bh = NULL;
1325         unsigned int ext_flags;
1326         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1327
1328         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1329
1330         while (cpos < clusters) {
1331                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1332                                                &num_clusters, &xv->xr_list,
1333                                                &ext_flags);
1334                 if (ret) {
1335                         mlog_errno(ret);
1336                         goto out;
1337                 }
1338
1339                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1340
1341                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1342
1343                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1344                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1345                                                &bh, NULL);
1346                         if (ret) {
1347                                 mlog_errno(ret);
1348                                 goto out;
1349                         }
1350
1351                         ret = ocfs2_journal_access(handle,
1352                                                    INODE_CACHE(inode),
1353                                                    bh,
1354                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1355                         if (ret < 0) {
1356                                 mlog_errno(ret);
1357                                 goto out;
1358                         }
1359
1360                         cp_len = value_len > blocksize ? blocksize : value_len;
1361                         memcpy(bh->b_data, value, cp_len);
1362                         value_len -= cp_len;
1363                         value += cp_len;
1364                         if (cp_len < blocksize)
1365                                 memset(bh->b_data + cp_len, 0,
1366                                        blocksize - cp_len);
1367
1368                         ocfs2_journal_dirty(handle, bh);
1369                         brelse(bh);
1370                         bh = NULL;
1371
1372                         /*
1373                          * XXX: do we need to empty all the following
1374                          * blocks in this cluster?
1375                          */
1376                         if (!value_len)
1377                                 break;
1378                 }
1379                 cpos += num_clusters;
1380         }
1381 out:
1382         brelse(bh);
1383
1384         return ret;
1385 }
1386
1387 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1388                                        int num_entries)
1389 {
1390         int free_space;
1391
1392         if (!needed_space)
1393                 return 0;
1394
1395         free_space = free_start -
1396                 sizeof(struct ocfs2_xattr_header) -
1397                 (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1398                 OCFS2_XATTR_HEADER_GAP;
1399         if (free_space < 0)
1400                 return -EIO;
1401         if (free_space < needed_space)
1402                 return -ENOSPC;
1403
1404         return 0;
1405 }
1406
1407 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1408                                    int type)
1409 {
1410         return loc->xl_ops->xlo_journal_access(handle, loc, type);
1411 }
1412
1413 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1414 {
1415         loc->xl_ops->xlo_journal_dirty(handle, loc);
1416 }
1417
1418 /* Give a pointer into the storage for the given offset */
1419 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1420 {
1421         BUG_ON(offset >= loc->xl_size);
1422         return loc->xl_ops->xlo_offset_pointer(loc, offset);
1423 }
1424
1425 /*
1426  * Wipe the name+value pair and allow the storage to reclaim it.  This
1427  * must be followed by either removal of the entry or a call to
1428  * ocfs2_xa_add_namevalue().
1429  */
1430 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1431 {
1432         loc->xl_ops->xlo_wipe_namevalue(loc);
1433 }
1434
1435 /*
1436  * Find lowest offset to a name+value pair.  This is the start of our
1437  * downward-growing free space.
1438  */
1439 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1440 {
1441         return loc->xl_ops->xlo_get_free_start(loc);
1442 }
1443
1444 /* Can we reuse loc->xl_entry for xi? */
1445 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1446                                     struct ocfs2_xattr_info *xi)
1447 {
1448         return loc->xl_ops->xlo_can_reuse(loc, xi);
1449 }
1450
1451 /* How much free space is needed to set the new value */
1452 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1453                                 struct ocfs2_xattr_info *xi)
1454 {
1455         return loc->xl_ops->xlo_check_space(loc, xi);
1456 }
1457
1458 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1459 {
1460         loc->xl_ops->xlo_add_entry(loc, name_hash);
1461         loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1462         /*
1463          * We can't leave the new entry's xe_name_offset at zero or
1464          * add_namevalue() will go nuts.  We set it to the size of our
1465          * storage so that it can never be less than any other entry.
1466          */
1467         loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1468 }
1469
1470 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1471                                    struct ocfs2_xattr_info *xi)
1472 {
1473         int size = namevalue_size_xi(xi);
1474         int nameval_offset;
1475         char *nameval_buf;
1476
1477         loc->xl_ops->xlo_add_namevalue(loc, size);
1478         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1479         loc->xl_entry->xe_name_len = xi->xi_name_len;
1480         ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1481         ocfs2_xattr_set_local(loc->xl_entry,
1482                               xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1483
1484         nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1485         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1486         memset(nameval_buf, 0, size);
1487         memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1488 }
1489
1490 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1491                                     struct ocfs2_xattr_value_buf *vb)
1492 {
1493         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1494         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1495
1496         /* Value bufs are for value trees */
1497         BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1498         BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1499                (name_size + OCFS2_XATTR_ROOT_SIZE));
1500
1501         loc->xl_ops->xlo_fill_value_buf(loc, vb);
1502         vb->vb_xv =
1503                 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1504                                                         nameval_offset +
1505                                                         name_size);
1506 }
1507
1508 static int ocfs2_xa_block_journal_access(handle_t *handle,
1509                                          struct ocfs2_xa_loc *loc, int type)
1510 {
1511         struct buffer_head *bh = loc->xl_storage;
1512         ocfs2_journal_access_func access;
1513
1514         if (loc->xl_size == (bh->b_size -
1515                              offsetof(struct ocfs2_xattr_block,
1516                                       xb_attrs.xb_header)))
1517                 access = ocfs2_journal_access_xb;
1518         else
1519                 access = ocfs2_journal_access_di;
1520         return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1521 }
1522
1523 static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1524                                          struct ocfs2_xa_loc *loc)
1525 {
1526         struct buffer_head *bh = loc->xl_storage;
1527
1528         ocfs2_journal_dirty(handle, bh);
1529 }
1530
1531 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1532                                            int offset)
1533 {
1534         return (char *)loc->xl_header + offset;
1535 }
1536
1537 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1538                                     struct ocfs2_xattr_info *xi)
1539 {
1540         /*
1541          * Block storage is strict.  If the sizes aren't exact, we will
1542          * remove the old one and reinsert the new.
1543          */
1544         return namevalue_size_xe(loc->xl_entry) ==
1545                 namevalue_size_xi(xi);
1546 }
1547
1548 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1549 {
1550         struct ocfs2_xattr_header *xh = loc->xl_header;
1551         int i, count = le16_to_cpu(xh->xh_count);
1552         int offset, free_start = loc->xl_size;
1553
1554         for (i = 0; i < count; i++) {
1555                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1556                 if (offset < free_start)
1557                         free_start = offset;
1558         }
1559
1560         return free_start;
1561 }
1562
1563 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1564                                       struct ocfs2_xattr_info *xi)
1565 {
1566         int count = le16_to_cpu(loc->xl_header->xh_count);
1567         int free_start = ocfs2_xa_get_free_start(loc);
1568         int needed_space = ocfs2_xi_entry_usage(xi);
1569
1570         /*
1571          * Block storage will reclaim the original entry before inserting
1572          * the new value, so we only need the difference.  If the new
1573          * entry is smaller than the old one, we don't need anything.
1574          */
1575         if (loc->xl_entry) {
1576                 /* Don't need space if we're reusing! */
1577                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1578                         needed_space = 0;
1579                 else
1580                         needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1581         }
1582         if (needed_space < 0)
1583                 needed_space = 0;
1584         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1585 }
1586
1587 /*
1588  * Block storage for xattrs keeps the name+value pairs compacted.  When
1589  * we remove one, we have to shift any that preceded it towards the end.
1590  */
1591 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1592 {
1593         int i, offset;
1594         int namevalue_offset, first_namevalue_offset, namevalue_size;
1595         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1596         struct ocfs2_xattr_header *xh = loc->xl_header;
1597         int count = le16_to_cpu(xh->xh_count);
1598
1599         namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1600         namevalue_size = namevalue_size_xe(entry);
1601         first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1602
1603         /* Shift the name+value pairs */
1604         memmove((char *)xh + first_namevalue_offset + namevalue_size,
1605                 (char *)xh + first_namevalue_offset,
1606                 namevalue_offset - first_namevalue_offset);
1607         memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1608
1609         /* Now tell xh->xh_entries about it */
1610         for (i = 0; i < count; i++) {
1611                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1612                 if (offset <= namevalue_offset)
1613                         le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1614                                      namevalue_size);
1615         }
1616
1617         /*
1618          * Note that we don't update xh_free_start or xh_name_value_len
1619          * because they're not used in block-stored xattrs.
1620          */
1621 }
1622
1623 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1624 {
1625         int count = le16_to_cpu(loc->xl_header->xh_count);
1626         loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1627         le16_add_cpu(&loc->xl_header->xh_count, 1);
1628         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1629 }
1630
1631 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1632 {
1633         int free_start = ocfs2_xa_get_free_start(loc);
1634
1635         loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1636 }
1637
1638 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1639                                           struct ocfs2_xattr_value_buf *vb)
1640 {
1641         struct buffer_head *bh = loc->xl_storage;
1642
1643         if (loc->xl_size == (bh->b_size -
1644                              offsetof(struct ocfs2_xattr_block,
1645                                       xb_attrs.xb_header)))
1646                 vb->vb_access = ocfs2_journal_access_xb;
1647         else
1648                 vb->vb_access = ocfs2_journal_access_di;
1649         vb->vb_bh = bh;
1650 }
1651
1652 /*
1653  * Operations for xattrs stored in blocks.  This includes inline inode
1654  * storage and unindexed ocfs2_xattr_blocks.
1655  */
1656 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1657         .xlo_journal_access     = ocfs2_xa_block_journal_access,
1658         .xlo_journal_dirty      = ocfs2_xa_block_journal_dirty,
1659         .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1660         .xlo_check_space        = ocfs2_xa_block_check_space,
1661         .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1662         .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1663         .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1664         .xlo_add_entry          = ocfs2_xa_block_add_entry,
1665         .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1666         .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1667 };
1668
1669 static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1670                                           struct ocfs2_xa_loc *loc, int type)
1671 {
1672         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1673
1674         return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1675 }
1676
1677 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1678                                           struct ocfs2_xa_loc *loc)
1679 {
1680         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1681
1682         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1683 }
1684
1685 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1686                                             int offset)
1687 {
1688         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1689         int block, block_offset;
1690
1691         /* The header is at the front of the bucket */
1692         block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1693         block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1694
1695         return bucket_block(bucket, block) + block_offset;
1696 }
1697
1698 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1699                                      struct ocfs2_xattr_info *xi)
1700 {
1701         return namevalue_size_xe(loc->xl_entry) >=
1702                 namevalue_size_xi(xi);
1703 }
1704
1705 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1706 {
1707         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1708         return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1709 }
1710
1711 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1712                                          int free_start, int size)
1713 {
1714         /*
1715          * We need to make sure that the name+value pair fits within
1716          * one block.
1717          */
1718         if (((free_start - size) >> sb->s_blocksize_bits) !=
1719             ((free_start - 1) >> sb->s_blocksize_bits))
1720                 free_start -= free_start % sb->s_blocksize;
1721
1722         return free_start;
1723 }
1724
1725 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1726                                        struct ocfs2_xattr_info *xi)
1727 {
1728         int rc;
1729         int count = le16_to_cpu(loc->xl_header->xh_count);
1730         int free_start = ocfs2_xa_get_free_start(loc);
1731         int needed_space = ocfs2_xi_entry_usage(xi);
1732         int size = namevalue_size_xi(xi);
1733         struct super_block *sb = loc->xl_inode->i_sb;
1734
1735         /*
1736          * Bucket storage does not reclaim name+value pairs it cannot
1737          * reuse.  They live as holes until the bucket fills, and then
1738          * the bucket is defragmented.  However, the bucket can reclaim
1739          * the ocfs2_xattr_entry.
1740          */
1741         if (loc->xl_entry) {
1742                 /* Don't need space if we're reusing! */
1743                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1744                         needed_space = 0;
1745                 else
1746                         needed_space -= sizeof(struct ocfs2_xattr_entry);
1747         }
1748         BUG_ON(needed_space < 0);
1749
1750         if (free_start < size) {
1751                 if (needed_space)
1752                         return -ENOSPC;
1753         } else {
1754                 /*
1755                  * First we check if it would fit in the first place.
1756                  * Below, we align the free start to a block.  This may
1757                  * slide us below the minimum gap.  By checking unaligned
1758                  * first, we avoid that error.
1759                  */
1760                 rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1761                                                  count);
1762                 if (rc)
1763                         return rc;
1764                 free_start = ocfs2_bucket_align_free_start(sb, free_start,
1765                                                            size);
1766         }
1767         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1768 }
1769
1770 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1771 {
1772         le16_add_cpu(&loc->xl_header->xh_name_value_len,
1773                      -namevalue_size_xe(loc->xl_entry));
1774 }
1775
1776 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1777 {
1778         struct ocfs2_xattr_header *xh = loc->xl_header;
1779         int count = le16_to_cpu(xh->xh_count);
1780         int low = 0, high = count - 1, tmp;
1781         struct ocfs2_xattr_entry *tmp_xe;
1782
1783         /*
1784          * We keep buckets sorted by name_hash, so we need to find
1785          * our insert place.
1786          */
1787         while (low <= high && count) {
1788                 tmp = (low + high) / 2;
1789                 tmp_xe = &xh->xh_entries[tmp];
1790
1791                 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1792                         low = tmp + 1;
1793                 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1794                         high = tmp - 1;
1795                 else {
1796                         low = tmp;
1797                         break;
1798                 }
1799         }
1800
1801         if (low != count)
1802                 memmove(&xh->xh_entries[low + 1],
1803                         &xh->xh_entries[low],
1804                         ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1805
1806         le16_add_cpu(&xh->xh_count, 1);
1807         loc->xl_entry = &xh->xh_entries[low];
1808         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1809 }
1810
1811 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1812 {
1813         int free_start = ocfs2_xa_get_free_start(loc);
1814         struct ocfs2_xattr_header *xh = loc->xl_header;
1815         struct super_block *sb = loc->xl_inode->i_sb;
1816         int nameval_offset;
1817
1818         free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1819         nameval_offset = free_start - size;
1820         loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1821         xh->xh_free_start = cpu_to_le16(nameval_offset);
1822         le16_add_cpu(&xh->xh_name_value_len, size);
1823
1824 }
1825
1826 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1827                                            struct ocfs2_xattr_value_buf *vb)
1828 {
1829         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1830         struct super_block *sb = loc->xl_inode->i_sb;
1831         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1832         int size = namevalue_size_xe(loc->xl_entry);
1833         int block_offset = nameval_offset >> sb->s_blocksize_bits;
1834
1835         /* Values are not allowed to straddle block boundaries */
1836         BUG_ON(block_offset !=
1837                ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1838         /* We expect the bucket to be filled in */
1839         BUG_ON(!bucket->bu_bhs[block_offset]);
1840
1841         vb->vb_access = ocfs2_journal_access;
1842         vb->vb_bh = bucket->bu_bhs[block_offset];
1843 }
1844
1845 /* Operations for xattrs stored in buckets. */
1846 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1847         .xlo_journal_access     = ocfs2_xa_bucket_journal_access,
1848         .xlo_journal_dirty      = ocfs2_xa_bucket_journal_dirty,
1849         .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1850         .xlo_check_space        = ocfs2_xa_bucket_check_space,
1851         .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1852         .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1853         .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1854         .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1855         .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1856         .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1857 };
1858
1859 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1860 {
1861         struct ocfs2_xattr_value_buf vb;
1862
1863         if (ocfs2_xattr_is_local(loc->xl_entry))
1864                 return 0;
1865
1866         ocfs2_xa_fill_value_buf(loc, &vb);
1867         return le32_to_cpu(vb.vb_xv->xr_clusters);
1868 }
1869
1870 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1871                                    struct ocfs2_xattr_set_ctxt *ctxt)
1872 {
1873         int trunc_rc, access_rc;
1874         struct ocfs2_xattr_value_buf vb;
1875
1876         ocfs2_xa_fill_value_buf(loc, &vb);
1877         trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1878                                               ctxt);
1879
1880         /*
1881          * The caller of ocfs2_xa_value_truncate() has already called
1882          * ocfs2_xa_journal_access on the loc.  However, The truncate code
1883          * calls ocfs2_extend_trans().  This may commit the previous
1884          * transaction and open a new one.  If this is a bucket, truncate
1885          * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1886          * the caller is expecting to dirty the entire bucket.  So we must
1887          * reset the journal work.  We do this even if truncate has failed,
1888          * as it could have failed after committing the extend.
1889          */
1890         access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1891                                             OCFS2_JOURNAL_ACCESS_WRITE);
1892
1893         /* Errors in truncate take precedence */
1894         return trunc_rc ? trunc_rc : access_rc;
1895 }
1896
1897 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1898 {
1899         int index, count;
1900         struct ocfs2_xattr_header *xh = loc->xl_header;
1901         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1902
1903         ocfs2_xa_wipe_namevalue(loc);
1904         loc->xl_entry = NULL;
1905
1906         le16_add_cpu(&xh->xh_count, -1);
1907         count = le16_to_cpu(xh->xh_count);
1908
1909         /*
1910          * Only zero out the entry if there are more remaining.  This is
1911          * important for an empty bucket, as it keeps track of the
1912          * bucket's hash value.  It doesn't hurt empty block storage.
1913          */
1914         if (count) {
1915                 index = ((char *)entry - (char *)&xh->xh_entries) /
1916                         sizeof(struct ocfs2_xattr_entry);
1917                 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1918                         (count - index) * sizeof(struct ocfs2_xattr_entry));
1919                 memset(&xh->xh_entries[count], 0,
1920                        sizeof(struct ocfs2_xattr_entry));
1921         }
1922 }
1923
1924 /*
1925  * If we have a problem adjusting the size of an external value during
1926  * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1927  * in an intermediate state.  For example, the value may be partially
1928  * truncated.
1929  *
1930  * If the value tree hasn't changed, the extend/truncate went nowhere.
1931  * We have nothing to do.  The caller can treat it as a straight error.
1932  *
1933  * If the value tree got partially truncated, we now have a corrupted
1934  * extended attribute.  We're going to wipe its entry and leak the
1935  * clusters.  Better to leak some storage than leave a corrupt entry.
1936  *
1937  * If the value tree grew, it obviously didn't grow enough for the
1938  * new entry.  We're not going to try and reclaim those clusters either.
1939  * If there was already an external value there (orig_clusters != 0),
1940  * the new clusters are attached safely and we can just leave the old
1941  * value in place.  If there was no external value there, we remove
1942  * the entry.
1943  *
1944  * This way, the xattr block we store in the journal will be consistent.
1945  * If the size change broke because of the journal, no changes will hit
1946  * disk anyway.
1947  */
1948 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
1949                                             const char *what,
1950                                             unsigned int orig_clusters)
1951 {
1952         unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
1953         char *nameval_buf = ocfs2_xa_offset_pointer(loc,
1954                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
1955
1956         if (new_clusters < orig_clusters) {
1957                 mlog(ML_ERROR,
1958                      "Partial truncate while %s xattr %.*s.  Leaking "
1959                      "%u clusters and removing the entry\n",
1960                      what, loc->xl_entry->xe_name_len, nameval_buf,
1961                      orig_clusters - new_clusters);
1962                 ocfs2_xa_remove_entry(loc);
1963         } else if (!orig_clusters) {
1964                 mlog(ML_ERROR,
1965                      "Unable to allocate an external value for xattr "
1966                      "%.*s safely.  Leaking %u clusters and removing the "
1967                      "entry\n",
1968                      loc->xl_entry->xe_name_len, nameval_buf,
1969                      new_clusters - orig_clusters);
1970                 ocfs2_xa_remove_entry(loc);
1971         } else if (new_clusters > orig_clusters)
1972                 mlog(ML_ERROR,
1973                      "Unable to grow xattr %.*s safely.  %u new clusters "
1974                      "have been added, but the value will not be "
1975                      "modified\n",
1976                      loc->xl_entry->xe_name_len, nameval_buf,
1977                      new_clusters - orig_clusters);
1978 }
1979
1980 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
1981                            struct ocfs2_xattr_set_ctxt *ctxt)
1982 {
1983         int rc = 0;
1984         unsigned int orig_clusters;
1985
1986         if (!ocfs2_xattr_is_local(loc->xl_entry)) {
1987                 orig_clusters = ocfs2_xa_value_clusters(loc);
1988                 rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
1989                 if (rc) {
1990                         mlog_errno(rc);
1991                         /*
1992                          * Since this is remove, we can return 0 if
1993                          * ocfs2_xa_cleanup_value_truncate() is going to
1994                          * wipe the entry anyway.  So we check the
1995                          * cluster count as well.
1996                          */
1997                         if (orig_clusters != ocfs2_xa_value_clusters(loc))
1998                                 rc = 0;
1999                         ocfs2_xa_cleanup_value_truncate(loc, "removing",
2000                                                         orig_clusters);
2001                         if (rc)
2002                                 goto out;
2003                 }
2004         }
2005
2006         ocfs2_xa_remove_entry(loc);
2007
2008 out:
2009         return rc;
2010 }
2011
2012 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2013 {
2014         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2015         char *nameval_buf;
2016
2017         nameval_buf = ocfs2_xa_offset_pointer(loc,
2018                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
2019         memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2020 }
2021
2022 /*
2023  * Take an existing entry and make it ready for the new value.  This
2024  * won't allocate space, but it may free space.  It should be ready for
2025  * ocfs2_xa_prepare_entry() to finish the work.
2026  */
2027 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2028                                 struct ocfs2_xattr_info *xi,
2029                                 struct ocfs2_xattr_set_ctxt *ctxt)
2030 {
2031         int rc = 0;
2032         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2033         unsigned int orig_clusters;
2034         char *nameval_buf;
2035         int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2036         int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2037
2038         BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2039                name_size);
2040
2041         nameval_buf = ocfs2_xa_offset_pointer(loc,
2042                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
2043         if (xe_local) {
2044                 memset(nameval_buf + name_size, 0,
2045                        namevalue_size_xe(loc->xl_entry) - name_size);
2046                 if (!xi_local)
2047                         ocfs2_xa_install_value_root(loc);
2048         } else {
2049                 orig_clusters = ocfs2_xa_value_clusters(loc);
2050                 if (xi_local) {
2051                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2052                         if (rc < 0)
2053                                 mlog_errno(rc);
2054                         else
2055                                 memset(nameval_buf + name_size, 0,
2056                                        namevalue_size_xe(loc->xl_entry) -
2057                                        name_size);
2058                 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2059                            xi->xi_value_len) {
2060                         rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2061                                                      ctxt);
2062                         if (rc < 0)
2063                                 mlog_errno(rc);
2064                 }
2065
2066                 if (rc) {
2067                         ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2068                                                         orig_clusters);
2069                         goto out;
2070                 }
2071         }
2072
2073         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2074         ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2075
2076 out:
2077         return rc;
2078 }
2079
2080 /*
2081  * Prepares loc->xl_entry to receive the new xattr.  This includes
2082  * properly setting up the name+value pair region.  If loc->xl_entry
2083  * already exists, it will take care of modifying it appropriately.
2084  *
2085  * Note that this modifies the data.  You did journal_access already,
2086  * right?
2087  */
2088 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2089                                   struct ocfs2_xattr_info *xi,
2090                                   u32 name_hash,
2091                                   struct ocfs2_xattr_set_ctxt *ctxt)
2092 {
2093         int rc = 0;
2094         unsigned int orig_clusters;
2095         __le64 orig_value_size = 0;
2096
2097         rc = ocfs2_xa_check_space(loc, xi);
2098         if (rc)
2099                 goto out;
2100
2101         if (loc->xl_entry) {
2102                 if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2103                         orig_value_size = loc->xl_entry->xe_value_size;
2104                         rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2105                         if (rc)
2106                                 goto out;
2107                         goto alloc_value;
2108                 }
2109
2110                 if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2111                         orig_clusters = ocfs2_xa_value_clusters(loc);
2112                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2113                         if (rc) {
2114                                 mlog_errno(rc);
2115                                 ocfs2_xa_cleanup_value_truncate(loc,
2116                                                                 "overwriting",
2117                                                                 orig_clusters);
2118                                 goto out;
2119                         }
2120                 }
2121                 ocfs2_xa_wipe_namevalue(loc);
2122         } else
2123                 ocfs2_xa_add_entry(loc, name_hash);
2124
2125         /*
2126          * If we get here, we have a blank entry.  Fill it.  We grow our
2127          * name+value pair back from the end.
2128          */
2129         ocfs2_xa_add_namevalue(loc, xi);
2130         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2131                 ocfs2_xa_install_value_root(loc);
2132
2133 alloc_value:
2134         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2135                 orig_clusters = ocfs2_xa_value_clusters(loc);
2136                 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2137                 if (rc < 0) {
2138                         /*
2139                          * If we tried to grow an existing external value,
2140                          * ocfs2_xa_cleanuP-value_truncate() is going to
2141                          * let it stand.  We have to restore its original
2142                          * value size.
2143                          */
2144                         loc->xl_entry->xe_value_size = orig_value_size;
2145                         ocfs2_xa_cleanup_value_truncate(loc, "growing",
2146                                                         orig_clusters);
2147                         mlog_errno(rc);
2148                 }
2149         }
2150
2151 out:
2152         return rc;
2153 }
2154
2155 /*
2156  * Store the value portion of the name+value pair.  This will skip
2157  * values that are stored externally.  Their tree roots were set up
2158  * by ocfs2_xa_prepare_entry().
2159  */
2160 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2161                                 struct ocfs2_xattr_info *xi,
2162                                 struct ocfs2_xattr_set_ctxt *ctxt)
2163 {
2164         int rc = 0;
2165         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2166         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2167         char *nameval_buf;
2168         struct ocfs2_xattr_value_buf vb;
2169
2170         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2171         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2172                 ocfs2_xa_fill_value_buf(loc, &vb);
2173                 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2174                                                      ctxt->handle, &vb,
2175                                                      xi->xi_value,
2176                                                      xi->xi_value_len);
2177         } else
2178                 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2179
2180         return rc;
2181 }
2182
2183 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2184                         struct ocfs2_xattr_info *xi,
2185                         struct ocfs2_xattr_set_ctxt *ctxt)
2186 {
2187         int ret;
2188         u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2189                                               xi->xi_name_len);
2190
2191         ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2192                                       OCFS2_JOURNAL_ACCESS_WRITE);
2193         if (ret) {
2194                 mlog_errno(ret);
2195                 goto out;
2196         }
2197
2198         /*
2199          * From here on out, everything is going to modify the buffer a
2200          * little.  Errors are going to leave the xattr header in a
2201          * sane state.  Thus, even with errors we dirty the sucker.
2202          */
2203
2204         /* Don't worry, we are never called with !xi_value and !xl_entry */
2205         if (!xi->xi_value) {
2206                 ret = ocfs2_xa_remove(loc, ctxt);
2207                 goto out_dirty;
2208         }
2209
2210         ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2211         if (ret) {
2212                 if (ret != -ENOSPC)
2213                         mlog_errno(ret);
2214                 goto out_dirty;
2215         }
2216
2217         ret = ocfs2_xa_store_value(loc, xi, ctxt);
2218         if (ret)
2219                 mlog_errno(ret);
2220
2221 out_dirty:
2222         ocfs2_xa_journal_dirty(ctxt->handle, loc);
2223
2224 out:
2225         return ret;
2226 }
2227
2228 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2229                                      struct inode *inode,
2230                                      struct buffer_head *bh,
2231                                      struct ocfs2_xattr_entry *entry)
2232 {
2233         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2234
2235         BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2236
2237         loc->xl_inode = inode;
2238         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2239         loc->xl_storage = bh;
2240         loc->xl_entry = entry;
2241         loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2242         loc->xl_header =
2243                 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2244                                               loc->xl_size);
2245 }
2246
2247 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2248                                           struct inode *inode,
2249                                           struct buffer_head *bh,
2250                                           struct ocfs2_xattr_entry *entry)
2251 {
2252         struct ocfs2_xattr_block *xb =
2253                 (struct ocfs2_xattr_block *)bh->b_data;
2254
2255         BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2256
2257         loc->xl_inode = inode;
2258         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2259         loc->xl_storage = bh;
2260         loc->xl_header = &(xb->xb_attrs.xb_header);
2261         loc->xl_entry = entry;
2262         loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2263                                              xb_attrs.xb_header);
2264 }
2265
2266 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2267                                            struct ocfs2_xattr_bucket *bucket,
2268                                            struct ocfs2_xattr_entry *entry)
2269 {
2270         loc->xl_inode = bucket->bu_inode;
2271         loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2272         loc->xl_storage = bucket;
2273         loc->xl_header = bucket_xh(bucket);
2274         loc->xl_entry = entry;
2275         loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2276 }
2277
2278 /*
2279  * In xattr remove, if it is stored outside and refcounted, we may have
2280  * the chance to split the refcount tree. So need the allocators.
2281  */
2282 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2283                                         struct ocfs2_xattr_value_root *xv,
2284                                         struct ocfs2_caching_info *ref_ci,
2285                                         struct buffer_head *ref_root_bh,
2286                                         struct ocfs2_alloc_context **meta_ac,
2287                                         int *ref_credits)
2288 {
2289         int ret, meta_add = 0;
2290         u32 p_cluster, num_clusters;
2291         unsigned int ext_flags;
2292
2293         *ref_credits = 0;
2294         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2295                                        &num_clusters,
2296                                        &xv->xr_list,
2297                                        &ext_flags);
2298         if (ret) {
2299                 mlog_errno(ret);
2300                 goto out;
2301         }
2302
2303         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2304                 goto out;
2305
2306         ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2307                                                  ref_root_bh, xv,
2308                                                  &meta_add, ref_credits);
2309         if (ret) {
2310                 mlog_errno(ret);
2311                 goto out;
2312         }
2313
2314         ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2315                                                 meta_add, meta_ac);
2316         if (ret)
2317                 mlog_errno(ret);
2318
2319 out:
2320         return ret;
2321 }
2322
2323 static int ocfs2_remove_value_outside(struct inode*inode,
2324                                       struct ocfs2_xattr_value_buf *vb,
2325                                       struct ocfs2_xattr_header *header,
2326                                       struct ocfs2_caching_info *ref_ci,
2327                                       struct buffer_head *ref_root_bh)
2328 {
2329         int ret = 0, i, ref_credits;
2330         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2331         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2332         void *val;
2333
2334         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2335
2336         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2337                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2338
2339                 if (ocfs2_xattr_is_local(entry))
2340                         continue;
2341
2342                 val = (void *)header +
2343                         le16_to_cpu(entry->xe_name_offset);
2344                 vb->vb_xv = (struct ocfs2_xattr_value_root *)
2345                         (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2346
2347                 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2348                                                          ref_ci, ref_root_bh,
2349                                                          &ctxt.meta_ac,
2350                                                          &ref_credits);
2351
2352                 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2353                                         ocfs2_remove_extent_credits(osb->sb));
2354                 if (IS_ERR(ctxt.handle)) {
2355                         ret = PTR_ERR(ctxt.handle);
2356                         mlog_errno(ret);
2357                         break;
2358                 }
2359
2360                 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2361                 if (ret < 0) {
2362                         mlog_errno(ret);
2363                         break;
2364                 }
2365
2366                 ocfs2_commit_trans(osb, ctxt.handle);
2367                 if (ctxt.meta_ac) {
2368                         ocfs2_free_alloc_context(ctxt.meta_ac);
2369                         ctxt.meta_ac = NULL;
2370                 }
2371         }
2372
2373         if (ctxt.meta_ac)
2374                 ocfs2_free_alloc_context(ctxt.meta_ac);
2375         ocfs2_schedule_truncate_log_flush(osb, 1);
2376         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2377         return ret;
2378 }
2379
2380 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2381                                     struct buffer_head *di_bh,
2382                                     struct ocfs2_caching_info *ref_ci,
2383                                     struct buffer_head *ref_root_bh)
2384 {
2385
2386         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2387         struct ocfs2_xattr_header *header;
2388         int ret;
2389         struct ocfs2_xattr_value_buf vb = {
2390                 .vb_bh = di_bh,
2391                 .vb_access = ocfs2_journal_access_di,
2392         };
2393
2394         header = (struct ocfs2_xattr_header *)
2395                  ((void *)di + inode->i_sb->s_blocksize -
2396                  le16_to_cpu(di->i_xattr_inline_size));
2397
2398         ret = ocfs2_remove_value_outside(inode, &vb, header,
2399                                          ref_ci, ref_root_bh);
2400
2401         return ret;
2402 }
2403
/*
 * Argument bundle handed to the ocfs2_rm_xattr_cluster callback through
 * ocfs2_iterate_xattr_index_block() when an indexed xattr block is removed.
 */
struct ocfs2_rm_xattr_bucket_para {
        /* refcount tree caching info supplied by the caller, may be NULL */
        struct ocfs2_caching_info *ref_ci;
        /* buffer of the refcount tree root supplied by the caller, may be NULL */
        struct buffer_head *ref_root_bh;
};
2408
2409 static int ocfs2_xattr_block_remove(struct inode *inode,
2410                                     struct buffer_head *blk_bh,
2411                                     struct ocfs2_caching_info *ref_ci,
2412                                     struct buffer_head *ref_root_bh)
2413 {
2414         struct ocfs2_xattr_block *xb;
2415         int ret = 0;
2416         struct ocfs2_xattr_value_buf vb = {
2417                 .vb_bh = blk_bh,
2418                 .vb_access = ocfs2_journal_access_xb,
2419         };
2420         struct ocfs2_rm_xattr_bucket_para args = {
2421                 .ref_ci = ref_ci,
2422                 .ref_root_bh = ref_root_bh,
2423         };
2424
2425         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2426         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2427                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2428                 ret = ocfs2_remove_value_outside(inode, &vb, header,
2429                                                  ref_ci, ref_root_bh);
2430         } else
2431                 ret = ocfs2_iterate_xattr_index_block(inode,
2432                                                 blk_bh,
2433                                                 ocfs2_rm_xattr_cluster,
2434                                                 &args);
2435
2436         return ret;
2437 }
2438
2439 static int ocfs2_xattr_free_block(struct inode *inode,
2440                                   u64 block,
2441                                   struct ocfs2_caching_info *ref_ci,
2442                                   struct buffer_head *ref_root_bh)
2443 {
2444         struct inode *xb_alloc_inode;
2445         struct buffer_head *xb_alloc_bh = NULL;
2446         struct buffer_head *blk_bh = NULL;
2447         struct ocfs2_xattr_block *xb;
2448         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2449         handle_t *handle;
2450         int ret = 0;
2451         u64 blk, bg_blkno;
2452         u16 bit;
2453
2454         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2455         if (ret < 0) {
2456                 mlog_errno(ret);
2457                 goto out;
2458         }
2459
2460         ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2461         if (ret < 0) {
2462                 mlog_errno(ret);
2463                 goto out;
2464         }
2465
2466         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2467         blk = le64_to_cpu(xb->xb_blkno);
2468         bit = le16_to_cpu(xb->xb_suballoc_bit);
2469         if (xb->xb_suballoc_loc)
2470                 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2471         else
2472                 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2473
2474         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2475                                 EXTENT_ALLOC_SYSTEM_INODE,
2476                                 le16_to_cpu(xb->xb_suballoc_slot));
2477         if (!xb_alloc_inode) {
2478                 ret = -ENOMEM;
2479                 mlog_errno(ret);
2480                 goto out;
2481         }
2482         mutex_lock(&xb_alloc_inode->i_mutex);
2483
2484         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2485         if (ret < 0) {
2486                 mlog_errno(ret);
2487                 goto out_mutex;
2488         }
2489
2490         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2491         if (IS_ERR(handle)) {
2492                 ret = PTR_ERR(handle);
2493                 mlog_errno(ret);
2494                 goto out_unlock;
2495         }
2496
2497         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2498                                        bit, bg_blkno, 1);
2499         if (ret < 0)
2500                 mlog_errno(ret);
2501
2502         ocfs2_commit_trans(osb, handle);
2503 out_unlock:
2504         ocfs2_inode_unlock(xb_alloc_inode, 1);
2505         brelse(xb_alloc_bh);
2506 out_mutex:
2507         mutex_unlock(&xb_alloc_inode->i_mutex);
2508         iput(xb_alloc_inode);
2509 out:
2510         brelse(blk_bh);
2511         return ret;
2512 }
2513
2514 /*
2515  * ocfs2_xattr_remove()
2516  *
2517  * Free extended attribute resources associated with this inode.
2518  */
2519 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2520 {
2521         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2522         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2523         struct ocfs2_refcount_tree *ref_tree = NULL;
2524         struct buffer_head *ref_root_bh = NULL;
2525         struct ocfs2_caching_info *ref_ci = NULL;
2526         handle_t *handle;
2527         int ret;
2528
2529         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2530                 return 0;
2531
2532         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2533                 return 0;
2534
2535         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2536                 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2537                                                le64_to_cpu(di->i_refcount_loc),
2538                                                1, &ref_tree, &ref_root_bh);
2539                 if (ret) {
2540                         mlog_errno(ret);
2541                         goto out;
2542                 }
2543                 ref_ci = &ref_tree->rf_ci;
2544
2545         }
2546
2547         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2548                 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2549                                                ref_ci, ref_root_bh);
2550                 if (ret < 0) {
2551                         mlog_errno(ret);
2552                         goto out;
2553                 }
2554         }
2555
2556         if (di->i_xattr_loc) {
2557                 ret = ocfs2_xattr_free_block(inode,
2558                                              le64_to_cpu(di->i_xattr_loc),
2559                                              ref_ci, ref_root_bh);
2560                 if (ret < 0) {
2561                         mlog_errno(ret);
2562                         goto out;
2563                 }
2564         }
2565
2566         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2567                                    OCFS2_INODE_UPDATE_CREDITS);
2568         if (IS_ERR(handle)) {
2569                 ret = PTR_ERR(handle);
2570                 mlog_errno(ret);
2571                 goto out;
2572         }
2573         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2574                                       OCFS2_JOURNAL_ACCESS_WRITE);
2575         if (ret) {
2576                 mlog_errno(ret);
2577                 goto out_commit;
2578         }
2579
2580         di->i_xattr_loc = 0;
2581
2582         spin_lock(&oi->ip_lock);
2583         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2584         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2585         spin_unlock(&oi->ip_lock);
2586
2587         ocfs2_journal_dirty(handle, di_bh);
2588 out_commit:
2589         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2590 out:
2591         if (ref_tree)
2592                 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2593         brelse(ref_root_bh);
2594         return ret;
2595 }
2596
2597 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2598                                         struct ocfs2_dinode *di)
2599 {
2600         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2601         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2602         int free;
2603
2604         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2605                 return 0;
2606
2607         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2608                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2609                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2610         } else if (ocfs2_inode_is_fast_symlink(inode)) {
2611                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2612                         le64_to_cpu(di->i_size);
2613         } else {
2614                 struct ocfs2_extent_list *el = &di->id2.i_list;
2615                 free = (le16_to_cpu(el->l_count) -
2616                         le16_to_cpu(el->l_next_free_rec)) *
2617                         sizeof(struct ocfs2_extent_rec);
2618         }
2619         if (free >= xattrsize)
2620                 return 1;
2621
2622         return 0;
2623 }
2624
2625 /*
2626  * ocfs2_xattr_ibody_find()
2627  *
2628  * Find extended attribute in inode block and
2629  * fill search info into struct ocfs2_xattr_search.
2630  */
2631 static int ocfs2_xattr_ibody_find(struct inode *inode,
2632                                   int name_index,
2633                                   const char *name,
2634                                   struct ocfs2_xattr_search *xs)
2635 {
2636         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2637         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2638         int ret;
2639         int has_space = 0;
2640
2641         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2642                 return 0;
2643
2644         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2645                 down_read(&oi->ip_alloc_sem);
2646                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2647                 up_read(&oi->ip_alloc_sem);
2648                 if (!has_space)
2649                         return 0;
2650         }
2651
2652         xs->xattr_bh = xs->inode_bh;
2653         xs->end = (void *)di + inode->i_sb->s_blocksize;
2654         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2655                 xs->header = (struct ocfs2_xattr_header *)
2656                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2657         else
2658                 xs->header = (struct ocfs2_xattr_header *)
2659                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2660         xs->base = (void *)xs->header;
2661         xs->here = xs->header->xh_entries;
2662
2663         /* Find the named attribute. */
2664         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2665                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2666                 if (ret && ret != -ENODATA)
2667                         return ret;
2668                 xs->not_found = ret;
2669         }
2670
2671         return 0;
2672 }
2673
2674 static int ocfs2_xattr_ibody_init(struct inode *inode,
2675                                   struct buffer_head *di_bh,
2676                                   struct ocfs2_xattr_set_ctxt *ctxt)
2677 {
2678         int ret;
2679         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2680         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2681         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2682         unsigned int xattrsize = osb->s_xattr_inline_size;
2683
2684         if (!ocfs2_xattr_has_space_inline(inode, di)) {
2685                 ret = -ENOSPC;
2686                 goto out;
2687         }
2688
2689         ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2690                                       OCFS2_JOURNAL_ACCESS_WRITE);
2691         if (ret) {
2692                 mlog_errno(ret);
2693                 goto out;
2694         }
2695
2696         /*
2697          * Adjust extent record count or inline data size
2698          * to reserve space for extended attribute.
2699          */
2700         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2701                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2702                 le16_add_cpu(&idata->id_count, -xattrsize);
2703         } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2704                 struct ocfs2_extent_list *el = &di->id2.i_list;
2705                 le16_add_cpu(&el->l_count, -(xattrsize /
2706                                              sizeof(struct ocfs2_extent_rec)));
2707         }
2708         di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2709
2710         spin_lock(&oi->ip_lock);
2711         oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2712         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2713         spin_unlock(&oi->ip_lock);
2714
2715         ocfs2_journal_dirty(ctxt->handle, di_bh);
2716
2717 out:
2718         return ret;
2719 }
2720
2721 /*
2722  * ocfs2_xattr_ibody_set()
2723  *
2724  * Set, replace or remove an extended attribute into inode block.
2725  *
2726  */
2727 static int ocfs2_xattr_ibody_set(struct inode *inode,
2728                                  struct ocfs2_xattr_info *xi,
2729                                  struct ocfs2_xattr_search *xs,
2730                                  struct ocfs2_xattr_set_ctxt *ctxt)
2731 {
2732         int ret;
2733         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2734         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2735         struct ocfs2_xa_loc loc;
2736
2737         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2738                 return -ENOSPC;
2739
2740         down_write(&oi->ip_alloc_sem);
2741         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2742                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2743                         ret = -ENOSPC;
2744                         goto out;
2745                 }
2746         }
2747
2748         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2749                 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2750                 if (ret) {
2751                         if (ret != -ENOSPC)
2752                                 mlog_errno(ret);
2753                         goto out;
2754                 }
2755         }
2756
2757         ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2758                                  xs->not_found ? NULL : xs->here);
2759         ret = ocfs2_xa_set(&loc, xi, ctxt);
2760         if (ret) {
2761                 if (ret != -ENOSPC)
2762                         mlog_errno(ret);
2763                 goto out;
2764         }
2765         xs->here = loc.xl_entry;
2766
2767 out:
2768         up_write(&oi->ip_alloc_sem);
2769
2770         return ret;
2771 }
2772
2773 /*
2774  * ocfs2_xattr_block_find()
2775  *
2776  * Find extended attribute in external block and
2777  * fill search info into struct ocfs2_xattr_search.
2778  */
2779 static int ocfs2_xattr_block_find(struct inode *inode,
2780                                   int name_index,
2781                                   const char *name,
2782                                   struct ocfs2_xattr_search *xs)
2783 {
2784         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2785         struct buffer_head *blk_bh = NULL;
2786         struct ocfs2_xattr_block *xb;
2787         int ret = 0;
2788
2789         if (!di->i_xattr_loc)
2790                 return ret;
2791
2792         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2793                                      &blk_bh);
2794         if (ret < 0) {
2795                 mlog_errno(ret);
2796                 return ret;
2797         }
2798
2799         xs->xattr_bh = blk_bh;
2800         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2801
2802         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2803                 xs->header = &xb->xb_attrs.xb_header;
2804                 xs->base = (void *)xs->header;
2805                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2806                 xs->here = xs->header->xh_entries;
2807
2808                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2809         } else
2810                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2811                                                    name_index,
2812                                                    name, xs);
2813
2814         if (ret && ret != -ENODATA) {
2815                 xs->xattr_bh = NULL;
2816                 goto cleanup;
2817         }
2818         xs->not_found = ret;
2819         return 0;
2820 cleanup:
2821         brelse(blk_bh);
2822
2823         return ret;
2824 }
2825
2826 static int ocfs2_create_xattr_block(struct inode *inode,
2827                                     struct buffer_head *inode_bh,
2828                                     struct ocfs2_xattr_set_ctxt *ctxt,
2829                                     int indexed,
2830                                     struct buffer_head **ret_bh)
2831 {
2832         int ret;
2833         u16 suballoc_bit_start;
2834         u32 num_got;
2835         u64 suballoc_loc, first_blkno;
2836         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2837         struct buffer_head *new_bh = NULL;
2838         struct ocfs2_xattr_block *xblk;
2839
2840         ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2841                                       inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2842         if (ret < 0) {
2843                 mlog_errno(ret);
2844                 goto end;
2845         }
2846
2847         ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2848                                    &suballoc_loc, &suballoc_bit_start,
2849                                    &num_got, &first_blkno);
2850         if (ret < 0) {
2851                 mlog_errno(ret);
2852                 goto end;
2853         }
2854
2855         new_bh = sb_getblk(inode->i_sb, first_blkno);
2856         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2857
2858         ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2859                                       new_bh,
2860                                       OCFS2_JOURNAL_ACCESS_CREATE);
2861         if (ret < 0) {
2862                 mlog_errno(ret);
2863                 goto end;
2864         }
2865
2866         /* Initialize ocfs2_xattr_block */
2867         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2868         memset(xblk, 0, inode->i_sb->s_blocksize);
2869         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2870         xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2871         xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2872         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2873         xblk->xb_fs_generation =
2874                 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2875         xblk->xb_blkno = cpu_to_le64(first_blkno);
2876         if (indexed) {
2877                 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2878                 xr->xt_clusters = cpu_to_le32(1);
2879                 xr->xt_last_eb_blk = 0;
2880                 xr->xt_list.l_tree_depth = 0;
2881                 xr->xt_list.l_count = cpu_to_le16(
2882                                         ocfs2_xattr_recs_per_xb(inode->i_sb));
2883                 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2884                 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2885         }
2886         ocfs2_journal_dirty(ctxt->handle, new_bh);
2887
2888         /* Add it to the inode */
2889         di->i_xattr_loc = cpu_to_le64(first_blkno);
2890
2891         spin_lock(&OCFS2_I(inode)->ip_lock);
2892         OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2893         di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2894         spin_unlock(&OCFS2_I(inode)->ip_lock);
2895
2896         ocfs2_journal_dirty(ctxt->handle, inode_bh);
2897
2898         *ret_bh = new_bh;
2899         new_bh = NULL;
2900
2901 end:
2902         brelse(new_bh);
2903         return ret;
2904 }
2905
2906 /*
2907  * ocfs2_xattr_block_set()
2908  *
2909  * Set, replace or remove an extended attribute into external block.
2910  *
2911  */
2912 static int ocfs2_xattr_block_set(struct inode *inode,
2913                                  struct ocfs2_xattr_info *xi,
2914                                  struct ocfs2_xattr_search *xs,
2915                                  struct ocfs2_xattr_set_ctxt *ctxt)
2916 {
2917         struct buffer_head *new_bh = NULL;
2918         struct ocfs2_xattr_block *xblk = NULL;
2919         int ret;
2920         struct ocfs2_xa_loc loc;
2921
2922         if (!xs->xattr_bh) {
2923                 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2924                                                0, &new_bh);
2925                 if (ret) {
2926                         mlog_errno(ret);
2927                         goto end;
2928                 }
2929
2930                 xs->xattr_bh = new_bh;
2931                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2932                 xs->header = &xblk->xb_attrs.xb_header;
2933                 xs->base = (void *)xs->header;
2934                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2935                 xs->here = xs->header->xh_entries;
2936         } else
2937                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2938
2939         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2940                 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2941                                               xs->not_found ? NULL : xs->here);
2942
2943                 ret = ocfs2_xa_set(&loc, xi, ctxt);
2944                 if (!ret)
2945                         xs->here = loc.xl_entry;
2946                 else if (ret != -ENOSPC)
2947                         goto end;
2948                 else {
2949                         ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2950                         if (ret)
2951                                 goto end;
2952                 }
2953         }
2954
2955         if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
2956                 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2957
2958 end:
2959         return ret;
2960 }
2961
2962 /* Check whether the new xattr can be inserted into the inode. */
2963 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2964                                        struct ocfs2_xattr_info *xi,
2965                                        struct ocfs2_xattr_search *xs)
2966 {
2967         struct ocfs2_xattr_entry *last;
2968         int free, i;
2969         size_t min_offs = xs->end - xs->base;
2970
2971         if (!xs->header)
2972                 return 0;
2973
2974         last = xs->header->xh_entries;
2975
2976         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2977                 size_t offs = le16_to_cpu(last->xe_name_offset);
2978                 if (offs < min_offs)
2979                         min_offs = offs;
2980                 last += 1;
2981         }
2982
2983         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2984         if (free < 0)
2985                 return 0;
2986
2987         BUG_ON(!xs->not_found);
2988
2989         if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
2990                 return 1;
2991
2992         return 0;
2993 }
2994
2995 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2996                                      struct ocfs2_dinode *di,
2997                                      struct ocfs2_xattr_info *xi,
2998                                      struct ocfs2_xattr_search *xis,
2999                                      struct ocfs2_xattr_search *xbs,
3000                                      int *clusters_need,
3001                                      int *meta_need,
3002                                      int *credits_need)
3003 {
3004         int ret = 0, old_in_xb = 0;
3005         int clusters_add = 0, meta_add = 0, credits = 0;
3006         struct buffer_head *bh = NULL;
3007         struct ocfs2_xattr_block *xb = NULL;
3008         struct ocfs2_xattr_entry *xe = NULL;
3009         struct ocfs2_xattr_value_root *xv = NULL;
3010         char *base = NULL;
3011         int name_offset, name_len = 0;
3012         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3013                                                     xi->xi_value_len);
3014         u64 value_size;
3015
3016         /*
3017          * Calculate the clusters we need to write.
3018          * No matter whether we replace an old one or add a new one,
3019          * we need this for writing.
3020          */
3021         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3022                 credits += new_clusters *
3023                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
3024
3025         if (xis->not_found && xbs->not_found) {
3026                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3027
3028                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3029                         clusters_add += new_clusters;
3030                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3031                                                         &def_xv.xv.xr_list,
3032                                                         new_clusters);
3033                 }
3034
3035                 goto meta_guess;
3036         }
3037
3038         if (!xis->not_found) {
3039                 xe = xis->here;
3040                 name_offset = le16_to_cpu(xe->xe_name_offset);
3041                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3042                 base = xis->base;
3043                 credits += OCFS2_INODE_UPDATE_CREDITS;
3044         } else {
3045                 int i, block_off = 0;
3046                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3047                 xe = xbs->here;
3048                 name_offset = le16_to_cpu(xe->xe_name_offset);
3049                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3050                 i = xbs->here - xbs->header->xh_entries;
3051                 old_in_xb = 1;
3052
3053                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3054                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3055                                                         bucket_xh(xbs->bucket),
3056                                                         i, &block_off,
3057                                                         &name_offset);
3058                         base = bucket_block(xbs->bucket, block_off);
3059                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3060                 } else {
3061                         base = xbs->base;
3062                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3063                 }
3064         }
3065
3066         /*
3067          * delete a xattr doesn't need metadata and cluster allocation.
3068          * so just calculate the credits and return.
3069          *
3070          * The credits for removing the value tree will be extended
3071          * by ocfs2_remove_extent itself.
3072          */
3073         if (!xi->xi_value) {
3074                 if (!ocfs2_xattr_is_local(xe))
3075                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3076
3077                 goto out;
3078         }
3079
3080         /* do cluster allocation guess first. */
3081         value_size = le64_to_cpu(xe->xe_value_size);
3082
3083         if (old_in_xb) {
3084                 /*
3085                  * In xattr set, we always try to set the xe in inode first,
3086                  * so if it can be inserted into inode successfully, the old
3087                  * one will be removed from the xattr block, and this xattr
3088                  * will be inserted into inode as a new xattr in inode.
3089                  */
3090                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3091                         clusters_add += new_clusters;
3092                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
3093                                     OCFS2_INODE_UPDATE_CREDITS;
3094                         if (!ocfs2_xattr_is_local(xe))
3095                                 credits += ocfs2_calc_extend_credits(
3096                                                         inode->i_sb,
3097                                                         &def_xv.xv.xr_list,
3098                                                         new_clusters);
3099                         goto out;
3100                 }
3101         }
3102
3103         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3104                 /* the new values will be stored outside. */
3105                 u32 old_clusters = 0;
3106
3107                 if (!ocfs2_xattr_is_local(xe)) {
3108                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3109                                                                  value_size);
3110                         xv = (struct ocfs2_xattr_value_root *)
3111                              (base + name_offset + name_len);
3112                         value_size = OCFS2_XATTR_ROOT_SIZE;
3113                 } else
3114                         xv = &def_xv.xv;
3115
3116                 if (old_clusters >= new_clusters) {
3117                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3118                         goto out;
3119                 } else {
3120                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3121                         clusters_add += new_clusters - old_clusters;
3122                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3123                                                              &xv->xr_list,
3124                                                              new_clusters -
3125                                                              old_clusters);
3126                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3127                                 goto out;
3128                 }
3129         } else {
3130                 /*
3131                  * Now the new value will be stored inside. So if the new
3132                  * value is smaller than the size of value root or the old
3133                  * value, we don't need any allocation, otherwise we have
3134                  * to guess metadata allocation.
3135                  */
3136                 if ((ocfs2_xattr_is_local(xe) &&
3137                      (value_size >= xi->xi_value_len)) ||
3138                     (!ocfs2_xattr_is_local(xe) &&
3139                      OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3140                         goto out;
3141         }
3142
3143 meta_guess:
3144         /* calculate metadata allocation. */
3145         if (di->i_xattr_loc) {
3146                 if (!xbs->xattr_bh) {
3147                         ret = ocfs2_read_xattr_block(inode,
3148                                                      le64_to_cpu(di->i_xattr_loc),
3149                                                      &bh);
3150                         if (ret) {
3151                                 mlog_errno(ret);
3152                                 goto out;
3153                         }
3154
3155                         xb = (struct ocfs2_xattr_block *)bh->b_data;
3156                 } else
3157                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3158
3159                 /*
3160                  * If there is already an xattr tree, good, we can calculate
3161                  * like other b-trees. Otherwise we may have the chance of
3162                  * create a tree, the credit calculation is borrowed from
3163                  * ocfs2_calc_extend_credits with root_el = NULL. And the
3164                  * new tree will be cluster based, so no meta is needed.
3165                  */
3166                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3167                         struct ocfs2_extent_list *el =
3168                                  &xb->xb_attrs.xb_root.xt_list;
3169                         meta_add += ocfs2_extend_meta_needed(el);
3170                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3171                                                              el, 1);
3172                 } else
3173                         credits += OCFS2_SUBALLOC_ALLOC + 1;
3174
3175                 /*
3176                  * This cluster will be used either for new bucket or for
3177                  * new xattr block.
3178                  * If the cluster size is the same as the bucket size, one
3179                  * more is needed since we may need to extend the bucket
3180                  * also.
3181                  */
3182                 clusters_add += 1;
3183                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3184                 if (OCFS2_XATTR_BUCKET_SIZE ==
3185                         OCFS2_SB(inode->i_sb)->s_clustersize) {
3186                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3187                         clusters_add += 1;
3188                 }
3189         } else {
3190                 meta_add += 1;
3191                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3192         }
3193 out:
3194         if (clusters_need)
3195                 *clusters_need = clusters_add;
3196         if (meta_need)
3197                 *meta_need = meta_add;
3198         if (credits_need)
3199                 *credits_need = credits;
3200         brelse(bh);
3201         return ret;
3202 }
3203
3204 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3205                                      struct ocfs2_dinode *di,
3206                                      struct ocfs2_xattr_info *xi,
3207                                      struct ocfs2_xattr_search *xis,
3208                                      struct ocfs2_xattr_search *xbs,
3209                                      struct ocfs2_xattr_set_ctxt *ctxt,
3210                                      int extra_meta,
3211                                      int *credits)
3212 {
3213         int clusters_add, meta_add, ret;
3214         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3215
3216         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3217
3218         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3219
3220         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3221                                         &clusters_add, &meta_add, credits);
3222         if (ret) {
3223                 mlog_errno(ret);
3224                 return ret;
3225         }
3226
3227         meta_add += extra_meta;
3228         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3229              "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3230
3231         if (meta_add) {
3232                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3233                                                         &ctxt->meta_ac);
3234                 if (ret) {
3235                         mlog_errno(ret);
3236                         goto out;
3237                 }
3238         }
3239
3240         if (clusters_add) {
3241                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3242                 if (ret)
3243                         mlog_errno(ret);
3244         }
3245 out:
3246         if (ret) {
3247                 if (ctxt->meta_ac) {
3248                         ocfs2_free_alloc_context(ctxt->meta_ac);
3249                         ctxt->meta_ac = NULL;
3250                 }
3251
3252                 /*
3253                  * We cannot have an error and a non null ctxt->data_ac.
3254                  */
3255         }
3256
3257         return ret;
3258 }
3259
3260 static int __ocfs2_xattr_set_handle(struct inode *inode,
3261                                     struct ocfs2_dinode *di,
3262                                     struct ocfs2_xattr_info *xi,
3263                                     struct ocfs2_xattr_search *xis,
3264                                     struct ocfs2_xattr_search *xbs,
3265                                     struct ocfs2_xattr_set_ctxt *ctxt)
3266 {
3267         int ret = 0, credits, old_found;
3268
3269         if (!xi->xi_value) {
3270                 /* Remove existing extended attribute */
3271                 if (!xis->not_found)
3272                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3273                 else if (!xbs->not_found)
3274                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3275         } else {
3276                 /* We always try to set extended attribute into inode first*/
3277                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3278                 if (!ret && !xbs->not_found) {
3279                         /*
3280                          * If succeed and that extended attribute existing in
3281                          * external block, then we will remove it.
3282                          */
3283                         xi->xi_value = NULL;
3284                         xi->xi_value_len = 0;
3285
3286                         old_found = xis->not_found;
3287                         xis->not_found = -ENODATA;
3288                         ret = ocfs2_calc_xattr_set_need(inode,
3289                                                         di,
3290                                                         xi,
3291                                                         xis,
3292                                                         xbs,
3293                                                         NULL,
3294                                                         NULL,
3295                                                         &credits);
3296                         xis->not_found = old_found;
3297                         if (ret) {
3298                                 mlog_errno(ret);
3299                                 goto out;
3300                         }
3301
3302                         ret = ocfs2_extend_trans(ctxt->handle, credits);
3303                         if (ret) {
3304                                 mlog_errno(ret);
3305                                 goto out;
3306                         }
3307                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3308                 } else if (ret == -ENOSPC) {
3309                         if (di->i_xattr_loc && !xbs->xattr_bh) {
3310                                 ret = ocfs2_xattr_block_find(inode,
3311                                                              xi->xi_name_index,
3312                                                              xi->xi_name, xbs);
3313                                 if (ret)
3314                                         goto out;
3315
3316                                 old_found = xis->not_found;
3317                                 xis->not_found = -ENODATA;
3318                                 ret = ocfs2_calc_xattr_set_need(inode,
3319                                                                 di,
3320                                                                 xi,
3321                                                                 xis,
3322                                                                 xbs,
3323                                                                 NULL,
3324                                                                 NULL,
3325                                                                 &credits);
3326                                 xis->not_found = old_found;
3327                                 if (ret) {
3328                                         mlog_errno(ret);
3329                                         goto out;
3330                                 }
3331
3332                                 ret = ocfs2_extend_trans(ctxt->handle, credits);
3333                                 if (ret) {
3334                                         mlog_errno(ret);
3335                                         goto out;
3336                                 }
3337                         }
3338                         /*
3339                          * If no space in inode, we will set extended attribute
3340                          * into external block.
3341                          */
3342                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3343                         if (ret)
3344                                 goto out;
3345                         if (!xis->not_found) {
3346                                 /*
3347                                  * If succeed and that extended attribute
3348                                  * existing in inode, we will remove it.
3349                                  */
3350                                 xi->xi_value = NULL;
3351                                 xi->xi_value_len = 0;
3352                                 xbs->not_found = -ENODATA;
3353                                 ret = ocfs2_calc_xattr_set_need(inode,
3354                                                                 di,
3355                                                                 xi,
3356                                                                 xis,
3357                                                                 xbs,
3358                                                                 NULL,
3359                                                                 NULL,
3360                                                                 &credits);
3361                                 if (ret) {
3362                                         mlog_errno(ret);
3363                                         goto out;
3364                                 }
3365
3366                                 ret = ocfs2_extend_trans(ctxt->handle, credits);
3367                                 if (ret) {
3368                                         mlog_errno(ret);
3369                                         goto out;
3370                                 }
3371                                 ret = ocfs2_xattr_ibody_set(inode, xi,
3372                                                             xis, ctxt);
3373                         }
3374                 }
3375         }
3376
3377         if (!ret) {
3378                 /* Update inode ctime. */
3379                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3380                                               xis->inode_bh,
3381                                               OCFS2_JOURNAL_ACCESS_WRITE);
3382                 if (ret) {
3383                         mlog_errno(ret);
3384                         goto out;
3385                 }
3386
3387                 inode->i_ctime = CURRENT_TIME;
3388                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3389                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3390                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3391         }
3392 out:
3393         return ret;
3394 }
3395
3396 /*
3397  * This function only called duing creating inode
3398  * for init security/acl xattrs of the new inode.
3399  * All transanction credits have been reserved in mknod.
3400  */
3401 int ocfs2_xattr_set_handle(handle_t *handle,
3402                            struct inode *inode,
3403                            struct buffer_head *di_bh,
3404                            int name_index,
3405                            const char *name,
3406                            const void *value,
3407                            size_t value_len,
3408                            int flags,
3409                            struct ocfs2_alloc_context *meta_ac,
3410                            struct ocfs2_alloc_context *data_ac)
3411 {
3412         struct ocfs2_dinode *di;
3413         int ret;
3414
3415         struct ocfs2_xattr_info xi = {
3416                 .xi_name_index = name_index,
3417                 .xi_name = name,
3418                 .xi_name_len = strlen(name),
3419                 .xi_value = value,
3420                 .xi_value_len = value_len,
3421         };
3422
3423         struct ocfs2_xattr_search xis = {
3424                 .not_found = -ENODATA,
3425         };
3426
3427         struct ocfs2_xattr_search xbs = {
3428                 .not_found = -ENODATA,
3429         };
3430
3431         struct ocfs2_xattr_set_ctxt ctxt = {
3432                 .handle = handle,
3433                 .meta_ac = meta_ac,
3434                 .data_ac = data_ac,
3435         };
3436
3437         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3438                 return -EOPNOTSUPP;
3439
3440         /*
3441          * In extreme situation, may need xattr bucket when
3442          * block size is too small. And we have already reserved
3443          * the credits for bucket in mknod.
3444          */
3445         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3446                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
3447                 if (!xbs.bucket) {
3448                         mlog_errno(-ENOMEM);
3449                         return -ENOMEM;
3450                 }
3451         }
3452
3453         xis.inode_bh = xbs.inode_bh = di_bh;
3454         di = (struct ocfs2_dinode *)di_bh->b_data;
3455
3456         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3457
3458         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3459         if (ret)
3460                 goto cleanup;
3461         if (xis.not_found) {
3462                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3463                 if (ret)
3464                         goto cleanup;
3465         }
3466
3467         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3468
3469 cleanup:
3470         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3471         brelse(xbs.xattr_bh);
3472         ocfs2_xattr_bucket_free(xbs.bucket);
3473
3474         return ret;
3475 }
3476
3477 /*
3478  * ocfs2_xattr_set()
3479  *
3480  * Set, replace or remove an extended attribute for this inode.
3481  * value is NULL to remove an existing extended attribute, else either
3482  * create or replace an extended attribute.
3483  */
3484 int ocfs2_xattr_set(struct inode *inode,
3485                     int name_index,
3486                     const char *name,
3487                     const void *value,
3488                     size_t value_len,
3489                     int flags)
3490 {
3491         struct buffer_head *di_bh = NULL;
3492         struct ocfs2_dinode *di;
3493         int ret, credits, ref_meta = 0, ref_credits = 0;
3494         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3495         struct inode *tl_inode = osb->osb_tl_inode;
3496         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3497         struct ocfs2_refcount_tree *ref_tree = NULL;
3498
3499         struct ocfs2_xattr_info xi = {
3500                 .xi_name_index = name_index,
3501                 .xi_name = name,
3502                 .xi_name_len = strlen(name),
3503                 .xi_value = value,
3504                 .xi_value_len = value_len,
3505         };
3506
3507         struct ocfs2_xattr_search xis = {
3508                 .not_found = -ENODATA,
3509         };
3510
3511         struct ocfs2_xattr_search xbs = {
3512                 .not_found = -ENODATA,
3513         };
3514
3515         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3516                 return -EOPNOTSUPP;
3517
3518         /*
3519          * Only xbs will be used on indexed trees.  xis doesn't need a
3520          * bucket.
3521          */
3522         xbs.bucket = ocfs2_xattr_bucket_new(inode);
3523         if (!xbs.bucket) {
3524                 mlog_errno(-ENOMEM);
3525                 return -ENOMEM;
3526         }
3527
3528         ret = ocfs2_inode_lock(inode, &di_bh, 1);
3529         if (ret < 0) {
3530                 mlog_errno(ret);
3531                 goto cleanup_nolock;
3532         }
3533         xis.inode_bh = xbs.inode_bh = di_bh;
3534         di = (struct ocfs2_dinode *)di_bh->b_data;
3535
3536         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3537         /*
3538          * Scan inode and external block to find the same name
3539          * extended attribute and collect search infomation.
3540          */
3541         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3542         if (ret)
3543                 goto cleanup;
3544         if (xis.not_found) {
3545                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3546                 if (ret)
3547                         goto cleanup;
3548         }
3549
3550         if (xis.not_found && xbs.not_found) {
3551                 ret = -ENODATA;
3552                 if (flags & XATTR_REPLACE)
3553                         goto cleanup;
3554                 ret = 0;
3555                 if (!value)
3556                         goto cleanup;
3557         } else {
3558                 ret = -EEXIST;
3559                 if (flags & XATTR_CREATE)
3560                         goto cleanup;
3561         }
3562
3563         /* Check whether the value is refcounted and do some prepartion. */
3564         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3565             (!xis.not_found || !xbs.not_found)) {
3566                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3567                                                    &xis, &xbs, &ref_tree,
3568                                                    &ref_meta, &ref_credits);
3569                 if (ret) {
3570                         mlog_errno(ret);
3571                         goto cleanup;
3572                 }
3573         }
3574
3575         mutex_lock(&tl_inode->i_mutex);
3576
3577         if (ocfs2_truncate_log_needs_flush(osb)) {
3578                 ret = __ocfs2_flush_truncate_log(osb);
3579                 if (ret < 0) {
3580                         mutex_unlock(&tl_inode->i_mutex);
3581                         mlog_errno(ret);
3582                         goto cleanup;
3583                 }
3584         }
3585         mutex_unlock(&tl_inode->i_mutex);
3586
3587         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3588                                         &xbs, &ctxt, ref_meta, &credits);
3589         if (ret) {
3590                 mlog_errno(ret);
3591                 goto cleanup;
3592         }
3593
3594         /* we need to update inode's ctime field, so add credit for it. */
3595         credits += OCFS2_INODE_UPDATE_CREDITS;
3596         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3597         if (IS_ERR(ctxt.handle)) {
3598                 ret = PTR_ERR(ctxt.handle);
3599                 mlog_errno(ret);
3600                 goto cleanup;
3601         }
3602
3603         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3604
3605         ocfs2_commit_trans(osb, ctxt.handle);
3606
3607         if (ctxt.data_ac)
3608                 ocfs2_free_alloc_context(ctxt.data_ac);
3609         if (ctxt.meta_ac)
3610                 ocfs2_free_alloc_context(ctxt.meta_ac);
3611         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3612                 ocfs2_schedule_truncate_log_flush(osb, 1);
3613         ocfs2_run_deallocs(osb, &ctxt.dealloc);
3614
3615 cleanup:
3616         if (ref_tree)
3617                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3618         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3619         if (!value && !ret) {
3620                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3621                 if (ret)
3622                         mlog_errno(ret);
3623         }
3624         ocfs2_inode_unlock(inode, 1);
3625 cleanup_nolock:
3626         brelse(di_bh);
3627         brelse(xbs.xattr_bh);
3628         ocfs2_xattr_bucket_free(xbs.bucket);
3629
3630         return ret;
3631 }
3632
3633 /*
3634  * Find the xattr extent rec which may contains name_hash.
3635  * e_cpos will be the first name hash of the xattr rec.
3636  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3637  */
3638 static int ocfs2_xattr_get_rec(struct inode *inode,
3639                                u32 name_hash,
3640                                u64 *p_blkno,
3641                                u32 *e_cpos,
3642                                u32 *num_clusters,
3643                                struct ocfs2_extent_list *el)
3644 {
3645         int ret = 0, i;
3646         struct buffer_head *eb_bh = NULL;
3647         struct ocfs2_extent_block *eb;
3648         struct ocfs2_extent_rec *rec = NULL;
3649         u64 e_blkno = 0;
3650
3651         if (el->l_tree_depth) {
3652                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3653                                       &eb_bh);
3654                 if (ret) {
3655                         mlog_errno(ret);
3656                         goto out;
3657                 }
3658
3659                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3660                 el = &eb->h_list;
3661
3662                 if (el->l_tree_depth) {
3663                         ocfs2_error(inode->i_sb,
3664                                     "Inode %lu has non zero tree depth in "
3665                                     "xattr tree block %llu\n", inode->i_ino,
3666                                     (unsigned long long)eb_bh->b_blocknr);
3667                         ret = -EROFS;
3668                         goto out;
3669                 }
3670         }
3671
3672         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3673                 rec = &el->l_recs[i];
3674
3675                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3676                         e_blkno = le64_to_cpu(rec->e_blkno);
3677                         break;
3678                 }
3679         }
3680
3681         if (!e_blkno) {
3682                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3683                             "record (%u, %u, 0) in xattr", inode->i_ino,
3684                             le32_to_cpu(rec->e_cpos),
3685                             ocfs2_rec_clusters(el, rec));
3686                 ret = -EROFS;
3687                 goto out;
3688         }
3689
3690         *p_blkno = le64_to_cpu(rec->e_blkno);
3691         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3692         if (e_cpos)
3693                 *e_cpos = le32_to_cpu(rec->e_cpos);
3694 out:
3695         brelse(eb_bh);
3696         return ret;
3697 }
3698
/*
 * Function type for a callback that is handed an inode and one xattr
 * bucket, plus an opaque @para pointer, and returns an int status.
 * NOTE(review): the users of this type lie outside this part of the
 * file; presumably @para is supplied by whoever iterates the buckets.
 */
typedef int xattr_bucket_func(struct inode *inode,
			      struct ocfs2_xattr_bucket *bucket,
			      void *para);
3702
3703 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3704                                    struct ocfs2_xattr_bucket *bucket,
3705                                    int name_index,
3706                                    const char *name,
3707                                    u32 name_hash,
3708                                    u16 *xe_index,
3709                                    int *found)
3710 {
3711         int i, ret = 0, cmp = 1, block_off, new_offset;
3712         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3713         size_t name_len = strlen(name);
3714         struct ocfs2_xattr_entry *xe = NULL;
3715         char *xe_name;
3716
3717         /*
3718          * We don't use binary search in the bucket because there
3719          * may be multiple entries with the same name hash.
3720          */
3721         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3722                 xe = &xh->xh_entries[i];
3723
3724                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3725                         continue;
3726                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3727                         break;
3728
3729                 cmp = name_index - ocfs2_xattr_get_type(xe);
3730                 if (!cmp)
3731                         cmp = name_len - xe->xe_name_len;
3732                 if (cmp)
3733                         continue;
3734
3735                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3736                                                         xh,
3737                                                         i,
3738                                                         &block_off,
3739                                                         &new_offset);
3740                 if (ret) {
3741                         mlog_errno(ret);
3742                         break;
3743                 }
3744
3745
3746                 xe_name = bucket_block(bucket, block_off) + new_offset;
3747                 if (!memcmp(name, xe_name, name_len)) {
3748                         *xe_index = i;
3749                         *found = 1;
3750                         ret = 0;
3751                         break;
3752                 }
3753         }
3754
3755         return ret;
3756 }
3757
3758 /*
3759  * Find the specified xattr entry in a series of buckets.
3760  * This series start from p_blkno and last for num_clusters.
3761  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3762  * the num of the valid buckets.
3763  *
3764  * Return the buffer_head this xattr should reside in. And if the xattr's
3765  * hash is in the gap of 2 buckets, return the lower bucket.
3766  */
3767 static int ocfs2_xattr_bucket_find(struct inode *inode,
3768                                    int name_index,
3769                                    const char *name,
3770                                    u32 name_hash,
3771                                    u64 p_blkno,
3772                                    u32 first_hash,
3773                                    u32 num_clusters,
3774                                    struct ocfs2_xattr_search *xs)
3775 {
3776         int ret, found = 0;
3777         struct ocfs2_xattr_header *xh = NULL;
3778         struct ocfs2_xattr_entry *xe = NULL;
3779         u16 index = 0;
3780         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3781         int low_bucket = 0, bucket, high_bucket;
3782         struct ocfs2_xattr_bucket *search;
3783         u32 last_hash;
3784         u64 blkno, lower_blkno = 0;
3785
3786         search = ocfs2_xattr_bucket_new(inode);
3787         if (!search) {
3788                 ret = -ENOMEM;
3789                 mlog_errno(ret);
3790                 goto out;
3791         }
3792
3793         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3794         if (ret) {
3795                 mlog_errno(ret);
3796                 goto out;
3797         }
3798
3799         xh = bucket_xh(search);
3800         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3801         while (low_bucket <= high_bucket) {
3802                 ocfs2_xattr_bucket_relse(search);
3803
3804                 bucket = (low_bucket + high_bucket) / 2;
3805                 blkno = p_blkno + bucket * blk_per_bucket;
3806                 ret = ocfs2_read_xattr_bucket(search, blkno);
3807                 if (ret) {
3808                         mlog_errno(ret);
3809                         goto out;
3810                 }
3811
3812                 xh = bucket_xh(search);
3813                 xe = &xh->xh_entries[0];
3814                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3815                         high_bucket = bucket - 1;
3816                         continue;
3817                 }
3818
3819                 /*
3820                  * Check whether the hash of the last entry in our
3821                  * bucket is larger than the search one. for an empty
3822                  * bucket, the last one is also the first one.
3823                  */
3824                 if (xh->xh_count)
3825                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3826
3827                 last_hash = le32_to_cpu(xe->xe_name_hash);
3828
3829                 /* record lower_blkno which may be the insert place. */
3830                 lower_blkno = blkno;
3831
3832                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3833                         low_bucket = bucket + 1;
3834                         continue;
3835                 }
3836
3837                 /* the searched xattr should reside in this bucket if exists. */
3838                 ret = ocfs2_find_xe_in_bucket(inode, search,
3839                                               name_index, name, name_hash,
3840                                               &index, &found);
3841                 if (ret) {
3842                         mlog_errno(ret);
3843                         goto out;
3844                 }
3845                 break;
3846         }
3847
3848         /*
3849          * Record the bucket we have found.
3850          * When the xattr's hash value is in the gap of 2 buckets, we will
3851          * always set it to the previous bucket.
3852          */
3853         if (!lower_blkno)
3854                 lower_blkno = p_blkno;
3855
3856         /* This should be in cache - we just read it during the search */
3857         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3858         if (ret) {
3859                 mlog_errno(ret);
3860                 goto out;
3861         }
3862
3863         xs->header = bucket_xh(xs->bucket);
3864         xs->base = bucket_block(xs->bucket, 0);
3865         xs->end = xs->base + inode->i_sb->s_blocksize;
3866
3867         if (found) {
3868                 xs->here = &xs->header->xh_entries[index];
3869                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3870                      (unsigned long long)bucket_blkno(xs->bucket), index);
3871         } else
3872                 ret = -ENODATA;
3873
3874 out:
3875         ocfs2_xattr_bucket_free(search);
3876         return ret;
3877 }
3878
3879 static int ocfs2_xattr_index_block_find(struct inode *inode,
3880                                         struct buffer_head *root_bh,
3881                                         int name_index,
3882                                         const char *name,
3883                                         struct ocfs2_xattr_search *xs)
3884 {
3885         int ret;
3886         struct ocfs2_xattr_block *xb =
3887                         (struct ocfs2_xattr_block *)root_bh->b_data;
3888         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3889         struct ocfs2_extent_list *el = &xb_root->xt_list;
3890         u64 p_blkno = 0;
3891         u32 first_hash, num_clusters = 0;
3892         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3893
3894         if (le16_to_cpu(el->l_next_free_rec) == 0)
3895                 return -ENODATA;
3896
3897         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3898              name, name_hash, name_index);
3899
3900         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3901                                   &num_clusters, el);
3902         if (ret) {
3903                 mlog_errno(ret);
3904                 goto out;
3905         }
3906
3907         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3908
3909         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3910              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3911              first_hash);
3912
3913         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3914                                       p_blkno, first_hash, num_clusters, xs);
3915
3916 out:
3917         return ret;
3918 }
3919
3920 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3921                                        u64 blkno,
3922                                        u32 clusters,
3923                                        xattr_bucket_func *func,
3924                                        void *para)
3925 {
3926         int i, ret = 0;
3927         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3928         u32 num_buckets = clusters * bpc;
3929         struct ocfs2_xattr_bucket *bucket;
3930
3931         bucket = ocfs2_xattr_bucket_new(inode);
3932         if (!bucket) {
3933                 mlog_errno(-ENOMEM);
3934                 return -ENOMEM;
3935         }
3936
3937         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3938              clusters, (unsigned long long)blkno);
3939
3940         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3941                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3942                 if (ret) {
3943                         mlog_errno(ret);
3944                         break;
3945                 }
3946
3947                 /*
3948                  * The real bucket num in this series of blocks is stored
3949                  * in the 1st bucket.
3950                  */
3951                 if (i == 0)
3952                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3953
3954                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3955                      (unsigned long long)blkno,
3956                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3957                 if (func) {
3958                         ret = func(inode, bucket, para);
3959                         if (ret && ret != -ERANGE)
3960                                 mlog_errno(ret);
3961                         /* Fall through to bucket_relse() */
3962                 }
3963
3964                 ocfs2_xattr_bucket_relse(bucket);
3965                 if (ret)
3966                         break;
3967         }
3968
3969         ocfs2_xattr_bucket_free(bucket);
3970         return ret;
3971 }
3972
/*
 * State carried through a listing walk over an indexed xattr tree.
 * All three members are handed to ocfs2_xattr_list_entry() for every
 * listed name.
 */
struct ocfs2_xattr_tree_list {
	/* Caller-supplied output buffer for the listing. */
	char *buffer;
	/* Size of the caller-supplied buffer. */
	size_t buffer_size;
	/* Running result, started at 0 and returned once the walk is done. */
	size_t result;
};
3978
3979 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3980                                              struct ocfs2_xattr_header *xh,
3981                                              int index,
3982                                              int *block_off,
3983                                              int *new_offset)
3984 {
3985         u16 name_offset;
3986
3987         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3988                 return -EINVAL;
3989
3990         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3991
3992         *block_off = name_offset >> sb->s_blocksize_bits;
3993         *new_offset = name_offset % sb->s_blocksize;
3994
3995         return 0;
3996 }
3997
3998 static int ocfs2_list_xattr_bucket(struct inode *inode,
3999                                    struct ocfs2_xattr_bucket *bucket,
4000                                    void *para)
4001 {
4002         int ret = 0, type;
4003         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4004         int i, block_off, new_offset;
4005         const char *prefix, *name;
4006
4007         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4008                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4009                 type = ocfs2_xattr_get_type(entry);
4010                 prefix = ocfs2_xattr_prefix(type);
4011
4012                 if (prefix) {
4013                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4014                                                                 bucket_xh(bucket),
4015                                                                 i,
4016                                                                 &block_off,
4017                                                                 &new_offset);
4018                         if (ret)
4019                                 break;
4020
4021                         name = (const char *)bucket_block(bucket, block_off) +
4022                                 new_offset;
4023                         ret = ocfs2_xattr_list_entry(xl->buffer,
4024                                                      xl->buffer_size,
4025                                                      &xl->result,
4026                                                      prefix, name,
4027                                                      entry->xe_name_len);
4028                         if (ret)
4029                                 break;
4030                 }
4031         }
4032
4033         return ret;
4034 }
4035
4036 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4037                                            struct buffer_head *blk_bh,
4038                                            xattr_tree_rec_func *rec_func,
4039                                            void *para)
4040 {
4041         struct ocfs2_xattr_block *xb =
4042                         (struct ocfs2_xattr_block *)blk_bh->b_data;
4043         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4044         int ret = 0;
4045         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4046         u64 p_blkno = 0;
4047
4048         if (!el->l_next_free_rec || !rec_func)
4049                 return 0;
4050
4051         while (name_hash > 0) {
4052                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4053                                           &e_cpos, &num_clusters, el);
4054                 if (ret) {
4055                         mlog_errno(ret);
4056                         break;
4057                 }
4058
4059                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4060                                num_clusters, para);
4061                 if (ret) {
4062                         if (ret != -ERANGE)
4063                                 mlog_errno(ret);
4064                         break;
4065                 }
4066
4067                 if (e_cpos == 0)
4068                         break;
4069
4070                 name_hash = e_cpos - 1;
4071         }
4072
4073         return ret;
4074
4075 }
4076
4077 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4078                                      struct buffer_head *root_bh,
4079                                      u64 blkno, u32 cpos, u32 len, void *para)
4080 {
4081         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4082                                            ocfs2_list_xattr_bucket, para);
4083 }
4084
4085 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4086                                              struct buffer_head *blk_bh,
4087                                              char *buffer,
4088                                              size_t buffer_size)
4089 {
4090         int ret;
4091         struct ocfs2_xattr_tree_list xl = {
4092                 .buffer = buffer,
4093                 .buffer_size = buffer_size,
4094                 .result = 0,
4095         };
4096
4097         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4098                                               ocfs2_list_xattr_tree_rec, &xl);
4099         if (ret) {
4100                 mlog_errno(ret);
4101                 goto out;
4102         }
4103
4104         ret = xl.result;
4105 out:
4106         return ret;
4107 }
4108
4109 static int cmp_xe(const void *a, const void *b)
4110 {
4111         const struct ocfs2_xattr_entry *l = a, *r = b;
4112         u32 l_hash = le32_to_cpu(l->xe_name_hash);
4113         u32 r_hash = le32_to_cpu(r->xe_name_hash);
4114
4115         if (l_hash > r_hash)
4116                 return 1;
4117         if (l_hash < r_hash)
4118                 return -1;
4119         return 0;
4120 }
4121
4122 static void swap_xe(void *a, void *b, int size)
4123 {
4124         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4125
4126         tmp = *l;
4127         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4128         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4129 }
4130
4131 /*
4132  * When the ocfs2_xattr_block is filled up, new bucket will be created
4133  * and all the xattr entries will be moved to the new bucket.
4134  * The header goes at the start of the bucket, and the names+values are
4135  * filled from the end.  This is why *target starts as the last buffer.
4136  * Note: we need to sort the entries since they are not saved in order
4137  * in the ocfs2_xattr_block.
4138  */
4139 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4140                                            struct buffer_head *xb_bh,
4141                                            struct ocfs2_xattr_bucket *bucket)
4142 {
4143         int i, blocksize = inode->i_sb->s_blocksize;
4144         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4145         u16 offset, size, off_change;
4146         struct ocfs2_xattr_entry *xe;
4147         struct ocfs2_xattr_block *xb =
4148                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
4149         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4150         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4151         u16 count = le16_to_cpu(xb_xh->xh_count);
4152         char *src = xb_bh->b_data;
4153         char *target = bucket_block(bucket, blks - 1);
4154
4155         mlog(0, "cp xattr from block %llu to bucket %llu\n",
4156              (unsigned long long)xb_bh->b_blocknr,
4157              (unsigned long long)bucket_blkno(bucket));
4158
4159         for (i = 0; i < blks; i++)
4160                 memset(bucket_block(bucket, i), 0, blocksize);
4161
4162         /*
4163          * Since the xe_name_offset is based on ocfs2_xattr_header,
4164          * there is a offset change corresponding to the change of
4165          * ocfs2_xattr_header's position.
4166          */
4167         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4168         xe = &xb_xh->xh_entries[count - 1];
4169         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4170         size = blocksize - offset;
4171
4172         /* copy all the names and values. */
4173         memcpy(target + offset, src + offset, size);
4174
4175         /* Init new header now. */
4176         xh->xh_count = xb_xh->xh_count;
4177         xh->xh_num_buckets = cpu_to_le16(1);
4178         xh->xh_name_value_len = cpu_to_le16(size);
4179         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4180
4181         /* copy all the entries. */
4182         target = bucket_block(bucket, 0);
4183         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4184         size = count * sizeof(struct ocfs2_xattr_entry);
4185         memcpy(target + offset, (char *)xb_xh + offset, size);
4186
4187         /* Change the xe offset for all the xe because of the move. */
4188         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4189                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4190         for (i = 0; i < count; i++)
4191                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4192
4193         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4194              offset, size, off_change);
4195
4196         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4197              cmp_xe, swap_xe);
4198 }
4199
4200 /*
4201  * After we move xattr from block to index btree, we have to
4202  * update ocfs2_xattr_search to the new xe and base.
4203  *
4204  * When the entry is in xattr block, xattr_bh indicates the storage place.
4205  * While if the entry is in index b-tree, "bucket" indicates the
4206  * real place of the xattr.
4207  */
4208 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4209                                             struct ocfs2_xattr_search *xs,
4210                                             struct buffer_head *old_bh)
4211 {
4212         char *buf = old_bh->b_data;
4213         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4214         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4215         int i;
4216
4217         xs->header = bucket_xh(xs->bucket);
4218         xs->base = bucket_block(xs->bucket, 0);
4219         xs->end = xs->base + inode->i_sb->s_blocksize;
4220
4221         if (xs->not_found)
4222                 return;
4223
4224         i = xs->here - old_xh->xh_entries;
4225         xs->here = &xs->header->xh_entries[i];
4226 }
4227
4228 static int ocfs2_xattr_create_index_block(struct inode *inode,
4229                                           struct ocfs2_xattr_search *xs,
4230                                           struct ocfs2_xattr_set_ctxt *ctxt)
4231 {
4232         int ret;
4233         u32 bit_off, len;
4234         u64 blkno;
4235         handle_t *handle = ctxt->handle;
4236         struct ocfs2_inode_info *oi = OCFS2_I(inode);
4237         struct buffer_head *xb_bh = xs->xattr_bh;
4238         struct ocfs2_xattr_block *xb =
4239                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4240         struct ocfs2_xattr_tree_root *xr;
4241         u16 xb_flags = le16_to_cpu(xb->xb_flags);
4242
4243         mlog(0, "create xattr index block for %llu\n",
4244              (unsigned long long)xb_bh->b_blocknr);
4245
4246         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4247         BUG_ON(!xs->bucket);
4248
4249         /*
4250          * XXX:
4251          * We can use this lock for now, and maybe move to a dedicated mutex
4252          * if performance becomes a problem later.
4253          */
4254         down_write(&oi->ip_alloc_sem);
4255
4256         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4257                                       OCFS2_JOURNAL_ACCESS_WRITE);
4258         if (ret) {
4259                 mlog_errno(ret);
4260                 goto out;
4261         }
4262
4263         ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4264                                      1, 1, &bit_off, &len);
4265         if (ret) {
4266                 mlog_errno(ret);
4267                 goto out;
4268         }
4269
4270         /*
4271          * The bucket may spread in many blocks, and
4272          * we will only touch the 1st block and the last block
4273          * in the whole bucket(one for entry and one for data).
4274          */
4275         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4276
4277         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
4278              (unsigned long long)blkno);
4279
4280         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4281         if (ret) {
4282                 mlog_errno(ret);
4283                 goto out;
4284         }
4285
4286         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4287                                                 OCFS2_JOURNAL_ACCESS_CREATE);
4288         if (ret) {
4289                 mlog_errno(ret);
4290                 goto out;
4291         }
4292
4293         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4294         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4295
4296         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4297
4298         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4299         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4300                offsetof(struct ocfs2_xattr_block, xb_attrs));
4301
4302         xr = &xb->xb_attrs.xb_root;
4303         xr->xt_clusters = cpu_to_le32(1);
4304         xr->xt_last_eb_blk = 0;
4305         xr->xt_list.l_tree_depth = 0;
4306         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4307         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4308
4309         xr->xt_list.l_recs[0].e_cpos = 0;
4310         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4311         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4312
4313         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4314
4315         ocfs2_journal_dirty(handle, xb_bh);
4316
4317 out:
4318         up_write(&oi->ip_alloc_sem);
4319
4320         return ret;
4321 }
4322
4323 static int cmp_xe_offset(const void *a, const void *b)
4324 {
4325         const struct ocfs2_xattr_entry *l = a, *r = b;
4326         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4327         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4328
4329         if (l_name_offset < r_name_offset)
4330                 return 1;
4331         if (l_name_offset > r_name_offset)
4332                 return -1;
4333         return 0;
4334 }
4335
4336 /*
4337  * defrag a xattr bucket if we find that the bucket has some
4338  * holes beteen name/value pairs.
4339  * We will move all the name/value pairs to the end of the bucket
4340  * so that we can spare some space for insertion.
4341  */
4342 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4343                                      handle_t *handle,
4344                                      struct ocfs2_xattr_bucket *bucket)
4345 {
4346         int ret, i;
4347         size_t end, offset, len;
4348         struct ocfs2_xattr_header *xh;
4349         char *entries, *buf, *bucket_buf = NULL;
4350         u64 blkno = bucket_blkno(bucket);
4351         u16 xh_free_start;
4352         size_t blocksize = inode->i_sb->s_blocksize;
4353         struct ocfs2_xattr_entry *xe;
4354
4355         /*
4356          * In order to make the operation more efficient and generic,
4357          * we copy all the blocks into a contiguous memory and do the
4358          * defragment there, so if anything is error, we will not touch
4359          * the real block.
4360          */
4361         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4362         if (!bucket_buf) {
4363                 ret = -EIO;
4364                 goto out;
4365         }
4366
4367         buf = bucket_buf;
4368         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4369                 memcpy(buf, bucket_block(bucket, i), blocksize);
4370
4371         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4372                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4373         if (ret < 0) {
4374                 mlog_errno(ret);
4375                 goto out;
4376         }
4377
4378         xh = (struct ocfs2_xattr_header *)bucket_buf;
4379         entries = (char *)xh->xh_entries;
4380         xh_free_start = le16_to_cpu(xh->xh_free_start);
4381
4382         mlog(0, "adjust xattr bucket in %llu, count = %u, "
4383              "xh_free_start = %u, xh_name_value_len = %u.\n",
4384              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4385              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4386
4387         /*
4388          * sort all the entries by their offset.
4389          * the largest will be the first, so that we can
4390          * move them to the end one by one.
4391          */
4392         sort(entries, le16_to_cpu(xh->xh_count),
4393              sizeof(struct ocfs2_xattr_entry),
4394              cmp_xe_offset, swap_xe);
4395
4396         /* Move all name/values to the end of the bucket. */
4397         xe = xh->xh_entries;
4398         end = OCFS2_XATTR_BUCKET_SIZE;
4399         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4400                 offset = le16_to_cpu(xe->xe_name_offset);
4401                 len = namevalue_size_xe(xe);
4402
4403                 /*
4404                  * We must make sure that the name/value pair
4405                  * exist in the same block. So adjust end to
4406                  * the previous block end if needed.
4407                  */
4408                 if (((end - len) / blocksize !=
4409                         (end - 1) / blocksize))
4410                         end = end - end % blocksize;
4411
4412                 if (end > offset + len) {
4413                         memmove(bucket_buf + end - len,
4414                                 bucket_buf + offset, len);
4415                         xe->xe_name_offset = cpu_to_le16(end - len);
4416                 }
4417
4418                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4419                                 "bucket %llu\n", (unsigned long long)blkno);
4420
4421                 end -= len;
4422         }
4423
4424         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4425                         "bucket %llu\n", (unsigned long long)blkno);
4426
4427         if (xh_free_start == end)
4428                 goto out;
4429
4430         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4431         xh->xh_free_start = cpu_to_le16(end);
4432
4433         /* sort the entries by their name_hash. */
4434         sort(entries, le16_to_cpu(xh->xh_count),
4435              sizeof(struct ocfs2_xattr_entry),
4436              cmp_xe, swap_xe);
4437
4438         buf = bucket_buf;
4439         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4440                 memcpy(bucket_block(bucket, i), buf, blocksize);
4441         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4442
4443 out:
4444         kfree(bucket_buf);
4445         return ret;
4446 }
4447
4448 /*
4449  * prev_blkno points to the start of an existing extent.  new_blkno
4450  * points to a newly allocated extent.  Because we know each of our
4451  * clusters contains more than bucket, we can easily split one cluster
4452  * at a bucket boundary.  So we take the last cluster of the existing
4453  * extent and split it down the middle.  We move the last half of the
4454  * buckets in the last cluster of the existing extent over to the new
4455  * extent.
4456  *
4457  * first_bh is the buffer at prev_blkno so we can update the existing
4458  * extent's bucket count.  header_bh is the bucket were we were hoping
4459  * to insert our xattr.  If the bucket move places the target in the new
4460  * extent, we'll update first_bh and header_bh after modifying the old
4461  * extent.
4462  *
4463  * first_hash will be set as the 1st xe's name_hash in the new extent.
4464  */
4465 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4466                                                handle_t *handle,
4467                                                struct ocfs2_xattr_bucket *first,
4468                                                struct ocfs2_xattr_bucket *target,
4469                                                u64 new_blkno,
4470                                                u32 num_clusters,
4471                                                u32 *first_hash)
4472 {
4473         int ret;
4474         struct super_block *sb = inode->i_sb;
4475         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4476         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4477         int to_move = num_buckets / 2;
4478         u64 src_blkno;
4479         u64 last_cluster_blkno = bucket_blkno(first) +
4480                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4481
4482         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4483         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4484
4485         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4486              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4487
4488         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4489                                      last_cluster_blkno, new_blkno,
4490                                      to_move, first_hash);
4491         if (ret) {
4492                 mlog_errno(ret);
4493                 goto out;
4494         }
4495
4496         /* This is the first bucket that got moved */
4497         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4498
4499         /*
4500          * If the target bucket was part of the moved buckets, we need to
4501          * update first and target.
4502          */
4503         if (bucket_blkno(target) >= src_blkno) {
4504                 /* Find the block for the new target bucket */
4505                 src_blkno = new_blkno +
4506                         (bucket_blkno(target) - src_blkno);
4507
4508                 ocfs2_xattr_bucket_relse(first);
4509                 ocfs2_xattr_bucket_relse(target);
4510
4511                 /*
4512                  * These shouldn't fail - the buffers are in the
4513                  * journal from ocfs2_cp_xattr_bucket().
4514                  */
4515                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
4516                 if (ret) {
4517                         mlog_errno(ret);
4518                         goto out;
4519                 }
4520                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
4521                 if (ret)
4522                         mlog_errno(ret);
4523
4524         }
4525
4526 out:
4527         return ret;
4528 }
4529
4530 /*
4531  * Find the suitable pos when we divide a bucket into 2.
4532  * We have to make sure the xattrs with the same hash value exist
4533  * in the same bucket.
4534  *
4535  * If this ocfs2_xattr_header covers more than one hash value, find a
4536  * place where the hash value changes.  Try to find the most even split.
4537  * The most common case is that all entries have different hash values,
4538  * and the first check we make will find a place to split.
4539  */
4540 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4541 {
4542         struct ocfs2_xattr_entry *entries = xh->xh_entries;
4543         int count = le16_to_cpu(xh->xh_count);
4544         int delta, middle = count / 2;
4545
4546         /*
4547          * We start at the middle.  Each step gets farther away in both
4548          * directions.  We therefore hit the change in hash value
4549          * nearest to the middle.  Note that this loop does not execute for
4550          * count < 2.
4551          */
4552         for (delta = 0; delta < middle; delta++) {
4553                 /* Let's check delta earlier than middle */
4554                 if (cmp_xe(&entries[middle - delta - 1],
4555                            &entries[middle - delta]))
4556                         return middle - delta;
4557
4558                 /* For even counts, don't walk off the end */
4559                 if ((middle + delta + 1) == count)
4560                         continue;
4561
4562                 /* Now try delta past middle */
4563                 if (cmp_xe(&entries[middle + delta],
4564                            &entries[middle + delta + 1]))
4565                         return middle + delta + 1;
4566         }
4567
4568         /* Every entry had the same hash */
4569         return count;
4570 }
4571
4572 /*
4573  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4574  * first_hash will record the 1st hash of the new bucket.
4575  *
4576  * Normally half of the xattrs will be moved.  But we have to make
4577  * sure that the xattrs with the same hash value are stored in the
4578  * same bucket. If all the xattrs in this bucket have the same hash
4579  * value, the new bucket will be initialized as an empty one and the
4580  * first_hash will be initialized as (hash_value+1).
4581  */
4582 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4583                                     handle_t *handle,
4584                                     u64 blk,
4585                                     u64 new_blk,
4586                                     u32 *first_hash,
4587                                     int new_bucket_head)
4588 {
4589         int ret, i;
4590         int count, start, len, name_value_len = 0, name_offset = 0;
4591         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4592         struct ocfs2_xattr_header *xh;
4593         struct ocfs2_xattr_entry *xe;
4594         int blocksize = inode->i_sb->s_blocksize;
4595
4596         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4597              (unsigned long long)blk, (unsigned long long)new_blk);
4598
4599         s_bucket = ocfs2_xattr_bucket_new(inode);
4600         t_bucket = ocfs2_xattr_bucket_new(inode);
4601         if (!s_bucket || !t_bucket) {
4602                 ret = -ENOMEM;
4603                 mlog_errno(ret);
4604                 goto out;
4605         }
4606
4607         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4608         if (ret) {
4609                 mlog_errno(ret);
4610                 goto out;
4611         }
4612
4613         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4614                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4615         if (ret) {
4616                 mlog_errno(ret);
4617                 goto out;
4618         }
4619
4620         /*
4621          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4622          * there's no need to read it.
4623          */
4624         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4625         if (ret) {
4626                 mlog_errno(ret);
4627                 goto out;
4628         }
4629
4630         /*
4631          * Hey, if we're overwriting t_bucket, what difference does
4632          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4633          * same part of ocfs2_cp_xattr_bucket().
4634          */
4635         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4636                                                 new_bucket_head ?
4637                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4638                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4639         if (ret) {
4640                 mlog_errno(ret);
4641                 goto out;
4642         }
4643
4644         xh = bucket_xh(s_bucket);
4645         count = le16_to_cpu(xh->xh_count);
4646         start = ocfs2_xattr_find_divide_pos(xh);
4647
4648         if (start == count) {
4649                 xe = &xh->xh_entries[start-1];
4650
4651                 /*
4652                  * initialized a new empty bucket here.
4653                  * The hash value is set as one larger than
4654                  * that of the last entry in the previous bucket.
4655                  */
4656                 for (i = 0; i < t_bucket->bu_blocks; i++)
4657                         memset(bucket_block(t_bucket, i), 0, blocksize);
4658
4659                 xh = bucket_xh(t_bucket);
4660                 xh->xh_free_start = cpu_to_le16(blocksize);
4661                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4662                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4663
4664                 goto set_num_buckets;
4665         }
4666
4667         /* copy the whole bucket to the new first. */
4668         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4669
4670         /* update the new bucket. */
4671         xh = bucket_xh(t_bucket);
4672
4673         /*
4674          * Calculate the total name/value len and xh_free_start for
4675          * the old bucket first.
4676          */
4677         name_offset = OCFS2_XATTR_BUCKET_SIZE;
4678         name_value_len = 0;
4679         for (i = 0; i < start; i++) {
4680                 xe = &xh->xh_entries[i];
4681                 name_value_len += namevalue_size_xe(xe);
4682                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4683                         name_offset = le16_to_cpu(xe->xe_name_offset);
4684         }
4685
4686         /*
4687          * Now begin the modification to the new bucket.
4688          *
4689          * In the new bucket, We just move the xattr entry to the beginning
4690          * and don't touch the name/value. So there will be some holes in the
4691          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4692          * called.
4693          */
4694         xe = &xh->xh_entries[start];
4695         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4696         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4697              (int)((char *)xe - (char *)xh),
4698              (int)((char *)xh->xh_entries - (char *)xh));
4699         memmove((char *)xh->xh_entries, (char *)xe, len);
4700         xe = &xh->xh_entries[count - start];
4701         len = sizeof(struct ocfs2_xattr_entry) * start;
4702         memset((char *)xe, 0, len);
4703
4704         le16_add_cpu(&xh->xh_count, -start);
4705         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4706
4707         /* Calculate xh_free_start for the new bucket. */
4708         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4709         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4710                 xe = &xh->xh_entries[i];
4711                 if (le16_to_cpu(xe->xe_name_offset) <
4712                     le16_to_cpu(xh->xh_free_start))
4713                         xh->xh_free_start = xe->xe_name_offset;
4714         }
4715
4716 set_num_buckets:
4717         /* set xh->xh_num_buckets for the new xh. */
4718         if (new_bucket_head)
4719                 xh->xh_num_buckets = cpu_to_le16(1);
4720         else
4721                 xh->xh_num_buckets = 0;
4722
4723         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4724
4725         /* store the first_hash of the new bucket. */
4726         if (first_hash)
4727                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4728
4729         /*
4730          * Now only update the 1st block of the old bucket.  If we
4731          * just added a new empty bucket, there is no need to modify
4732          * it.
4733          */
4734         if (start == count)
4735                 goto out;
4736
4737         xh = bucket_xh(s_bucket);
4738         memset(&xh->xh_entries[start], 0,
4739                sizeof(struct ocfs2_xattr_entry) * (count - start));
4740         xh->xh_count = cpu_to_le16(start);
4741         xh->xh_free_start = cpu_to_le16(name_offset);
4742         xh->xh_name_value_len = cpu_to_le16(name_value_len);
4743
4744         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4745
4746 out:
4747         ocfs2_xattr_bucket_free(s_bucket);
4748         ocfs2_xattr_bucket_free(t_bucket);
4749
4750         return ret;
4751 }
4752
4753 /*
4754  * Copy xattr from one bucket to another bucket.
4755  *
4756  * The caller must make sure that the journal transaction
4757  * has enough space for journaling.
4758  */
4759 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4760                                  handle_t *handle,
4761                                  u64 s_blkno,
4762                                  u64 t_blkno,
4763                                  int t_is_new)
4764 {
4765         int ret;
4766         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4767
4768         BUG_ON(s_blkno == t_blkno);
4769
4770         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4771              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4772              t_is_new);
4773
4774         s_bucket = ocfs2_xattr_bucket_new(inode);
4775         t_bucket = ocfs2_xattr_bucket_new(inode);
4776         if (!s_bucket || !t_bucket) {
4777                 ret = -ENOMEM;
4778                 mlog_errno(ret);
4779                 goto out;
4780         }
4781
4782         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4783         if (ret)
4784                 goto out;
4785
4786         /*
4787          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4788          * there's no need to read it.
4789          */
4790         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4791         if (ret)
4792                 goto out;
4793
4794         /*
4795          * Hey, if we're overwriting t_bucket, what difference does
4796          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4797          * cluster to fill, we came here from
4798          * ocfs2_mv_xattr_buckets(), and it is really new -
4799          * ACCESS_CREATE is required.  But we also might have moved data
4800          * out of t_bucket before extending back into it.
4801          * ocfs2_add_new_xattr_bucket() can do this - its call to
4802          * ocfs2_add_new_xattr_cluster() may have created a new extent
4803          * and copied out the end of the old extent.  Then it re-extends
4804          * the old extent back to create space for new xattrs.  That's
4805          * how we get here, and the bucket isn't really new.
4806          */
4807         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4808                                                 t_is_new ?
4809                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4810                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4811         if (ret)
4812                 goto out;
4813
4814         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4815         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4816
4817 out:
4818         ocfs2_xattr_bucket_free(t_bucket);
4819         ocfs2_xattr_bucket_free(s_bucket);
4820
4821         return ret;
4822 }
4823
4824 /*
4825  * src_blk points to the start of an existing extent.  last_blk points to
4826  * last cluster in that extent.  to_blk points to a newly allocated
4827  * extent.  We copy the buckets from the cluster at last_blk to the new
4828  * extent.  If start_bucket is non-zero, we skip that many buckets before
4829  * we start copying.  The new extent's xh_num_buckets gets set to the
4830  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4831  * by the same amount.
4832  */
4833 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4834                                   u64 src_blk, u64 last_blk, u64 to_blk,
4835                                   unsigned int start_bucket,
4836                                   u32 *first_hash)
4837 {
4838         int i, ret, credits;
4839         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4840         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4841         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4842         struct ocfs2_xattr_bucket *old_first, *new_first;
4843
4844         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4845              (unsigned long long)last_blk, (unsigned long long)to_blk);
4846
4847         BUG_ON(start_bucket >= num_buckets);
4848         if (start_bucket) {
4849                 num_buckets -= start_bucket;
4850                 last_blk += (start_bucket * blks_per_bucket);
4851         }
4852
4853         /* The first bucket of the original extent */
4854         old_first = ocfs2_xattr_bucket_new(inode);
4855         /* The first bucket of the new extent */
4856         new_first = ocfs2_xattr_bucket_new(inode);
4857         if (!old_first || !new_first) {
4858                 ret = -ENOMEM;
4859                 mlog_errno(ret);
4860                 goto out;
4861         }
4862
4863         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4864         if (ret) {
4865                 mlog_errno(ret);
4866                 goto out;
4867         }
4868
4869         /*
4870          * We need to update the first bucket of the old extent and all
4871          * the buckets going to the new extent.
4872          */
4873         credits = ((num_buckets + 1) * blks_per_bucket);
4874         ret = ocfs2_extend_trans(handle, credits);
4875         if (ret) {
4876                 mlog_errno(ret);
4877                 goto out;
4878         }
4879
4880         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4881                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4882         if (ret) {
4883                 mlog_errno(ret);
4884                 goto out;
4885         }
4886
4887         for (i = 0; i < num_buckets; i++) {
4888                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4889                                             last_blk + (i * blks_per_bucket),
4890                                             to_blk + (i * blks_per_bucket),
4891                                             1);
4892                 if (ret) {
4893                         mlog_errno(ret);
4894                         goto out;
4895                 }
4896         }
4897
4898         /*
4899          * Get the new bucket ready before we dirty anything
4900          * (This actually shouldn't fail, because we already dirtied
4901          * it once in ocfs2_cp_xattr_bucket()).
4902          */
4903         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4904         if (ret) {
4905                 mlog_errno(ret);
4906                 goto out;
4907         }
4908         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4909                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4910         if (ret) {
4911                 mlog_errno(ret);
4912                 goto out;
4913         }
4914
4915         /* Now update the headers */
4916         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4917         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4918
4919         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4920         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4921
4922         if (first_hash)
4923                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4924
4925 out:
4926         ocfs2_xattr_bucket_free(new_first);
4927         ocfs2_xattr_bucket_free(old_first);
4928         return ret;
4929 }
4930
4931 /*
4932  * Move some xattrs in this cluster to the new cluster.
4933  * This function should only be called when bucket size == cluster size.
4934  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4935  */
4936 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4937                                       handle_t *handle,
4938                                       u64 prev_blk,
4939                                       u64 new_blk,
4940                                       u32 *first_hash)
4941 {
4942         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4943         int ret, credits = 2 * blk_per_bucket;
4944
4945         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4946
4947         ret = ocfs2_extend_trans(handle, credits);
4948         if (ret) {
4949                 mlog_errno(ret);
4950                 return ret;
4951         }
4952
4953         /* Move half of the xattr in start_blk to the next bucket. */
4954         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4955                                           new_blk, first_hash, 1);
4956 }
4957
4958 /*
4959  * Move some xattrs from the old cluster to the new one since they are not
4960  * contiguous in ocfs2 xattr tree.
4961  *
4962  * new_blk starts a new separate cluster, and we will move some xattrs from
4963  * prev_blk to it. v_start will be set as the first name hash value in this
4964  * new cluster so that it can be used as e_cpos during tree insertion and
4965  * don't collide with our original b-tree operations. first_bh and header_bh
4966  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4967  * to extend the insert bucket.
4968  *
4969  * The problem is how much xattr should we move to the new one and when should
4970  * we update first_bh and header_bh?
4971  * 1. If cluster size > bucket size, that means the previous cluster has more
4972  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4973  *    update the first_bh and header_bh if the insert bucket has been moved
4974  *    to the new cluster.
4975  * 2. If cluster_size == bucket_size:
4976  *    a) If the previous extent rec has more than one cluster and the insert
4977  *       place isn't in the last cluster, copy the entire last cluster to the
4978  *       new one. This time, we don't need to upate the first_bh and header_bh
4979  *       since they will not be moved into the new cluster.
4980  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4981  *       the new one. And we set the extend flag to zero if the insert place is
4982  *       moved into the new allocated cluster since no extend is needed.
4983  */
4984 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4985                                             handle_t *handle,
4986                                             struct ocfs2_xattr_bucket *first,
4987                                             struct ocfs2_xattr_bucket *target,
4988                                             u64 new_blk,
4989                                             u32 prev_clusters,
4990                                             u32 *v_start,
4991                                             int *extend)
4992 {
4993         int ret;
4994
4995         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4996              (unsigned long long)bucket_blkno(first), prev_clusters,
4997              (unsigned long long)new_blk);
4998
4999         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5000                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5001                                                           handle,
5002                                                           first, target,
5003                                                           new_blk,
5004                                                           prev_clusters,
5005                                                           v_start);
5006                 if (ret)
5007                         mlog_errno(ret);
5008         } else {
5009                 /* The start of the last cluster in the first extent */
5010                 u64 last_blk = bucket_blkno(first) +
5011                         ((prev_clusters - 1) *
5012                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
5013
5014                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5015                         ret = ocfs2_mv_xattr_buckets(inode, handle,
5016                                                      bucket_blkno(first),
5017                                                      last_blk, new_blk, 0,
5018                                                      v_start);
5019                         if (ret)
5020                                 mlog_errno(ret);
5021                 } else {
5022                         ret = ocfs2_divide_xattr_cluster(inode, handle,
5023                                                          last_blk, new_blk,
5024                                                          v_start);
5025                         if (ret)
5026                                 mlog_errno(ret);
5027
5028                         if ((bucket_blkno(target) == last_blk) && extend)
5029                                 *extend = 0;
5030                 }
5031         }
5032
5033         return ret;
5034 }
5035
5036 /*
5037  * Add a new cluster for xattr storage.
5038  *
5039  * If the new cluster is contiguous with the previous one, it will be
5040  * appended to the same extent record, and num_clusters will be updated.
5041  * If not, we will insert a new extent for it and move some xattrs in
5042  * the last cluster into the new allocated one.
5043  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5044  * lose the benefits of hashing because we'll have to search large leaves.
5045  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5046  * if it's bigger).
5047  *
5048  * first_bh is the first block of the previous extent rec and header_bh
5049  * indicates the bucket we will insert the new xattrs. They will be updated
5050  * when the header_bh is moved into the new cluster.
5051  */
5052 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5053                                        struct buffer_head *root_bh,
5054                                        struct ocfs2_xattr_bucket *first,
5055                                        struct ocfs2_xattr_bucket *target,
5056                                        u32 *num_clusters,
5057                                        u32 prev_cpos,
5058                                        int *extend,
5059                                        struct ocfs2_xattr_set_ctxt *ctxt)
5060 {
5061         int ret;
5062         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5063         u32 prev_clusters = *num_clusters;
5064         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5065         u64 block;
5066         handle_t *handle = ctxt->handle;
5067         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5068         struct ocfs2_extent_tree et;
5069
5070         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
5071              "previous xattr blkno = %llu\n",
5072              (unsigned long long)OCFS2_I(inode)->ip_blkno,
5073              prev_cpos, (unsigned long long)bucket_blkno(first));
5074
5075         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5076
5077         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5078                                       OCFS2_JOURNAL_ACCESS_WRITE);
5079         if (ret < 0) {
5080                 mlog_errno(ret);
5081                 goto leave;
5082         }
5083
5084         ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5085                                      clusters_to_add, &bit_off, &num_bits);
5086         if (ret < 0) {
5087                 if (ret != -ENOSPC)
5088                         mlog_errno(ret);
5089                 goto leave;
5090         }
5091
5092         BUG_ON(num_bits > clusters_to_add);
5093
5094         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5095         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
5096              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5097
5098         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5099             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5100              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5101                 /*
5102                  * If this cluster is contiguous with the old one and
5103                  * adding this new cluster, we don't surpass the limit of
5104                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5105                  * initialized and used like other buckets in the previous
5106                  * cluster.
5107                  * So add it as a contiguous one. The caller will handle
5108                  * its init process.
5109                  */
5110                 v_start = prev_cpos + prev_clusters;
5111                 *num_clusters = prev_clusters + num_bits;
5112                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5113                      num_bits);
5114         } else {
5115                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5116                                                        handle,
5117                                                        first,
5118                                                        target,
5119                                                        block,
5120                                                        prev_clusters,
5121                                                        &v_start,
5122                                                        extend);
5123                 if (ret) {
5124                         mlog_errno(ret);
5125                         goto leave;
5126                 }
5127         }
5128
5129         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
5130              num_bits, (unsigned long long)block, v_start);
5131         ret = ocfs2_insert_extent(handle, &et, v_start, block,
5132                                   num_bits, 0, ctxt->meta_ac);
5133         if (ret < 0) {
5134                 mlog_errno(ret);
5135                 goto leave;
5136         }
5137
5138         ocfs2_journal_dirty(handle, root_bh);
5139
5140 leave:
5141         return ret;
5142 }
5143
5144 /*
5145  * We are given an extent.  'first' is the bucket at the very front of
5146  * the extent.  The extent has space for an additional bucket past
5147  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5148  * of the target bucket.  We wish to shift every bucket past the target
5149  * down one, filling in that additional space.  When we get back to the
5150  * target, we split the target between itself and the now-empty bucket
5151  * at target+1 (aka, target_blkno + blks_per_bucket).
5152  */
5153 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5154                                      handle_t *handle,
5155                                      struct ocfs2_xattr_bucket *first,
5156                                      u64 target_blk,
5157                                      u32 num_clusters)
5158 {
5159         int ret, credits;
5160         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5161         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5162         u64 end_blk;
5163         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5164
5165         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5166              "from %llu, len = %u\n", (unsigned long long)target_blk,
5167              (unsigned long long)bucket_blkno(first), num_clusters);
5168
5169         /* The extent must have room for an additional bucket */
5170         BUG_ON(new_bucket >=
5171                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5172
5173         /* end_blk points to the last existing bucket */
5174         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5175
5176         /*
5177          * end_blk is the start of the last existing bucket.
5178          * Thus, (end_blk - target_blk) covers the target bucket and
5179          * every bucket after it up to, but not including, the last
5180          * existing bucket.  Then we add the last existing bucket, the
5181          * new bucket, and the first bucket (3 * blk_per_bucket).
5182          */
5183         credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5184         ret = ocfs2_extend_trans(handle, credits);
5185         if (ret) {
5186                 mlog_errno(ret);
5187                 goto out;
5188         }
5189
5190         ret = ocfs2_xattr_bucket_journal_access(handle, first,
5191                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5192         if (ret) {
5193                 mlog_errno(ret);
5194                 goto out;
5195         }
5196
5197         while (end_blk != target_blk) {
5198                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5199                                             end_blk + blk_per_bucket, 0);
5200                 if (ret)
5201                         goto out;
5202                 end_blk -= blk_per_bucket;
5203         }
5204
5205         /* Move half of the xattr in target_blkno to the next bucket. */
5206         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5207                                         target_blk + blk_per_bucket, NULL, 0);
5208
5209         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5210         ocfs2_xattr_bucket_journal_dirty(handle, first);
5211
5212 out:
5213         return ret;
5214 }
5215
5216 /*
5217  * Add new xattr bucket in an extent record and adjust the buckets
5218  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5219  * bucket we want to insert into.
5220  *
5221  * In the easy case, we will move all the buckets after target down by
5222  * one. Half of target's xattrs will be moved to the next bucket.
5223  *
5224  * If current cluster is full, we'll allocate a new one.  This may not
5225  * be contiguous.  The underlying calls will make sure that there is
5226  * space for the insert, shifting buckets around if necessary.
5227  * 'target' may be moved by those calls.
5228  */
5229 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5230                                       struct buffer_head *xb_bh,
5231                                       struct ocfs2_xattr_bucket *target,
5232                                       struct ocfs2_xattr_set_ctxt *ctxt)
5233 {
5234         struct ocfs2_xattr_block *xb =
5235                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5236         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5237         struct ocfs2_extent_list *el = &xb_root->xt_list;
5238         u32 name_hash =
5239                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5240         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5241         int ret, num_buckets, extend = 1;
5242         u64 p_blkno;
5243         u32 e_cpos, num_clusters;
5244         /* The bucket at the front of the extent */
5245         struct ocfs2_xattr_bucket *first;
5246
5247         mlog(0, "Add new xattr bucket starting from %llu\n",
5248              (unsigned long long)bucket_blkno(target));
5249
5250         /* The first bucket of the original extent */
5251         first = ocfs2_xattr_bucket_new(inode);
5252         if (!first) {
5253                 ret = -ENOMEM;
5254                 mlog_errno(ret);
5255                 goto out;
5256         }
5257
5258         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5259                                   &num_clusters, el);
5260         if (ret) {
5261                 mlog_errno(ret);
5262                 goto out;
5263         }
5264
5265         ret = ocfs2_read_xattr_bucket(first, p_blkno);
5266         if (ret) {
5267                 mlog_errno(ret);
5268                 goto out;
5269         }
5270
5271         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5272         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5273                 /*
5274                  * This can move first+target if the target bucket moves
5275                  * to the new extent.
5276                  */
5277                 ret = ocfs2_add_new_xattr_cluster(inode,
5278                                                   xb_bh,
5279                                                   first,
5280                                                   target,
5281                                                   &num_clusters,
5282                                                   e_cpos,
5283                                                   &extend,
5284                                                   ctxt);
5285                 if (ret) {
5286                         mlog_errno(ret);
5287                         goto out;
5288                 }
5289         }
5290
5291         if (extend) {
5292                 ret = ocfs2_extend_xattr_bucket(inode,
5293                                                 ctxt->handle,
5294                                                 first,
5295                                                 bucket_blkno(target),
5296                                                 num_clusters);
5297                 if (ret)
5298                         mlog_errno(ret);
5299         }
5300
5301 out:
5302         ocfs2_xattr_bucket_free(first);
5303
5304         return ret;
5305 }
5306
5307 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5308                                         struct ocfs2_xattr_bucket *bucket,
5309                                         int offs)
5310 {
5311         int block_off = offs >> inode->i_sb->s_blocksize_bits;
5312
5313         offs = offs % inode->i_sb->s_blocksize;
5314         return bucket_block(bucket, block_off) + offs;
5315 }
5316
5317 /*
5318  * Truncate the specified xe_off entry in xattr bucket.
5319  * bucket is indicated by header_bh and len is the new length.
5320  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5321  *
5322  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5323  */
5324 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5325                                              struct ocfs2_xattr_bucket *bucket,
5326                                              int xe_off,
5327                                              int len,
5328                                              struct ocfs2_xattr_set_ctxt *ctxt)
5329 {
5330         int ret, offset;
5331         u64 value_blk;
5332         struct ocfs2_xattr_entry *xe;
5333         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5334         size_t blocksize = inode->i_sb->s_blocksize;
5335         struct ocfs2_xattr_value_buf vb = {
5336                 .vb_access = ocfs2_journal_access,
5337         };
5338
5339         xe = &xh->xh_entries[xe_off];
5340
5341         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5342
5343         offset = le16_to_cpu(xe->xe_name_offset) +
5344                  OCFS2_XATTR_SIZE(xe->xe_name_len);
5345
5346         value_blk = offset / blocksize;
5347
5348         /* We don't allow ocfs2_xattr_value to be stored in different block. */
5349         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5350
5351         vb.vb_bh = bucket->bu_bhs[value_blk];
5352         BUG_ON(!vb.vb_bh);
5353
5354         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5355                 (vb.vb_bh->b_data + offset % blocksize);
5356
5357         /*
5358          * From here on out we have to dirty the bucket.  The generic
5359          * value calls only modify one of the bucket's bhs, but we need
5360          * to send the bucket at once.  So if they error, they *could* have
5361          * modified something.  We have to assume they did, and dirty
5362          * the whole bucket.  This leaves us in a consistent state.
5363          */
5364         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5365              xe_off, (unsigned long long)bucket_blkno(bucket), len);
5366         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5367         if (ret) {
5368                 mlog_errno(ret);
5369                 goto out;
5370         }
5371
5372         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5373                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5374         if (ret) {
5375                 mlog_errno(ret);
5376                 goto out;
5377         }
5378
5379         xe->xe_value_size = cpu_to_le64(len);
5380
5381         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5382
5383 out:
5384         return ret;
5385 }
5386
5387 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5388                                   struct buffer_head *root_bh,
5389                                   u64 blkno,
5390                                   u32 cpos,
5391                                   u32 len,
5392                                   void *para)
5393 {
5394         int ret;
5395         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5396         struct inode *tl_inode = osb->osb_tl_inode;
5397         handle_t *handle;
5398         struct ocfs2_xattr_block *xb =
5399                         (struct ocfs2_xattr_block *)root_bh->b_data;
5400         struct ocfs2_alloc_context *meta_ac = NULL;
5401         struct ocfs2_cached_dealloc_ctxt dealloc;
5402         struct ocfs2_extent_tree et;
5403
5404         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5405                                           ocfs2_delete_xattr_in_bucket, para);
5406         if (ret) {
5407                 mlog_errno(ret);
5408                 return ret;
5409         }
5410
5411         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5412
5413         ocfs2_init_dealloc_ctxt(&dealloc);
5414
5415         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5416              cpos, len, (unsigned long long)blkno);
5417
5418         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5419                                                len);
5420
5421         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5422         if (ret) {
5423                 mlog_errno(ret);
5424                 return ret;
5425         }
5426
5427         mutex_lock(&tl_inode->i_mutex);
5428
5429         if (ocfs2_truncate_log_needs_flush(osb)) {
5430                 ret = __ocfs2_flush_truncate_log(osb);
5431                 if (ret < 0) {
5432                         mlog_errno(ret);
5433                         goto out;
5434                 }
5435         }
5436
5437         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5438         if (IS_ERR(handle)) {
5439                 ret = -ENOMEM;
5440                 mlog_errno(ret);
5441                 goto out;
5442         }
5443
5444         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5445                                       OCFS2_JOURNAL_ACCESS_WRITE);
5446         if (ret) {
5447                 mlog_errno(ret);
5448                 goto out_commit;
5449         }
5450
5451         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5452                                   &dealloc);
5453         if (ret) {
5454                 mlog_errno(ret);
5455                 goto out_commit;
5456         }
5457
5458         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5459         ocfs2_journal_dirty(handle, root_bh);
5460
5461         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5462         if (ret)
5463                 mlog_errno(ret);
5464
5465 out_commit:
5466         ocfs2_commit_trans(osb, handle);
5467 out:
5468         ocfs2_schedule_truncate_log_flush(osb, 1);
5469
5470         mutex_unlock(&tl_inode->i_mutex);
5471
5472         if (meta_ac)
5473                 ocfs2_free_alloc_context(meta_ac);
5474
5475         ocfs2_run_deallocs(osb, &dealloc);
5476
5477         return ret;
5478 }
5479
5480 /*
5481  * check whether the xattr bucket is filled up with the same hash value.
5482  * If we want to insert the xattr with the same hash, return -ENOSPC.
5483  * If we want to insert a xattr with different hash value, go ahead
5484  * and ocfs2_divide_xattr_bucket will handle this.
5485  */
5486 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5487                                               struct ocfs2_xattr_bucket *bucket,
5488                                               const char *name)
5489 {
5490         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5491         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5492
5493         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5494                 return 0;
5495
5496         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5497             xh->xh_entries[0].xe_name_hash) {
5498                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5499                      "hash = %u\n",
5500                      (unsigned long long)bucket_blkno(bucket),
5501                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5502                 return -ENOSPC;
5503         }
5504
5505         return 0;
5506 }
5507
5508 /*
5509  * Try to set the entry in the current bucket.  If we fail, the caller
5510  * will handle getting us another bucket.
5511  */
5512 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5513                                         struct ocfs2_xattr_info *xi,
5514                                         struct ocfs2_xattr_search *xs,
5515                                         struct ocfs2_xattr_set_ctxt *ctxt)
5516 {
5517         int ret;
5518         struct ocfs2_xa_loc loc;
5519
5520         mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name);
5521
5522         ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5523                                        xs->not_found ? NULL : xs->here);
5524         ret = ocfs2_xa_set(&loc, xi, ctxt);
5525         if (!ret) {
5526                 xs->here = loc.xl_entry;
5527                 goto out;
5528         }
5529         if (ret != -ENOSPC) {
5530                 mlog_errno(ret);
5531                 goto out;
5532         }
5533
5534         /* Ok, we need space.  Let's try defragmenting the bucket. */
5535         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5536                                         xs->bucket);
5537         if (ret) {
5538                 mlog_errno(ret);
5539                 goto out;
5540         }
5541
5542         ret = ocfs2_xa_set(&loc, xi, ctxt);
5543         if (!ret) {
5544                 xs->here = loc.xl_entry;
5545                 goto out;
5546         }
5547         if (ret != -ENOSPC)
5548                 mlog_errno(ret);
5549
5550
5551 out:
5552         mlog_exit(ret);
5553         return ret;
5554 }
5555
5556 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5557                                              struct ocfs2_xattr_info *xi,
5558                                              struct ocfs2_xattr_search *xs,
5559                                              struct ocfs2_xattr_set_ctxt *ctxt)
5560 {
5561         int ret;
5562
5563         mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5564
5565         ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5566         if (!ret)
5567                 goto out;
5568         if (ret != -ENOSPC) {
5569                 mlog_errno(ret);
5570                 goto out;
5571         }
5572
5573         /* Ack, need more space.  Let's try to get another bucket! */
5574
5575         /*
5576          * We do not allow for overlapping ranges between buckets. And
5577          * the maximum number of collisions we will allow for then is
5578          * one bucket's worth, so check it here whether we need to
5579          * add a new bucket for the insert.
5580          */
5581         ret = ocfs2_check_xattr_bucket_collision(inode,
5582                                                  xs->bucket,
5583                                                  xi->xi_name);
5584         if (ret) {
5585                 mlog_errno(ret);
5586                 goto out;
5587         }
5588
5589         ret = ocfs2_add_new_xattr_bucket(inode,
5590                                          xs->xattr_bh,
5591                                          xs->bucket,
5592                                          ctxt);
5593         if (ret) {
5594                 mlog_errno(ret);
5595                 goto out;
5596         }
5597
5598         /*
5599          * ocfs2_add_new_xattr_bucket() will have updated
5600          * xs->bucket if it moved, but it will not have updated
5601          * any of the other search fields.  Thus, we drop it and
5602          * re-search.  Everything should be cached, so it'll be
5603          * quick.
5604          */
5605         ocfs2_xattr_bucket_relse(xs->bucket);
5606         ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5607                                            xi->xi_name_index,
5608                                            xi->xi_name, xs);
5609         if (ret && ret != -ENODATA)
5610                 goto out;
5611         xs->not_found = ret;
5612
5613         /* Ok, we have a new bucket, let's try again */
5614         ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5615         if (ret && (ret != -ENOSPC))
5616                 mlog_errno(ret);
5617
5618 out:
5619         mlog_exit(ret);
5620         return ret;
5621 }
5622
5623 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5624                                         struct ocfs2_xattr_bucket *bucket,
5625                                         void *para)
5626 {
5627         int ret = 0, ref_credits;
5628         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5629         u16 i;
5630         struct ocfs2_xattr_entry *xe;
5631         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5632         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5633         int credits = ocfs2_remove_extent_credits(osb->sb) +
5634                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5635         struct ocfs2_xattr_value_root *xv;
5636         struct ocfs2_rm_xattr_bucket_para *args =
5637                         (struct ocfs2_rm_xattr_bucket_para *)para;
5638
5639         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5640
5641         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5642                 xe = &xh->xh_entries[i];
5643                 if (ocfs2_xattr_is_local(xe))
5644                         continue;
5645
5646                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5647                                                       i, &xv, NULL);
5648
5649                 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5650                                                          args->ref_ci,
5651                                                          args->ref_root_bh,
5652                                                          &ctxt.meta_ac,
5653                                                          &ref_credits);
5654
5655                 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5656                 if (IS_ERR(ctxt.handle)) {
5657                         ret = PTR_ERR(ctxt.handle);
5658                         mlog_errno(ret);
5659                         break;
5660                 }
5661
5662                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5663                                                         i, 0, &ctxt);
5664
5665                 ocfs2_commit_trans(osb, ctxt.handle);
5666                 if (ctxt.meta_ac) {
5667                         ocfs2_free_alloc_context(ctxt.meta_ac);
5668                         ctxt.meta_ac = NULL;
5669                 }
5670                 if (ret) {
5671                         mlog_errno(ret);
5672                         break;
5673                 }
5674         }
5675
5676         if (ctxt.meta_ac)
5677                 ocfs2_free_alloc_context(ctxt.meta_ac);
5678         ocfs2_schedule_truncate_log_flush(osb, 1);
5679         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5680         return ret;
5681 }
5682
5683 /*
5684  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5685  * or change the extent record flag), we need to recalculate
5686  * the metaecc for the whole bucket. So it is done here.
5687  *
5688  * Note:
5689  * We have to give the extra credits for the caller.
5690  */
5691 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5692                                             handle_t *handle,
5693                                             void *para)
5694 {
5695         int ret;
5696         struct ocfs2_xattr_bucket *bucket =
5697                         (struct ocfs2_xattr_bucket *)para;
5698
5699         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5700                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5701         if (ret) {
5702                 mlog_errno(ret);
5703                 return ret;
5704         }
5705
5706         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5707
5708         return 0;
5709 }
5710
5711 /*
5712  * Special action we need if the xattr value is refcounted.
5713  *
5714  * 1. If the xattr is refcounted, lock the tree.
5715  * 2. CoW the xattr if we are setting the new value and the value
5716  *    will be stored outside.
5717  * 3. In other case, decrease_refcount will work for us, so just
5718  *    lock the refcount tree, calculate the meta and credits is OK.
5719  *
5720  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5721  * currently CoW is a completed transaction, while this function
5722  * will also lock the allocators and let us deadlock. So we will
5723  * CoW the whole xattr value.
5724  */
5725 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5726                                         struct ocfs2_dinode *di,
5727                                         struct ocfs2_xattr_info *xi,
5728                                         struct ocfs2_xattr_search *xis,
5729                                         struct ocfs2_xattr_search *xbs,
5730                                         struct ocfs2_refcount_tree **ref_tree,
5731                                         int *meta_add,
5732                                         int *credits)
5733 {
5734         int ret = 0;
5735         struct ocfs2_xattr_block *xb;
5736         struct ocfs2_xattr_entry *xe;
5737         char *base;
5738         u32 p_cluster, num_clusters;
5739         unsigned int ext_flags;
5740         int name_offset, name_len;
5741         struct ocfs2_xattr_value_buf vb;
5742         struct ocfs2_xattr_bucket *bucket = NULL;
5743         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5744         struct ocfs2_post_refcount refcount;
5745         struct ocfs2_post_refcount *p = NULL;
5746         struct buffer_head *ref_root_bh = NULL;
5747
5748         if (!xis->not_found) {
5749                 xe = xis->here;
5750                 name_offset = le16_to_cpu(xe->xe_name_offset);
5751                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5752                 base = xis->base;
5753                 vb.vb_bh = xis->inode_bh;
5754                 vb.vb_access = ocfs2_journal_access_di;
5755         } else {
5756                 int i, block_off = 0;
5757                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5758                 xe = xbs->here;
5759                 name_offset = le16_to_cpu(xe->xe_name_offset);
5760                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5761                 i = xbs->here - xbs->header->xh_entries;
5762
5763                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5764                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5765                                                         bucket_xh(xbs->bucket),
5766                                                         i, &block_off,
5767                                                         &name_offset);
5768                         if (ret) {
5769                                 mlog_errno(ret);
5770                                 goto out;
5771                         }
5772                         base = bucket_block(xbs->bucket, block_off);
5773                         vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5774                         vb.vb_access = ocfs2_journal_access;
5775
5776                         if (ocfs2_meta_ecc(osb)) {
5777                                 /*create parameters for ocfs2_post_refcount. */
5778                                 bucket = xbs->bucket;
5779                                 refcount.credits = bucket->bu_blocks;
5780                                 refcount.para = bucket;
5781                                 refcount.func =
5782                                         ocfs2_xattr_bucket_post_refcount;
5783                                 p = &refcount;
5784                         }
5785                 } else {
5786                         base = xbs->base;
5787                         vb.vb_bh = xbs->xattr_bh;
5788                         vb.vb_access = ocfs2_journal_access_xb;
5789                 }
5790         }
5791
5792         if (ocfs2_xattr_is_local(xe))
5793                 goto out;
5794
5795         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5796                                 (base + name_offset + name_len);
5797
5798         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5799                                        &num_clusters, &vb.vb_xv->xr_list,
5800                                        &ext_flags);
5801         if (ret) {
5802                 mlog_errno(ret);
5803                 goto out;
5804         }
5805
5806         /*
5807          * We just need to check the 1st extent record, since we always
5808          * CoW the whole xattr. So there shouldn't be a xattr with
5809          * some REFCOUNT extent recs after the 1st one.
5810          */
5811         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5812                 goto out;
5813
5814         ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5815                                        1, ref_tree, &ref_root_bh);
5816         if (ret) {
5817                 mlog_errno(ret);
5818                 goto out;
5819         }
5820
5821         /*
5822          * If we are deleting the xattr or the new size will be stored inside,
5823          * cool, leave it there, the xattr truncate process will remove them
5824          * for us(it still needs the refcount tree lock and the meta, credits).
5825          * And the worse case is that every cluster truncate will split the
5826          * refcount tree, and make the original extent become 3. So we will need
5827          * 2 * cluster more extent recs at most.
5828          */
5829         if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5830
5831                 ret = ocfs2_refcounted_xattr_delete_need(inode,
5832                                                          &(*ref_tree)->rf_ci,
5833                                                          ref_root_bh, vb.vb_xv,
5834                                                          meta_add, credits);
5835                 if (ret)
5836                         mlog_errno(ret);
5837                 goto out;
5838         }
5839
5840         ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5841                                        *ref_tree, ref_root_bh, 0,
5842                                        le32_to_cpu(vb.vb_xv->xr_clusters), p);
5843         if (ret)
5844                 mlog_errno(ret);
5845
5846 out:
5847         brelse(ref_root_bh);
5848         return ret;
5849 }
5850
5851 /*
5852  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5853  * The physical clusters will be added to refcount tree.
5854  */
5855 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5856                                 struct ocfs2_xattr_value_root *xv,
5857                                 struct ocfs2_extent_tree *value_et,
5858                                 struct ocfs2_caching_info *ref_ci,
5859                                 struct buffer_head *ref_root_bh,
5860                                 struct ocfs2_cached_dealloc_ctxt *dealloc,
5861                                 struct ocfs2_post_refcount *refcount)
5862 {
5863         int ret = 0;
5864         u32 clusters = le32_to_cpu(xv->xr_clusters);
5865         u32 cpos, p_cluster, num_clusters;
5866         struct ocfs2_extent_list *el = &xv->xr_list;
5867         unsigned int ext_flags;
5868
5869         cpos = 0;
5870         while (cpos < clusters) {
5871                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5872                                                &num_clusters, el, &ext_flags);
5873
5874                 cpos += num_clusters;
5875                 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5876                         continue;
5877
5878                 BUG_ON(!p_cluster);
5879
5880                 ret = ocfs2_add_refcount_flag(inode, value_et,
5881                                               ref_ci, ref_root_bh,
5882                                               cpos - num_clusters,
5883                                               p_cluster, num_clusters,
5884                                               dealloc, refcount);
5885                 if (ret) {
5886                         mlog_errno(ret);
5887                         break;
5888                 }
5889         }
5890
5891         return ret;
5892 }
5893
5894 /*
5895  * Given a normal ocfs2_xattr_header, refcount all the entries which
5896  * have value stored outside.
5897  * Used for xattrs stored in inode and ocfs2_xattr_block.
5898  */
5899 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5900                                 struct ocfs2_xattr_value_buf *vb,
5901                                 struct ocfs2_xattr_header *header,
5902                                 struct ocfs2_caching_info *ref_ci,
5903                                 struct buffer_head *ref_root_bh,
5904                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5905 {
5906
5907         struct ocfs2_xattr_entry *xe;
5908         struct ocfs2_xattr_value_root *xv;
5909         struct ocfs2_extent_tree et;
5910         int i, ret = 0;
5911
5912         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5913                 xe = &header->xh_entries[i];
5914
5915                 if (ocfs2_xattr_is_local(xe))
5916                         continue;
5917
5918                 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5919                         le16_to_cpu(xe->xe_name_offset) +
5920                         OCFS2_XATTR_SIZE(xe->xe_name_len));
5921
5922                 vb->vb_xv = xv;
5923                 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5924
5925                 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5926                                                         ref_ci, ref_root_bh,
5927                                                         dealloc, NULL);
5928                 if (ret) {
5929                         mlog_errno(ret);
5930                         break;
5931                 }
5932         }
5933
5934         return ret;
5935 }
5936
5937 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5938                                 struct buffer_head *fe_bh,
5939                                 struct ocfs2_caching_info *ref_ci,
5940                                 struct buffer_head *ref_root_bh,
5941                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5942 {
5943         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5944         struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5945                                 (fe_bh->b_data + inode->i_sb->s_blocksize -
5946                                 le16_to_cpu(di->i_xattr_inline_size));
5947         struct ocfs2_xattr_value_buf vb = {
5948                 .vb_bh = fe_bh,
5949                 .vb_access = ocfs2_journal_access_di,
5950         };
5951
5952         return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5953                                                   ref_ci, ref_root_bh, dealloc);
5954 }
5955
/*
 * Arguments handed to ocfs2_xattr_bucket_value_refcount() through the
 * void *para of the bucket iteration.  All three members are passed on
 * unchanged to ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
        /* caching info of the refcount tree */
        struct ocfs2_caching_info *ref_ci;
        /* bh of the refcount tree root */
        struct buffer_head *ref_root_bh;
        /* dealloc context to use while refcounting */
        struct ocfs2_cached_dealloc_ctxt *dealloc;
};
5961
5962 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5963                                            struct ocfs2_xattr_bucket *bucket,
5964                                            int offset,
5965                                            struct ocfs2_xattr_value_root **xv,
5966                                            struct buffer_head **bh)
5967 {
5968         int ret, block_off, name_offset;
5969         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5970         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5971         void *base;
5972
5973         ret = ocfs2_xattr_bucket_get_name_value(sb,
5974                                                 bucket_xh(bucket),
5975                                                 offset,
5976                                                 &block_off,
5977                                                 &name_offset);
5978         if (ret) {
5979                 mlog_errno(ret);
5980                 goto out;
5981         }
5982
5983         base = bucket_block(bucket, block_off);
5984
5985         *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
5986                          OCFS2_XATTR_SIZE(xe->xe_name_len));
5987
5988         if (bh)
5989                 *bh = bucket->bu_bhs[block_off];
5990 out:
5991         return ret;
5992 }
5993
5994 /*
5995  * For a given xattr bucket, refcount all the entries which
5996  * have value stored outside.
5997  */
5998 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
5999                                              struct ocfs2_xattr_bucket *bucket,
6000                                              void *para)
6001 {
6002         int i, ret = 0;
6003         struct ocfs2_extent_tree et;
6004         struct ocfs2_xattr_tree_value_refcount_para *ref =
6005                         (struct ocfs2_xattr_tree_value_refcount_para *)para;
6006         struct ocfs2_xattr_header *xh =
6007                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6008         struct ocfs2_xattr_entry *xe;
6009         struct ocfs2_xattr_value_buf vb = {
6010                 .vb_access = ocfs2_journal_access,
6011         };
6012         struct ocfs2_post_refcount refcount = {
6013                 .credits = bucket->bu_blocks,
6014                 .para = bucket,
6015                 .func = ocfs2_xattr_bucket_post_refcount,
6016         };
6017         struct ocfs2_post_refcount *p = NULL;
6018
6019         /* We only need post_refcount if we support metaecc. */
6020         if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6021                 p = &refcount;
6022
6023         mlog(0, "refcount bucket %llu, count = %u\n",
6024              (unsigned long long)bucket_blkno(bucket),
6025              le16_to_cpu(xh->xh_count));
6026         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6027                 xe = &xh->xh_entries[i];
6028
6029                 if (ocfs2_xattr_is_local(xe))
6030                         continue;
6031
6032                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6033                                                       &vb.vb_xv, &vb.vb_bh);
6034                 if (ret) {
6035                         mlog_errno(ret);
6036                         break;
6037                 }
6038
6039                 ocfs2_init_xattr_value_extent_tree(&et,
6040                                                    INODE_CACHE(inode), &vb);
6041
6042                 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6043                                                         &et, ref->ref_ci,
6044                                                         ref->ref_root_bh,
6045                                                         ref->dealloc, p);
6046                 if (ret) {
6047                         mlog_errno(ret);
6048                         break;
6049                 }
6050         }
6051
6052         return ret;
6053
6054 }
6055
6056 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6057                                      struct buffer_head *root_bh,
6058                                      u64 blkno, u32 cpos, u32 len, void *para)
6059 {
6060         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6061                                            ocfs2_xattr_bucket_value_refcount,
6062                                            para);
6063 }
6064
6065 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6066                                 struct buffer_head *blk_bh,
6067                                 struct ocfs2_caching_info *ref_ci,
6068                                 struct buffer_head *ref_root_bh,
6069                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6070 {
6071         int ret = 0;
6072         struct ocfs2_xattr_block *xb =
6073                                 (struct ocfs2_xattr_block *)blk_bh->b_data;
6074
6075         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6076                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6077                 struct ocfs2_xattr_value_buf vb = {
6078                         .vb_bh = blk_bh,
6079                         .vb_access = ocfs2_journal_access_xb,
6080                 };
6081
6082                 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6083                                                          ref_ci, ref_root_bh,
6084                                                          dealloc);
6085         } else {
6086                 struct ocfs2_xattr_tree_value_refcount_para para = {
6087                         .ref_ci = ref_ci,
6088                         .ref_root_bh = ref_root_bh,
6089                         .dealloc = dealloc,
6090                 };
6091
6092                 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6093                                                 ocfs2_refcount_xattr_tree_rec,
6094                                                 &para);
6095         }
6096
6097         return ret;
6098 }
6099
6100 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6101                                      struct buffer_head *fe_bh,
6102                                      struct ocfs2_caching_info *ref_ci,
6103                                      struct buffer_head *ref_root_bh,
6104                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
6105 {
6106         int ret = 0;
6107         struct ocfs2_inode_info *oi = OCFS2_I(inode);
6108         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6109         struct buffer_head *blk_bh = NULL;
6110
6111         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6112                 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6113                                                          ref_ci, ref_root_bh,
6114                                                          dealloc);
6115                 if (ret) {
6116                         mlog_errno(ret);
6117                         goto out;
6118                 }
6119         }
6120
6121         if (!di->i_xattr_loc)
6122                 goto out;
6123
6124         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6125                                      &blk_bh);
6126         if (ret < 0) {
6127                 mlog_errno(ret);
6128                 goto out;
6129         }
6130
6131         ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6132                                                 ref_root_bh, dealloc);
6133         if (ret)
6134                 mlog_errno(ret);
6135
6136         brelse(blk_bh);
6137 out:
6138
6139         return ret;
6140 }
6141
/*
 * Filter callback: a non-zero result means the xattr entry xe is reflinked,
 * zero means it is removed from the new xattr header.
 */
typedef int should_xattr_reflinked(struct ocfs2_xattr_entry *xe);

/*
 * Everything the xattr reflink code needs for one operation.
 * old_bh and new_bh are the inode buffer heads of the old and new inode.
 */
struct ocfs2_xattr_reflink {
        struct inode *old_inode;
        struct inode *new_inode;
        struct buffer_head *old_bh;
        struct buffer_head *new_bh;
        /* Refcount tree whose counts are increased for the shared clusters. */
        struct ocfs2_caching_info *ref_ci;
        struct buffer_head *ref_root_bh;
        struct ocfs2_cached_dealloc_ctxt *dealloc;
        /* Optional filter; when NULL no entry is filtered out. */
        should_xattr_reflinked *xattr_reflinked;
};
6157
/*
 * Given an xattr header and the index of an entry in it, return the
 * matching value root in *xv and the buffer head holding it in *ret_bh.
 * Xattrs stored in the inode, in an xattr block and in the xattr tree each
 * have their own implementation of this callback.
 */
typedef int get_xattr_value_root(struct super_block *sb,
                                 struct buffer_head *bh,
                                 struct ocfs2_xattr_header *xh,
                                 int offset,
                                 struct ocfs2_xattr_value_root **xv,
                                 struct buffer_head **ret_bh,
                                 void *para);
6170
6171 /*
6172  * Calculate all the xattr value root metadata stored in this xattr header and
6173  * credits we need if we create them from the scratch.
6174  * We use get_xattr_value_root so that all types of xattr container can use it.
6175  */
6176 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6177                                              struct buffer_head *bh,
6178                                              struct ocfs2_xattr_header *xh,
6179                                              int *metas, int *credits,
6180                                              int *num_recs,
6181                                              get_xattr_value_root *func,
6182                                              void *para)
6183 {
6184         int i, ret = 0;
6185         struct ocfs2_xattr_value_root *xv;
6186         struct ocfs2_xattr_entry *xe;
6187
6188         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6189                 xe = &xh->xh_entries[i];
6190                 if (ocfs2_xattr_is_local(xe))
6191                         continue;
6192
6193                 ret = func(sb, bh, xh, i, &xv, NULL, para);
6194                 if (ret) {
6195                         mlog_errno(ret);
6196                         break;
6197                 }
6198
6199                 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6200                           le16_to_cpu(xv->xr_list.l_next_free_rec);
6201
6202                 *credits += ocfs2_calc_extend_credits(sb,
6203                                                 &def_xv.xv.xr_list,
6204                                                 le32_to_cpu(xv->xr_clusters));
6205
6206                 /*
6207                  * If the value is a tree with depth > 1, We don't go deep
6208                  * to the extent block, so just calculate a maximum record num.
6209                  */
6210                 if (!xv->xr_list.l_tree_depth)
6211                         *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6212                 else
6213                         *num_recs += ocfs2_clusters_for_bytes(sb,
6214                                                               XATTR_SIZE_MAX);
6215         }
6216
6217         return ret;
6218 }
6219
6220 /* Used by xattr inode and block to return the right xv and buffer_head. */
6221 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6222                                       struct buffer_head *bh,
6223                                       struct ocfs2_xattr_header *xh,
6224                                       int offset,
6225                                       struct ocfs2_xattr_value_root **xv,
6226                                       struct buffer_head **ret_bh,
6227                                       void *para)
6228 {
6229         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6230
6231         *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6232                 le16_to_cpu(xe->xe_name_offset) +
6233                 OCFS2_XATTR_SIZE(xe->xe_name_len));
6234
6235         if (ret_bh)
6236                 *ret_bh = bh;
6237
6238         return 0;
6239 }
6240
6241 /*
6242  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6243  * It is only used for inline xattr and xattr block.
6244  */
6245 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6246                                         struct ocfs2_xattr_header *xh,
6247                                         struct buffer_head *ref_root_bh,
6248                                         int *credits,
6249                                         struct ocfs2_alloc_context **meta_ac)
6250 {
6251         int ret, meta_add = 0, num_recs = 0;
6252         struct ocfs2_refcount_block *rb =
6253                         (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6254
6255         *credits = 0;
6256
6257         ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6258                                                 &meta_add, credits, &num_recs,
6259                                                 ocfs2_get_xattr_value_root,
6260                                                 NULL);
6261         if (ret) {
6262                 mlog_errno(ret);
6263                 goto out;
6264         }
6265
6266         /*
6267          * We need to add/modify num_recs in refcount tree, so just calculate
6268          * an approximate number we need for refcount tree change.
6269          * Sometimes we need to split the tree, and after split,  half recs
6270          * will be moved to the new block, and a new block can only provide
6271          * half number of recs. So we multiple new blocks by 2.
6272          */
6273         num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6274         meta_add += num_recs;
6275         *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6276         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6277                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6278                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6279         else
6280                 *credits += 1;
6281
6282         ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6283         if (ret)
6284                 mlog_errno(ret);
6285
6286 out:
6287         return ret;
6288 }
6289
6290 /*
6291  * Given a xattr header, reflink all the xattrs in this container.
6292  * It can be used for inode, block and bucket.
6293  *
6294  * NOTE:
6295  * Before we call this function, the caller has memcpy the xattr in
6296  * old_xh to the new_xh.
6297  *
6298  * If args.xattr_reflinked is set, call it to decide whether the xe should
6299  * be reflinked or not. If not, remove it from the new xattr header.
6300  */
6301 static int ocfs2_reflink_xattr_header(handle_t *handle,
6302                                       struct ocfs2_xattr_reflink *args,
6303                                       struct buffer_head *old_bh,
6304                                       struct ocfs2_xattr_header *xh,
6305                                       struct buffer_head *new_bh,
6306                                       struct ocfs2_xattr_header *new_xh,
6307                                       struct ocfs2_xattr_value_buf *vb,
6308                                       struct ocfs2_alloc_context *meta_ac,
6309                                       get_xattr_value_root *func,
6310                                       void *para)
6311 {
6312         int ret = 0, i, j;
6313         struct super_block *sb = args->old_inode->i_sb;
6314         struct buffer_head *value_bh;
6315         struct ocfs2_xattr_entry *xe, *last;
6316         struct ocfs2_xattr_value_root *xv, *new_xv;
6317         struct ocfs2_extent_tree data_et;
6318         u32 clusters, cpos, p_cluster, num_clusters;
6319         unsigned int ext_flags = 0;
6320
6321         mlog(0, "reflink xattr in container %llu, count = %u\n",
6322              (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6323
6324         last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6325         for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6326                 xe = &xh->xh_entries[i];
6327
6328                 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6329                         xe = &new_xh->xh_entries[j];
6330
6331                         le16_add_cpu(&new_xh->xh_count, -1);
6332                         if (new_xh->xh_count) {
6333                                 memmove(xe, xe + 1,
6334                                         (void *)last - (void *)xe);
6335                                 memset(last, 0,
6336                                        sizeof(struct ocfs2_xattr_entry));
6337                         }
6338
6339                         /*
6340                          * We don't want j to increase in the next round since
6341                          * it is already moved ahead.
6342                          */
6343                         j--;
6344                         continue;
6345                 }
6346
6347                 if (ocfs2_xattr_is_local(xe))
6348                         continue;
6349
6350                 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6351                 if (ret) {
6352                         mlog_errno(ret);
6353                         break;
6354                 }
6355
6356                 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6357                 if (ret) {
6358                         mlog_errno(ret);
6359                         break;
6360                 }
6361
6362                 /*
6363                  * For the xattr which has l_tree_depth = 0, all the extent
6364                  * recs have already be copied to the new xh with the
6365                  * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6366                  * increase the refount count int the refcount tree.
6367                  *
6368                  * For the xattr which has l_tree_depth > 0, we need
6369                  * to initialize it to the empty default value root,
6370                  * and then insert the extents one by one.
6371                  */
6372                 if (xv->xr_list.l_tree_depth) {
6373                         memcpy(new_xv, &def_xv, sizeof(def_xv));
6374                         vb->vb_xv = new_xv;
6375                         vb->vb_bh = value_bh;
6376                         ocfs2_init_xattr_value_extent_tree(&data_et,
6377                                         INODE_CACHE(args->new_inode), vb);
6378                 }
6379
6380                 clusters = le32_to_cpu(xv->xr_clusters);
6381                 cpos = 0;
6382                 while (cpos < clusters) {
6383                         ret = ocfs2_xattr_get_clusters(args->old_inode,
6384                                                        cpos,
6385                                                        &p_cluster,
6386                                                        &num_clusters,
6387                                                        &xv->xr_list,
6388                                                        &ext_flags);
6389                         if (ret) {
6390                                 mlog_errno(ret);
6391                                 goto out;
6392                         }
6393
6394                         BUG_ON(!p_cluster);
6395
6396                         if (xv->xr_list.l_tree_depth) {
6397                                 ret = ocfs2_insert_extent(handle,
6398                                                 &data_et, cpos,
6399                                                 ocfs2_clusters_to_blocks(
6400                                                         args->old_inode->i_sb,
6401                                                         p_cluster),
6402                                                 num_clusters, ext_flags,
6403                                                 meta_ac);
6404                                 if (ret) {
6405                                         mlog_errno(ret);
6406                                         goto out;
6407                                 }
6408                         }
6409
6410                         ret = ocfs2_increase_refcount(handle, args->ref_ci,
6411                                                       args->ref_root_bh,
6412                                                       p_cluster, num_clusters,
6413                                                       meta_ac, args->dealloc);
6414                         if (ret) {
6415                                 mlog_errno(ret);
6416                                 goto out;
6417                         }
6418
6419                         cpos += num_clusters;
6420                 }
6421         }
6422
6423 out:
6424         return ret;
6425 }
6426
6427 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6428 {
6429         int ret = 0, credits = 0;
6430         handle_t *handle;
6431         struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6432         struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6433         int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6434         int header_off = osb->sb->s_blocksize - inline_size;
6435         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6436                                         (args->old_bh->b_data + header_off);
6437         struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6438                                         (args->new_bh->b_data + header_off);
6439         struct ocfs2_alloc_context *meta_ac = NULL;
6440         struct ocfs2_inode_info *new_oi;
6441         struct ocfs2_dinode *new_di;
6442         struct ocfs2_xattr_value_buf vb = {
6443                 .vb_bh = args->new_bh,
6444                 .vb_access = ocfs2_journal_access_di,
6445         };
6446
6447         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6448                                                   &credits, &meta_ac);
6449         if (ret) {
6450                 mlog_errno(ret);
6451                 goto out;
6452         }
6453
6454         handle = ocfs2_start_trans(osb, credits);
6455         if (IS_ERR(handle)) {
6456                 ret = PTR_ERR(handle);
6457                 mlog_errno(ret);
6458                 goto out;
6459         }
6460
6461         ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6462                                       args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6463         if (ret) {
6464                 mlog_errno(ret);
6465                 goto out_commit;
6466         }
6467
6468         memcpy(args->new_bh->b_data + header_off,
6469                args->old_bh->b_data + header_off, inline_size);
6470
6471         new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6472         new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6473
6474         ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6475                                          args->new_bh, new_xh, &vb, meta_ac,
6476                                          ocfs2_get_xattr_value_root, NULL);
6477         if (ret) {
6478                 mlog_errno(ret);
6479                 goto out_commit;
6480         }
6481
6482         new_oi = OCFS2_I(args->new_inode);
6483         spin_lock(&new_oi->ip_lock);
6484         new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6485         new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6486         spin_unlock(&new_oi->ip_lock);
6487
6488         ocfs2_journal_dirty(handle, args->new_bh);
6489
6490 out_commit:
6491         ocfs2_commit_trans(osb, handle);
6492
6493 out:
6494         if (meta_ac)
6495                 ocfs2_free_alloc_context(meta_ac);
6496         return ret;
6497 }
6498
6499 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6500                                           struct buffer_head *fe_bh,
6501                                           struct buffer_head **ret_bh,
6502                                           int indexed)
6503 {
6504         int ret;
6505         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6506         struct ocfs2_xattr_set_ctxt ctxt;
6507
6508         memset(&ctxt, 0, sizeof(ctxt));
6509         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6510         if (ret < 0) {
6511                 mlog_errno(ret);
6512                 return ret;
6513         }
6514
6515         ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6516         if (IS_ERR(ctxt.handle)) {
6517                 ret = PTR_ERR(ctxt.handle);
6518                 mlog_errno(ret);
6519                 goto out;
6520         }
6521
6522         mlog(0, "create new xattr block for inode %llu, index = %d\n",
6523              (unsigned long long)fe_bh->b_blocknr, indexed);
6524         ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6525                                        ret_bh);
6526         if (ret)
6527                 mlog_errno(ret);
6528
6529         ocfs2_commit_trans(osb, ctxt.handle);
6530 out:
6531         ocfs2_free_alloc_context(ctxt.meta_ac);
6532         return ret;
6533 }
6534
6535 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6536                                      struct buffer_head *blk_bh,
6537                                      struct buffer_head *new_blk_bh)
6538 {
6539         int ret = 0, credits = 0;
6540         handle_t *handle;
6541         struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6542         struct ocfs2_dinode *new_di;
6543         struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6544         int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6545         struct ocfs2_xattr_block *xb =
6546                         (struct ocfs2_xattr_block *)blk_bh->b_data;
6547         struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6548         struct ocfs2_xattr_block *new_xb =
6549                         (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6550         struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6551         struct ocfs2_alloc_context *meta_ac;
6552         struct ocfs2_xattr_value_buf vb = {
6553                 .vb_bh = new_blk_bh,
6554                 .vb_access = ocfs2_journal_access_xb,
6555         };
6556
6557         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6558                                                   &credits, &meta_ac);
6559         if (ret) {
6560                 mlog_errno(ret);
6561                 return ret;
6562         }
6563
6564         /* One more credits in case we need to add xattr flags in new inode. */
6565         handle = ocfs2_start_trans(osb, credits + 1);
6566         if (IS_ERR(handle)) {
6567                 ret = PTR_ERR(handle);
6568                 mlog_errno(ret);
6569                 goto out;
6570         }
6571
6572         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6573                 ret = ocfs2_journal_access_di(handle,
6574                                               INODE_CACHE(args->new_inode),
6575                                               args->new_bh,
6576                                               OCFS2_JOURNAL_ACCESS_WRITE);
6577                 if (ret) {
6578                         mlog_errno(ret);
6579                         goto out_commit;
6580                 }
6581         }
6582
6583         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6584                                       new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6585         if (ret) {
6586                 mlog_errno(ret);
6587                 goto out_commit;
6588         }
6589
6590         memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6591                osb->sb->s_blocksize - header_off);
6592
6593         ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6594                                          new_blk_bh, new_xh, &vb, meta_ac,
6595                                          ocfs2_get_xattr_value_root, NULL);
6596         if (ret) {
6597                 mlog_errno(ret);
6598                 goto out_commit;
6599         }
6600
6601         ocfs2_journal_dirty(handle, new_blk_bh);
6602
6603         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6604                 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6605                 spin_lock(&new_oi->ip_lock);
6606                 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6607                 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6608                 spin_unlock(&new_oi->ip_lock);
6609
6610                 ocfs2_journal_dirty(handle, args->new_bh);
6611         }
6612
6613 out_commit:
6614         ocfs2_commit_trans(osb, handle);
6615
6616 out:
6617         ocfs2_free_alloc_context(meta_ac);
6618         return ret;
6619 }
6620
/*
 * Arguments shared by the helpers that reflink an indexed xattr tree.
 */
struct ocfs2_reflink_xattr_tree_args {
        /* The overall reflink description (inodes, refcount tree, ...). */
        struct ocfs2_xattr_reflink *reflink;
        /* NOTE(review): presumably the old and new xattr blocks - confirm. */
        struct buffer_head *old_blk_bh;
        struct buffer_head *new_blk_bh;
        /* Bucket read from the old tree and bucket being built for the new. */
        struct ocfs2_xattr_bucket *old_bucket;
        struct ocfs2_xattr_bucket *new_bucket;
};
6628
6629 /*
6630  * NOTE:
6631  * We have to handle the case that both old bucket and new bucket
6632  * will call this function to get the right ret_bh.
6633  * So The caller must give us the right bh.
6634  */
6635 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6636                                         struct buffer_head *bh,
6637                                         struct ocfs2_xattr_header *xh,
6638                                         int offset,
6639                                         struct ocfs2_xattr_value_root **xv,
6640                                         struct buffer_head **ret_bh,
6641                                         void *para)
6642 {
6643         struct ocfs2_reflink_xattr_tree_args *args =
6644                         (struct ocfs2_reflink_xattr_tree_args *)para;
6645         struct ocfs2_xattr_bucket *bucket;
6646
6647         if (bh == args->old_bucket->bu_bhs[0])
6648                 bucket = args->old_bucket;
6649         else
6650                 bucket = args->new_bucket;
6651
6652         return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6653                                                xv, ret_bh);
6654 }
6655
/*
 * Running totals gathered by ocfs2_calc_value_tree_metas() while iterating
 * over xattr buckets.
 */
struct ocfs2_value_tree_metas {
        int num_metas;  /* metadata blocks to reserve */
        int credits;    /* journal credits needed */
        int num_recs;   /* extent records counted for the refcount tree */
};
6661
/*
 * get_xattr_value_root implementation for metadata counting: para is the
 * bucket itself, so bh and xh are not needed to find the value root.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        struct ocfs2_xattr_bucket *target = para;
        int status;

        status = ocfs2_get_xattr_tree_value_root(sb, target, offset,
                                                 xv, ret_bh);

        return status;
}
6676
6677 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6678                                       struct ocfs2_xattr_bucket *bucket,
6679                                       void *para)
6680 {
6681         struct ocfs2_value_tree_metas *metas =
6682                         (struct ocfs2_value_tree_metas *)para;
6683         struct ocfs2_xattr_header *xh =
6684                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6685
6686         /* Add the credits for this bucket first. */
6687         metas->credits += bucket->bu_blocks;
6688         return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6689                                         xh, &metas->num_metas,
6690                                         &metas->credits, &metas->num_recs,
6691                                         ocfs2_value_tree_metas_in_bucket,
6692                                         bucket);
6693 }
6694
6695 /*
6696  * Given a xattr extent rec starting from blkno and having len clusters,
6697  * iterate all the buckets calculate how much metadata we need for reflinking
6698  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6699  */
6700 static int ocfs2_lock_reflink_xattr_rec_allocators(
6701                                 struct ocfs2_reflink_xattr_tree_args *args,
6702                                 struct ocfs2_extent_tree *xt_et,
6703                                 u64 blkno, u32 len, int *credits,
6704                                 struct ocfs2_alloc_context **meta_ac,
6705                                 struct ocfs2_alloc_context **data_ac)
6706 {
6707         int ret, num_free_extents;
6708         struct ocfs2_value_tree_metas metas;
6709         struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6710         struct ocfs2_refcount_block *rb;
6711
6712         memset(&metas, 0, sizeof(metas));
6713
6714         ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6715                                           ocfs2_calc_value_tree_metas, &metas);
6716         if (ret) {
6717                 mlog_errno(ret);
6718                 goto out;
6719         }
6720
6721         *credits = metas.credits;
6722
6723         /*
6724          * Calculate we need for refcount tree change.
6725          *
6726          * We need to add/modify num_recs in refcount tree, so just calculate
6727          * an approximate number we need for refcount tree change.
6728          * Sometimes we need to split the tree, and after split,  half recs
6729          * will be moved to the new block, and a new block can only provide
6730          * half number of recs. So we multiple new blocks by 2.
6731          * In the end, we have to add credits for modifying the already
6732          * existed refcount block.
6733          */
6734         rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6735         metas.num_recs =
6736                 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6737                  ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6738         metas.num_metas += metas.num_recs;
6739         *credits += metas.num_recs +
6740                     metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6741         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6742                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6743                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6744         else
6745                 *credits += 1;
6746
6747         /* count in the xattr tree change. */
6748         num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6749         if (num_free_extents < 0) {
6750                 ret = num_free_extents;
6751                 mlog_errno(ret);
6752                 goto out;
6753         }
6754
6755         if (num_free_extents < len)
6756                 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6757
6758         *credits += ocfs2_calc_extend_credits(osb->sb,
6759                                               xt_et->et_root_el, len);
6760
6761         if (metas.num_metas) {
6762                 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6763                                                         meta_ac);
6764                 if (ret) {
6765                         mlog_errno(ret);
6766                         goto out;
6767                 }
6768         }
6769
6770         if (len) {
6771                 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6772                 if (ret)
6773                         mlog_errno(ret);
6774         }
6775 out:
6776         if (ret) {
6777                 if (*meta_ac) {
6778                         ocfs2_free_alloc_context(*meta_ac);
6779                         meta_ac = NULL;
6780                 }
6781         }
6782
6783         return ret;
6784 }
6785
6786 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6787                                 u64 blkno, u64 new_blkno, u32 clusters,
6788                                 struct ocfs2_alloc_context *meta_ac,
6789                                 struct ocfs2_alloc_context *data_ac,
6790                                 struct ocfs2_reflink_xattr_tree_args *args)
6791 {
6792         int i, j, ret = 0;
6793         struct super_block *sb = args->reflink->old_inode->i_sb;
6794         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6795         u32 num_buckets = clusters * bpc;
6796         int bpb = args->old_bucket->bu_blocks;
6797         struct ocfs2_xattr_value_buf vb = {
6798                 .vb_access = ocfs2_journal_access,
6799         };
6800
6801         for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6802                 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6803                 if (ret) {
6804                         mlog_errno(ret);
6805                         break;
6806                 }
6807
6808                 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6809                 if (ret) {
6810                         mlog_errno(ret);
6811                         break;
6812                 }
6813
6814                 /*
6815                  * The real bucket num in this series of blocks is stored
6816                  * in the 1st bucket.
6817                  */
6818                 if (i == 0)
6819                         num_buckets = le16_to_cpu(
6820                                 bucket_xh(args->old_bucket)->xh_num_buckets);
6821
6822                 ret = ocfs2_xattr_bucket_journal_access(handle,
6823                                                 args->new_bucket,
6824                                                 OCFS2_JOURNAL_ACCESS_CREATE);
6825                 if (ret) {
6826                         mlog_errno(ret);
6827                         break;
6828                 }
6829
6830                 for (j = 0; j < bpb; j++)
6831                         memcpy(bucket_block(args->new_bucket, j),
6832                                bucket_block(args->old_bucket, j),
6833                                sb->s_blocksize);
6834
6835                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6836
6837                 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6838                                         args->old_bucket->bu_bhs[0],
6839                                         bucket_xh(args->old_bucket),
6840                                         args->new_bucket->bu_bhs[0],
6841                                         bucket_xh(args->new_bucket),
6842                                         &vb, meta_ac,
6843                                         ocfs2_get_reflink_xattr_value_root,
6844                                         args);
6845                 if (ret) {
6846                         mlog_errno(ret);
6847                         break;
6848                 }
6849
6850                 /*
6851                  * Re-access and dirty the bucket to calculate metaecc.
6852                  * Because we may extend the transaction in reflink_xattr_header
6853                  * which will let the already accessed block gone.
6854                  */
6855                 ret = ocfs2_xattr_bucket_journal_access(handle,
6856                                                 args->new_bucket,
6857                                                 OCFS2_JOURNAL_ACCESS_WRITE);
6858                 if (ret) {
6859                         mlog_errno(ret);
6860                         break;
6861                 }
6862
6863                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6864                 ocfs2_xattr_bucket_relse(args->old_bucket);
6865                 ocfs2_xattr_bucket_relse(args->new_bucket);
6866         }
6867
6868         ocfs2_xattr_bucket_relse(args->old_bucket);
6869         ocfs2_xattr_bucket_relse(args->new_bucket);
6870         return ret;
6871 }
6872 /*
6873  * Create the same xattr extent record in the new inode's xattr tree.
6874  */
6875 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6876                                    struct buffer_head *root_bh,
6877                                    u64 blkno,
6878                                    u32 cpos,
6879                                    u32 len,
6880                                    void *para)
6881 {
6882         int ret, credits = 0;
6883         u32 p_cluster, num_clusters;
6884         u64 new_blkno;
6885         handle_t *handle;
6886         struct ocfs2_reflink_xattr_tree_args *args =
6887                         (struct ocfs2_reflink_xattr_tree_args *)para;
6888         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6889         struct ocfs2_alloc_context *meta_ac = NULL;
6890         struct ocfs2_alloc_context *data_ac = NULL;
6891         struct ocfs2_extent_tree et;
6892
6893         ocfs2_init_xattr_tree_extent_tree(&et,
6894                                           INODE_CACHE(args->reflink->new_inode),
6895                                           args->new_blk_bh);
6896
6897         ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6898                                                       len, &credits,
6899                                                       &meta_ac, &data_ac);
6900         if (ret) {
6901                 mlog_errno(ret);
6902                 goto out;
6903         }
6904
6905         handle = ocfs2_start_trans(osb, credits);
6906         if (IS_ERR(handle)) {
6907                 ret = PTR_ERR(handle);
6908                 mlog_errno(ret);
6909                 goto out;
6910         }
6911
6912         ret = ocfs2_claim_clusters(handle, data_ac,
6913                                    len, &p_cluster, &num_clusters);
6914         if (ret) {
6915                 mlog_errno(ret);
6916                 goto out_commit;
6917         }
6918
6919         new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6920
6921         mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6922              (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6923         ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6924                                           meta_ac, data_ac, args);
6925         if (ret) {
6926                 mlog_errno(ret);
6927                 goto out_commit;
6928         }
6929
6930         mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6931              (unsigned long long)new_blkno, len, cpos);
6932         ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6933                                   len, 0, meta_ac);
6934         if (ret)
6935                 mlog_errno(ret);
6936
6937 out_commit:
6938         ocfs2_commit_trans(osb, handle);
6939
6940 out:
6941         if (meta_ac)
6942                 ocfs2_free_alloc_context(meta_ac);
6943         if (data_ac)
6944                 ocfs2_free_alloc_context(data_ac);
6945         return ret;
6946 }
6947
6948 /*
6949  * Create reflinked xattr buckets.
6950  * We will add bucket one by one, and refcount all the xattrs in the bucket
6951  * if they are stored outside.
6952  */
6953 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6954                                     struct buffer_head *blk_bh,
6955                                     struct buffer_head *new_blk_bh)
6956 {
6957         int ret;
6958         struct ocfs2_reflink_xattr_tree_args para;
6959
6960         memset(&para, 0, sizeof(para));
6961         para.reflink = args;
6962         para.old_blk_bh = blk_bh;
6963         para.new_blk_bh = new_blk_bh;
6964
6965         para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6966         if (!para.old_bucket) {
6967                 mlog_errno(-ENOMEM);
6968                 return -ENOMEM;
6969         }
6970
6971         para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6972         if (!para.new_bucket) {
6973                 ret = -ENOMEM;
6974                 mlog_errno(ret);
6975                 goto out;
6976         }
6977
6978         ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
6979                                               ocfs2_reflink_xattr_rec,
6980                                               &para);
6981         if (ret)
6982                 mlog_errno(ret);
6983
6984 out:
6985         ocfs2_xattr_bucket_free(para.old_bucket);
6986         ocfs2_xattr_bucket_free(para.new_bucket);
6987         return ret;
6988 }
6989
6990 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
6991                                         struct buffer_head *blk_bh)
6992 {
6993         int ret, indexed = 0;
6994         struct buffer_head *new_blk_bh = NULL;
6995         struct ocfs2_xattr_block *xb =
6996                         (struct ocfs2_xattr_block *)blk_bh->b_data;
6997
6998
6999         if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7000                 indexed = 1;
7001
7002         ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7003                                              &new_blk_bh, indexed);
7004         if (ret) {
7005                 mlog_errno(ret);
7006                 goto out;
7007         }
7008
7009         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
7010                 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7011         else
7012                 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7013         if (ret)
7014                 mlog_errno(ret);
7015
7016 out:
7017         brelse(new_blk_bh);
7018         return ret;
7019 }
7020
7021 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7022 {
7023         int type = ocfs2_xattr_get_type(xe);
7024
7025         return type != OCFS2_XATTR_INDEX_SECURITY &&
7026                type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7027                type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7028 }
7029
7030 int ocfs2_reflink_xattrs(struct inode *old_inode,
7031                          struct buffer_head *old_bh,
7032                          struct inode *new_inode,
7033                          struct buffer_head *new_bh,
7034                          bool preserve_security)
7035 {
7036         int ret;
7037         struct ocfs2_xattr_reflink args;
7038         struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7039         struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7040         struct buffer_head *blk_bh = NULL;
7041         struct ocfs2_cached_dealloc_ctxt dealloc;
7042         struct ocfs2_refcount_tree *ref_tree;
7043         struct buffer_head *ref_root_bh = NULL;
7044
7045         ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7046                                        le64_to_cpu(di->i_refcount_loc),
7047                                        1, &ref_tree, &ref_root_bh);
7048         if (ret) {
7049                 mlog_errno(ret);
7050                 goto out;
7051         }
7052
7053         ocfs2_init_dealloc_ctxt(&dealloc);
7054
7055         args.old_inode = old_inode;
7056         args.new_inode = new_inode;
7057         args.old_bh = old_bh;
7058         args.new_bh = new_bh;
7059         args.ref_ci = &ref_tree->rf_ci;
7060         args.ref_root_bh = ref_root_bh;
7061         args.dealloc = &dealloc;
7062         if (preserve_security)
7063                 args.xattr_reflinked = NULL;
7064         else
7065                 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7066
7067         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7068                 ret = ocfs2_reflink_xattr_inline(&args);
7069                 if (ret) {
7070                         mlog_errno(ret);
7071                         goto out_unlock;
7072                 }
7073         }
7074
7075         if (!di->i_xattr_loc)
7076                 goto out_unlock;
7077
7078         ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7079                                      &blk_bh);
7080         if (ret < 0) {
7081                 mlog_errno(ret);
7082                 goto out_unlock;
7083         }
7084
7085         ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7086         if (ret)
7087                 mlog_errno(ret);
7088
7089         brelse(blk_bh);
7090
7091 out_unlock:
7092         ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7093                                    ref_tree, 1);
7094         brelse(ref_root_bh);
7095
7096         if (ocfs2_dealloc_has_cluster(&dealloc)) {
7097                 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7098                 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7099         }
7100
7101 out:
7102         return ret;
7103 }
7104
7105 /*
7106  * Initialize security and acl for a already created inode.
7107  * Used for reflink a non-preserve-security file.
7108  *
7109  * It uses common api like ocfs2_xattr_set, so the caller
7110  * must not hold any lock expect i_mutex.
7111  */
7112 int ocfs2_init_security_and_acl(struct inode *dir,
7113                                 struct inode *inode)
7114 {
7115         int ret = 0;
7116         struct buffer_head *dir_bh = NULL;
7117         struct ocfs2_security_xattr_info si = {
7118                 .enable = 1,
7119         };
7120
7121         ret = ocfs2_init_security_get(inode, dir, &si);
7122         if (!ret) {
7123                 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7124                                       si.name, si.value, si.value_len,
7125                                       XATTR_CREATE);
7126                 if (ret) {
7127                         mlog_errno(ret);
7128                         goto leave;
7129                 }
7130         } else if (ret != -EOPNOTSUPP) {
7131                 mlog_errno(ret);
7132                 goto leave;
7133         }
7134
7135         ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7136         if (ret) {
7137                 mlog_errno(ret);
7138                 goto leave;
7139         }
7140
7141         ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7142         if (ret)
7143                 mlog_errno(ret);
7144
7145         ocfs2_inode_unlock(dir, 0);
7146         brelse(dir_bh);
7147 leave:
7148         return ret;
7149 }
7150 /*
7151  * 'security' attributes support
7152  */
7153 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7154                                         size_t list_size, const char *name,
7155                                         size_t name_len, int type)
7156 {
7157         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7158         const size_t total_len = prefix_len + name_len + 1;
7159
7160         if (list && total_len <= list_size) {
7161                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7162                 memcpy(list + prefix_len, name, name_len);
7163                 list[prefix_len + name_len] = '\0';
7164         }
7165         return total_len;
7166 }
7167
7168 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7169                                     void *buffer, size_t size, int type)
7170 {
7171         if (strcmp(name, "") == 0)
7172                 return -EINVAL;
7173         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7174                                name, buffer, size);
7175 }
7176
7177 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7178                 const void *value, size_t size, int flags, int type)
7179 {
7180         if (strcmp(name, "") == 0)
7181                 return -EINVAL;
7182
7183         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7184                                name, value, size, flags);
7185 }
7186
7187 int ocfs2_init_security_get(struct inode *inode,
7188                             struct inode *dir,
7189                             struct ocfs2_security_xattr_info *si)
7190 {
7191         /* check whether ocfs2 support feature xattr */
7192         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7193                 return -EOPNOTSUPP;
7194         return security_inode_init_security(inode, dir, &si->name, &si->value,
7195                                             &si->value_len);
7196 }
7197
7198 int ocfs2_init_security_set(handle_t *handle,
7199                             struct inode *inode,
7200                             struct buffer_head *di_bh,
7201                             struct ocfs2_security_xattr_info *si,
7202                             struct ocfs2_alloc_context *xattr_ac,
7203                             struct ocfs2_alloc_context *data_ac)
7204 {
7205         return ocfs2_xattr_set_handle(handle, inode, di_bh,
7206                                      OCFS2_XATTR_INDEX_SECURITY,
7207                                      si->name, si->value, si->value_len, 0,
7208                                      xattr_ac, data_ac);
7209 }
7210
/*
 * xattr handler that binds the XATTR_SECURITY_PREFIX namespace to the
 * ocfs2 security list/get/set callbacks defined above.
 */
struct xattr_handler ocfs2_xattr_security_handler = {
        .prefix = XATTR_SECURITY_PREFIX,
        .list   = ocfs2_xattr_security_list,
        .get    = ocfs2_xattr_security_get,
        .set    = ocfs2_xattr_security_set,
};
7217
7218 /*
7219  * 'trusted' attributes support
7220  */
7221 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7222                                        size_t list_size, const char *name,
7223                                        size_t name_len, int type)
7224 {
7225         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7226         const size_t total_len = prefix_len + name_len + 1;
7227
7228         if (list && total_len <= list_size) {
7229                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7230                 memcpy(list + prefix_len, name, name_len);
7231                 list[prefix_len + name_len] = '\0';
7232         }
7233         return total_len;
7234 }
7235
7236 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7237                 void *buffer, size_t size, int type)
7238 {
7239         if (strcmp(name, "") == 0)
7240                 return -EINVAL;
7241         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7242                                name, buffer, size);
7243 }
7244
7245 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7246                 const void *value, size_t size, int flags, int type)
7247 {
7248         if (strcmp(name, "") == 0)
7249                 return -EINVAL;
7250
7251         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7252                                name, value, size, flags);
7253 }
7254
/*
 * xattr handler that binds the XATTR_TRUSTED_PREFIX namespace to the
 * ocfs2 trusted list/get/set callbacks defined above.
 */
struct xattr_handler ocfs2_xattr_trusted_handler = {
        .prefix = XATTR_TRUSTED_PREFIX,
        .list   = ocfs2_xattr_trusted_list,
        .get    = ocfs2_xattr_trusted_get,
        .set    = ocfs2_xattr_trusted_set,
};
7261
7262 /*
7263  * 'user' attributes support
7264  */
7265 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7266                                     size_t list_size, const char *name,
7267                                     size_t name_len, int type)
7268 {
7269         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7270         const size_t total_len = prefix_len + name_len + 1;
7271         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7272
7273         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7274                 return 0;
7275
7276         if (list && total_len <= list_size) {
7277                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7278                 memcpy(list + prefix_len, name, name_len);
7279                 list[prefix_len + name_len] = '\0';
7280         }
7281         return total_len;
7282 }
7283
7284 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7285                 void *buffer, size_t size, int type)
7286 {
7287         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7288
7289         if (strcmp(name, "") == 0)
7290                 return -EINVAL;
7291         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7292                 return -EOPNOTSUPP;
7293         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7294                                buffer, size);
7295 }
7296
7297 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7298                 const void *value, size_t size, int flags, int type)
7299 {
7300         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7301
7302         if (strcmp(name, "") == 0)
7303                 return -EINVAL;
7304         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7305                 return -EOPNOTSUPP;
7306
7307         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7308                                name, value, size, flags);
7309 }
7310
/*
 * xattr handler that binds the XATTR_USER_PREFIX namespace to the
 * ocfs2 user list/get/set callbacks defined above.
 */
struct xattr_handler ocfs2_xattr_user_handler = {
        .prefix = XATTR_USER_PREFIX,
        .list   = ocfs2_xattr_user_list,
        .get    = ocfs2_xattr_user_get,
        .set    = ocfs2_xattr_user_set,
};