ocfs2: Gell into ocfs2_xa_set()
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60
/*
 * An xattr value root immediately followed by one extent record.
 * OCFS2_XATTR_ROOT_SIZE is defined as the size of this structure and
 * def_xv is a statically initialized instance of it.
 */
61 struct ocfs2_xattr_def_value_root {
62         struct ocfs2_xattr_value_root   xv;
63         struct ocfs2_extent_rec         er;
64 };
65
/*
 * In-memory handle on one xattr bucket: the owning inode plus the
 * buffer_heads of the blocks that make up the bucket.
 */
66 struct ocfs2_xattr_bucket {
67         /* The inode these xattrs are associated with */
68         struct inode *bu_inode;
69
70         /*
            * The actual buffers that make up the bucket.  The helpers in
            * this file only walk the first bu_blocks slots.
            */
71         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
72
73         /* How many blocks make up one bucket for this filesystem */
74         int bu_blocks;
75 };
76
/*
 * Resources threaded through the xattr set/remove helpers:
 *   handle  - journal handle passed to the journal access/dirty calls
 *   meta_ac - metadata allocation context handed to the extent tree code
 *   data_ac - data allocation context handed to
 *             ocfs2_add_clusters_in_btree()
 *   dealloc - deallocation context that removed extents/clusters are
 *             queued on (see __ocfs2_remove_xattr_range())
 */
77 struct ocfs2_xattr_set_ctxt {
78         handle_t *handle;
79         struct ocfs2_alloc_context *meta_ac;
80         struct ocfs2_alloc_context *data_ac;
81         struct ocfs2_cached_dealloc_ctxt dealloc;
82 };
83
/*
 * Size constants used by the space calculations in this file.
 *
 * OCFS2_XATTR_ROOT_SIZE:   sizeof(struct ocfs2_xattr_def_value_root).
 * OCFS2_XATTR_INLINE_SIZE: value length threshold; namevalue_size()
 *                          charges OCFS2_XATTR_ROOT_SIZE instead of the
 *                          value's own size once a value is longer than
 *                          this.
 * OCFS2_XATTR_HEADER_GAP:  extra bytes subtracted by the two FREE_IN
 *                          macros below.
 * OCFS2_XATTR_FREE_IN_IBODY:
 *                          OCFS2_MIN_XATTR_INLINE_SIZE minus the xattr
 *                          header and the gap.
 * OCFS2_XATTR_FREE_IN_BLOCK(ptr):
 *                          block size of ptr->i_sb minus the xattr block
 *                          structure, the xattr header and the gap.  ptr
 *                          must have an i_sb member (callers here pass
 *                          an inode).
 */
84 #define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
85 #define OCFS2_XATTR_INLINE_SIZE 80
86 #define OCFS2_XATTR_HEADER_GAP  4
87 #define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
88                                          - sizeof(struct ocfs2_xattr_header) \
89                                          - OCFS2_XATTR_HEADER_GAP)
90 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
91                                          - sizeof(struct ocfs2_xattr_block) \
92                                          - sizeof(struct ocfs2_xattr_header) \
93                                          - OCFS2_XATTR_HEADER_GAP)
94
/*
 * Default value root: everything is zero except the extent list's
 * l_count, which is set to 1 (stored little-endian).
 */
95 static struct ocfs2_xattr_def_value_root def_xv = {
96         .xv.xr_list.l_count = cpu_to_le16(1),
97 };
98
/* NULL-terminated list of every xattr handler ocfs2 provides. */
99 struct xattr_handler *ocfs2_xattr_handlers[] = {
100         &ocfs2_xattr_user_handler,
101         &ocfs2_xattr_acl_access_handler,
102         &ocfs2_xattr_acl_default_handler,
103         &ocfs2_xattr_trusted_handler,
104         &ocfs2_xattr_security_handler,
105         NULL
106 };
107
/*
 * Handler lookup table indexed by OCFS2_XATTR_INDEX_* name index.
 * Slots without an initializer stay NULL; ocfs2_xattr_prefix() copes
 * with that.
 */
108 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
109         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
110         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
111                                         = &ocfs2_xattr_acl_access_handler,
112         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
113                                         = &ocfs2_xattr_acl_default_handler,
114         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
115         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
116 };
117
/*
 * In-memory description of one xattr: its name index, name (with
 * length) and value (with length).  namevalue_size_xi() derives the
 * storage requirement from xi_name_len and xi_value_len.
 */
118 struct ocfs2_xattr_info {
119         int             xi_name_index;
120         const char      *xi_name;
121         int             xi_name_len;
122         const void      *xi_value;
123         size_t          xi_value_len;
124 };
125
/*
 * Search state handed to the xattr lookup helpers (for example
 * ocfs2_xattr_block_find() and ocfs2_xattr_index_block_find()).
 * NOTE(review): the exact meaning of base/end/here/not_found is set by
 * the lookup code, which is not described here - confirm there.
 */
126 struct ocfs2_xattr_search {
127         struct buffer_head *inode_bh;
128         /*
129          * xattr_bh points to the buffer head of the block that holds the
130          * extended attributes.  When they live in the inode, xattr_bh is
            * equal to inode_bh.
131          */
132         struct buffer_head *xattr_bh;
133         struct ocfs2_xattr_header *header;
134         struct ocfs2_xattr_bucket *bucket;
135         void *base;
136         void *end;
137         struct ocfs2_xattr_entry *here;
138         int not_found;
139 };
140
141 /*
     * Operations on struct ocfs2_xa_loc.  Every callback receives the
     * location it acts on; a location reaches its table through xl_ops.
     */
142 struct ocfs2_xa_loc;
143 struct ocfs2_xa_loc_operations {
144         /*
145          * Journal functions
146          */
147         int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
148                                   int type);
149         void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);
150
151         /*
152          * Return a pointer to the appropriate buffer in loc->xl_storage
153          * at the given offset from loc->xl_header.
154          */
155         void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
156
157         /* Can we reuse the existing entry for the new value? */
158         int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
159                              struct ocfs2_xattr_info *xi);
160
161         /* How much space is needed for the new value? */
162         int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
163                                struct ocfs2_xattr_info *xi);
164
165         /*
166          * Return the offset of the first name+value pair.  This is
167          * the start of our downward-filling free space.
168          */
169         int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
170
171         /*
172          * Remove the name+value at this location.  Do whatever is
173          * appropriate with the remaining name+value pairs.
174          */
175         void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
176
177         /* Fill xl_entry with a new entry */
178         void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
179
180         /* Add name+value storage to an entry */
181         void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
182
183         /*
184          * Initialize the value buf's access and bh fields for this entry.
185          * ocfs2_xa_fill_value_buf() will handle the xv pointer.
186          */
187         void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
188                                    struct ocfs2_xattr_value_buf *vb);
189 };
190
191 /*
192  * Describes an xattr entry location.  This is a memory structure
193  * tracking the on-disk structure.  Storage-specific behaviour is
     * reached through xl_ops (a struct ocfs2_xa_loc_operations table).
194  */
195 struct ocfs2_xa_loc {
196         /* This xattr belongs to this inode */
197         struct inode *xl_inode;
198
199         /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
200         struct ocfs2_xattr_header *xl_header;
201
202         /* Bytes from xl_header to the end of the storage */
203         int xl_size;
204
205         /*
206          * The ocfs2_xattr_entry this location describes.  If this is
207          * NULL, this location describes the on-disk structure where it
208          * would have been.
209          */
210         struct ocfs2_xattr_entry *xl_entry;
211
212         /*
213          * Internal housekeeping
214          */
215
216         /*
             * Buffer(s) containing this entry.  Untyped here; the xl_ops
             * callbacks (e.g. xlo_offset_pointer) are what look inside it.
             */
217         void *xl_storage;
218
219         /* Operations on the storage backing this location */
220         const struct ocfs2_xa_loc_operations *xl_ops;
221 };
222
223 /*
224  * Convenience functions to calculate how much space is needed for a
225  * given name+value pair
226  */
227 static int namevalue_size(int name_len, uint64_t value_len)
228 {
229         if (value_len > OCFS2_XATTR_INLINE_SIZE)
230                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
231         else
232                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
233 }
234
235 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
236 {
237         return namevalue_size(xi->xi_name_len, xi->xi_value_len);
238 }
239
240 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
241 {
242         u64 value_len = le64_to_cpu(xe->xe_value_size);
243
244         BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
245                ocfs2_xattr_is_local(xe));
246         return namevalue_size(xe->xe_name_len, value_len);
247 }
248
249
/*
 * Forward declarations for static helpers that are used before their
 * definitions further down in this file.
 */

/* Bucket entry lookup by index within a header. */
250 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
251                                              struct ocfs2_xattr_header *xh,
252                                              int index,
253                                              int *block_off,
254                                              int *new_offset);
255
/* Lookup helpers; both fill in a struct ocfs2_xattr_search. */
256 static int ocfs2_xattr_block_find(struct inode *inode,
257                                   int name_index,
258                                   const char *name,
259                                   struct ocfs2_xattr_search *xs);
260 static int ocfs2_xattr_index_block_find(struct inode *inode,
261                                         struct buffer_head *root_bh,
262                                         int name_index,
263                                         const char *name,
264                                         struct ocfs2_xattr_search *xs);
265
266 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
267                                         struct buffer_head *blk_bh,
268                                         char *buffer,
269                                         size_t buffer_size);
270
271 static int ocfs2_xattr_create_index_block(struct inode *inode,
272                                           struct ocfs2_xattr_search *xs,
273                                           struct ocfs2_xattr_set_ctxt *ctxt);
274
275 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
276                                              struct ocfs2_xattr_info *xi,
277                                              struct ocfs2_xattr_search *xs,
278                                              struct ocfs2_xattr_set_ctxt *ctxt);
279
/*
 * Callback type taken by ocfs2_iterate_xattr_index_block(); @para is an
 * opaque pointer passed through to the callback.
 */
280 typedef int (xattr_tree_rec_func)(struct inode *inode,
281                                   struct buffer_head *root_bh,
282                                   u64 blkno, u32 cpos, u32 len, void *para);
283 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
284                                            struct buffer_head *root_bh,
285                                            xattr_tree_rec_func *rec_func,
286                                            void *para);
287 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
288                                         struct ocfs2_xattr_bucket *bucket,
289                                         void *para);
/* Has the same parameter list as xattr_tree_rec_func. */
290 static int ocfs2_rm_xattr_cluster(struct inode *inode,
291                                   struct buffer_head *root_bh,
292                                   u64 blkno,
293                                   u32 cpos,
294                                   u32 len,
295                                   void *para);
296
297 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
298                                   u64 src_blk, u64 last_blk, u64 to_blk,
299                                   unsigned int start_bucket,
300                                   u32 *first_hash);
301 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
302                                         struct ocfs2_dinode *di,
303                                         struct ocfs2_xattr_info *xi,
304                                         struct ocfs2_xattr_search *xis,
305                                         struct ocfs2_xattr_search *xbs,
306                                         struct ocfs2_refcount_tree **ref_tree,
307                                         int *meta_need,
308                                         int *credits);
309 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
310                                            struct ocfs2_xattr_bucket *bucket,
311                                            int offset,
312                                            struct ocfs2_xattr_value_root **xv,
313                                            struct buffer_head **bh);
314
315 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
316 {
317         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
318 }
319
320 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
321 {
322         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
323 }
324
325 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
326 {
327         u16 len = sb->s_blocksize -
328                  offsetof(struct ocfs2_xattr_header, xh_entries);
329
330         return len / sizeof(struct ocfs2_xattr_entry);
331 }
332
/*
 * Accessors for a struct ocfs2_xattr_bucket pointer _b:
 *   bucket_blkno(_b)     - block number of the bucket's first buffer
 *   bucket_block(_b, _n) - data pointer of the bucket's _n'th buffer
 *   bucket_xh(_b)        - the first buffer's data viewed as the
 *                          bucket's struct ocfs2_xattr_header
 * They dereference bu_bhs[] directly, so the buffers must be populated.
 */
333 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
334 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
335 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
336
337 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
338 {
339         struct ocfs2_xattr_bucket *bucket;
340         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
341
342         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
343
344         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
345         if (bucket) {
346                 bucket->bu_inode = inode;
347                 bucket->bu_blocks = blks;
348         }
349
350         return bucket;
351 }
352
353 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
354 {
355         int i;
356
357         for (i = 0; i < bucket->bu_blocks; i++) {
358                 brelse(bucket->bu_bhs[i]);
359                 bucket->bu_bhs[i] = NULL;
360         }
361 }
362
363 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
364 {
365         if (bucket) {
366                 ocfs2_xattr_bucket_relse(bucket);
367                 bucket->bu_inode = NULL;
368                 kfree(bucket);
369         }
370 }
371
372 /*
373  * A bucket that has never been written to disk doesn't need to be
374  * read.  We just need the buffer_heads.  Don't call this for
375  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
376  * them fully.
377  */
378 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
379                                    u64 xb_blkno)
380 {
381         int i, rc = 0;
382
383         for (i = 0; i < bucket->bu_blocks; i++) {
384                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
385                                               xb_blkno + i);
386                 if (!bucket->bu_bhs[i]) {
387                         rc = -EIO;
388                         mlog_errno(rc);
389                         break;
390                 }
391
392                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
393                                            bucket->bu_bhs[i]))
394                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
395                                                       bucket->bu_bhs[i]);
396         }
397
398         if (rc)
399                 ocfs2_xattr_bucket_relse(bucket);
400         return rc;
401 }
402
403 /* Read the xattr bucket at xb_blkno */
404 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
405                                    u64 xb_blkno)
406 {
407         int rc;
408
409         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
410                                bucket->bu_blocks, bucket->bu_bhs, 0,
411                                NULL);
412         if (!rc) {
413                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
414                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
415                                                  bucket->bu_bhs,
416                                                  bucket->bu_blocks,
417                                                  &bucket_xh(bucket)->xh_check);
418                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
419                 if (rc)
420                         mlog_errno(rc);
421         }
422
423         if (rc)
424                 ocfs2_xattr_bucket_relse(bucket);
425         return rc;
426 }
427
428 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
429                                              struct ocfs2_xattr_bucket *bucket,
430                                              int type)
431 {
432         int i, rc = 0;
433
434         for (i = 0; i < bucket->bu_blocks; i++) {
435                 rc = ocfs2_journal_access(handle,
436                                           INODE_CACHE(bucket->bu_inode),
437                                           bucket->bu_bhs[i], type);
438                 if (rc) {
439                         mlog_errno(rc);
440                         break;
441                 }
442         }
443
444         return rc;
445 }
446
447 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
448                                              struct ocfs2_xattr_bucket *bucket)
449 {
450         int i;
451
452         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
453         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
454                                    bucket->bu_bhs, bucket->bu_blocks,
455                                    &bucket_xh(bucket)->xh_check);
456         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
457
458         for (i = 0; i < bucket->bu_blocks; i++)
459                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
460 }
461
462 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
463                                          struct ocfs2_xattr_bucket *src)
464 {
465         int i;
466         int blocksize = src->bu_inode->i_sb->s_blocksize;
467
468         BUG_ON(dest->bu_blocks != src->bu_blocks);
469         BUG_ON(dest->bu_inode != src->bu_inode);
470
471         for (i = 0; i < src->bu_blocks; i++) {
472                 memcpy(bucket_block(dest, i), bucket_block(src, i),
473                        blocksize);
474         }
475 }
476
477 static int ocfs2_validate_xattr_block(struct super_block *sb,
478                                       struct buffer_head *bh)
479 {
480         int rc;
481         struct ocfs2_xattr_block *xb =
482                 (struct ocfs2_xattr_block *)bh->b_data;
483
484         mlog(0, "Validating xattr block %llu\n",
485              (unsigned long long)bh->b_blocknr);
486
487         BUG_ON(!buffer_uptodate(bh));
488
489         /*
490          * If the ecc fails, we return the error but otherwise
491          * leave the filesystem running.  We know any error is
492          * local to this block.
493          */
494         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
495         if (rc)
496                 return rc;
497
498         /*
499          * Errors after here are fatal
500          */
501
502         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
503                 ocfs2_error(sb,
504                             "Extended attribute block #%llu has bad "
505                             "signature %.*s",
506                             (unsigned long long)bh->b_blocknr, 7,
507                             xb->xb_signature);
508                 return -EINVAL;
509         }
510
511         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
512                 ocfs2_error(sb,
513                             "Extended attribute block #%llu has an "
514                             "invalid xb_blkno of %llu",
515                             (unsigned long long)bh->b_blocknr,
516                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
517                 return -EINVAL;
518         }
519
520         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
521                 ocfs2_error(sb,
522                             "Extended attribute block #%llu has an invalid "
523                             "xb_fs_generation of #%u",
524                             (unsigned long long)bh->b_blocknr,
525                             le32_to_cpu(xb->xb_fs_generation));
526                 return -EINVAL;
527         }
528
529         return 0;
530 }
531
532 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
533                                   struct buffer_head **bh)
534 {
535         int rc;
536         struct buffer_head *tmp = *bh;
537
538         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
539                               ocfs2_validate_xattr_block);
540
541         /* If ocfs2_read_block() got us a new bh, pass it up. */
542         if (!rc && !*bh)
543                 *bh = tmp;
544
545         return rc;
546 }
547
548 static inline const char *ocfs2_xattr_prefix(int name_index)
549 {
550         struct xattr_handler *handler = NULL;
551
552         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
553                 handler = ocfs2_xattr_handler_map[name_index];
554
555         return handler ? handler->prefix : NULL;
556 }
557
558 static u32 ocfs2_xattr_name_hash(struct inode *inode,
559                                  const char *name,
560                                  int name_len)
561 {
562         /* Get hash value of uuid from super block */
563         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
564         int i;
565
566         /* hash extended attribute name */
567         for (i = 0; i < name_len; i++) {
568                 hash = (hash << OCFS2_HASH_SHIFT) ^
569                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
570                        *name++;
571         }
572
573         return hash;
574 }
575
576 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
577 {
578         return namevalue_size(name_len, value_len) +
579                 sizeof(struct ocfs2_xattr_entry);
580 }
581
582 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
583 {
584         return namevalue_size_xi(xi) +
585                 sizeof(struct ocfs2_xattr_entry);
586 }
587
588 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
589 {
590         return namevalue_size_xe(xe) +
591                 sizeof(struct ocfs2_xattr_entry);
592 }
593
594 int ocfs2_calc_security_init(struct inode *dir,
595                              struct ocfs2_security_xattr_info *si,
596                              int *want_clusters,
597                              int *xattr_credits,
598                              struct ocfs2_alloc_context **xattr_ac)
599 {
600         int ret = 0;
601         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
602         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
603                                                  si->value_len);
604
605         /*
606          * The max space of security xattr taken inline is
607          * 256(name) + 80(value) + 16(entry) = 352 bytes,
608          * So reserve one metadata block for it is ok.
609          */
610         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
611             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
612                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
613                 if (ret) {
614                         mlog_errno(ret);
615                         return ret;
616                 }
617                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
618         }
619
620         /* reserve clusters for xattr value which will be set in B tree*/
621         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
622                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
623                                                             si->value_len);
624
625                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
626                                                            new_clusters);
627                 *want_clusters += new_clusters;
628         }
629         return ret;
630 }
631
632 int ocfs2_calc_xattr_init(struct inode *dir,
633                           struct buffer_head *dir_bh,
634                           int mode,
635                           struct ocfs2_security_xattr_info *si,
636                           int *want_clusters,
637                           int *xattr_credits,
638                           int *want_meta)
639 {
640         int ret = 0;
641         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
642         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
643
644         if (si->enable)
645                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
646                                                      si->value_len);
647
648         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
649                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
650                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
651                                         "", NULL, 0);
652                 if (acl_len > 0) {
653                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
654                         if (S_ISDIR(mode))
655                                 a_size <<= 1;
656                 } else if (acl_len != 0 && acl_len != -ENODATA) {
657                         mlog_errno(ret);
658                         return ret;
659                 }
660         }
661
662         if (!(s_size + a_size))
663                 return ret;
664
665         /*
666          * The max space of security xattr taken inline is
667          * 256(name) + 80(value) + 16(entry) = 352 bytes,
668          * The max space of acl xattr taken inline is
669          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
670          * when blocksize = 512, may reserve one more cluser for
671          * xattr bucket, otherwise reserve one metadata block
672          * for them is ok.
673          * If this is a new directory with inline data,
674          * we choose to reserve the entire inline area for
675          * directory contents and force an external xattr block.
676          */
677         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
678             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
679             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
680                 *want_meta = *want_meta + 1;
681                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
682         }
683
684         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
685             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
686                 *want_clusters += 1;
687                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
688         }
689
690         /*
691          * reserve credits and clusters for xattrs which has large value
692          * and have to be set outside
693          */
694         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
695                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
696                                                         si->value_len);
697                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
698                                                            new_clusters);
699                 *want_clusters += new_clusters;
700         }
701         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
702             acl_len > OCFS2_XATTR_INLINE_SIZE) {
703                 /* for directory, it has DEFAULT and ACCESS two types of acls */
704                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
705                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
706                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
707                                                            new_clusters);
708                 *want_clusters += new_clusters;
709         }
710
711         return ret;
712 }
713
714 static int ocfs2_xattr_extend_allocation(struct inode *inode,
715                                          u32 clusters_to_add,
716                                          struct ocfs2_xattr_value_buf *vb,
717                                          struct ocfs2_xattr_set_ctxt *ctxt)
718 {
719         int status = 0;
720         handle_t *handle = ctxt->handle;
721         enum ocfs2_alloc_restarted why;
722         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
723         struct ocfs2_extent_tree et;
724
725         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
726
727         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
728
729         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
730                               OCFS2_JOURNAL_ACCESS_WRITE);
731         if (status < 0) {
732                 mlog_errno(status);
733                 goto leave;
734         }
735
736         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
737         status = ocfs2_add_clusters_in_btree(handle,
738                                              &et,
739                                              &logical_start,
740                                              clusters_to_add,
741                                              0,
742                                              ctxt->data_ac,
743                                              ctxt->meta_ac,
744                                              &why);
745         if (status < 0) {
746                 mlog_errno(status);
747                 goto leave;
748         }
749
750         status = ocfs2_journal_dirty(handle, vb->vb_bh);
751         if (status < 0) {
752                 mlog_errno(status);
753                 goto leave;
754         }
755
756         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
757
758         /*
759          * We should have already allocated enough space before the transaction,
760          * so no need to restart.
761          */
762         BUG_ON(why != RESTART_NONE || clusters_to_add);
763
764 leave:
765
766         return status;
767 }
768
769 static int __ocfs2_remove_xattr_range(struct inode *inode,
770                                       struct ocfs2_xattr_value_buf *vb,
771                                       u32 cpos, u32 phys_cpos, u32 len,
772                                       unsigned int ext_flags,
773                                       struct ocfs2_xattr_set_ctxt *ctxt)
774 {
775         int ret;
776         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
777         handle_t *handle = ctxt->handle;
778         struct ocfs2_extent_tree et;
779
780         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
781
782         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
783                             OCFS2_JOURNAL_ACCESS_WRITE);
784         if (ret) {
785                 mlog_errno(ret);
786                 goto out;
787         }
788
789         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
790                                   &ctxt->dealloc);
791         if (ret) {
792                 mlog_errno(ret);
793                 goto out;
794         }
795
796         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
797
798         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
799         if (ret) {
800                 mlog_errno(ret);
801                 goto out;
802         }
803
804         if (ext_flags & OCFS2_EXT_REFCOUNTED)
805                 ret = ocfs2_decrease_refcount(inode, handle,
806                                         ocfs2_blocks_to_clusters(inode->i_sb,
807                                                                  phys_blkno),
808                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
809         else
810                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
811                                                   phys_blkno, len);
812         if (ret)
813                 mlog_errno(ret);
814
815 out:
816         return ret;
817 }
818
819 static int ocfs2_xattr_shrink_size(struct inode *inode,
820                                    u32 old_clusters,
821                                    u32 new_clusters,
822                                    struct ocfs2_xattr_value_buf *vb,
823                                    struct ocfs2_xattr_set_ctxt *ctxt)
824 {
825         int ret = 0;
826         unsigned int ext_flags;
827         u32 trunc_len, cpos, phys_cpos, alloc_size;
828         u64 block;
829
830         if (old_clusters <= new_clusters)
831                 return 0;
832
833         cpos = new_clusters;
834         trunc_len = old_clusters - new_clusters;
835         while (trunc_len) {
836                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
837                                                &alloc_size,
838                                                &vb->vb_xv->xr_list, &ext_flags);
839                 if (ret) {
840                         mlog_errno(ret);
841                         goto out;
842                 }
843
844                 if (alloc_size > trunc_len)
845                         alloc_size = trunc_len;
846
847                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
848                                                  phys_cpos, alloc_size,
849                                                  ext_flags, ctxt);
850                 if (ret) {
851                         mlog_errno(ret);
852                         goto out;
853                 }
854
855                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
856                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
857                                                        block, alloc_size);
858                 cpos += alloc_size;
859                 trunc_len -= alloc_size;
860         }
861
862 out:
863         return ret;
864 }
865
866 static int ocfs2_xattr_value_truncate(struct inode *inode,
867                                       struct ocfs2_xattr_value_buf *vb,
868                                       int len,
869                                       struct ocfs2_xattr_set_ctxt *ctxt)
870 {
871         int ret;
872         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
873         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
874
875         if (new_clusters == old_clusters)
876                 return 0;
877
878         if (new_clusters > old_clusters)
879                 ret = ocfs2_xattr_extend_allocation(inode,
880                                                     new_clusters - old_clusters,
881                                                     vb, ctxt);
882         else
883                 ret = ocfs2_xattr_shrink_size(inode,
884                                               old_clusters, new_clusters,
885                                               vb, ctxt);
886
887         return ret;
888 }
889
/*
 * Append one "<prefix><name>\0" record to a listxattr-style buffer.
 *
 * @buffer:   start of the output buffer
 * @size:     capacity of @buffer; 0 means "only compute the length"
 * @result:   in/out running byte count; always advanced by the record size
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name, not necessarily NUL-terminated
 * @name_len: number of bytes in @name
 *
 * Returns 0 on success (or when only sizing), -ERANGE when the record
 * would end past @size.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	char *dst = buffer + *result;
	int plen = strlen(prefix);
	int record_len = plen + name_len + 1;	/* +1 for the trailing NUL */

	/* Account for the record even if we end up not writing it. */
	*result += record_len;

	/* Sizing pass: the caller only wants to know the total length. */
	if (size == 0)
		return 0;

	if (size < *result)
		return -ERANGE;

	memcpy(dst, prefix, plen);
	dst += plen;
	memcpy(dst, name, name_len);
	dst[name_len] = '\0';

	return 0;
}
913
914 static int ocfs2_xattr_list_entries(struct inode *inode,
915                                     struct ocfs2_xattr_header *header,
916                                     char *buffer, size_t buffer_size)
917 {
918         size_t result = 0;
919         int i, type, ret;
920         const char *prefix, *name;
921
922         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
923                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
924                 type = ocfs2_xattr_get_type(entry);
925                 prefix = ocfs2_xattr_prefix(type);
926
927                 if (prefix) {
928                         name = (const char *)header +
929                                 le16_to_cpu(entry->xe_name_offset);
930
931                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
932                                                      &result, prefix, name,
933                                                      entry->xe_name_len);
934                         if (ret)
935                                 return ret;
936                 }
937         }
938
939         return result;
940 }
941
942 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
943                                          struct ocfs2_dinode *di)
944 {
945         struct ocfs2_xattr_header *xh;
946         int i;
947
948         xh = (struct ocfs2_xattr_header *)
949                  ((void *)di + inode->i_sb->s_blocksize -
950                  le16_to_cpu(di->i_xattr_inline_size));
951
952         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
953                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
954                         return 1;
955
956         return 0;
957 }
958
959 static int ocfs2_xattr_ibody_list(struct inode *inode,
960                                   struct ocfs2_dinode *di,
961                                   char *buffer,
962                                   size_t buffer_size)
963 {
964         struct ocfs2_xattr_header *header = NULL;
965         struct ocfs2_inode_info *oi = OCFS2_I(inode);
966         int ret = 0;
967
968         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
969                 return ret;
970
971         header = (struct ocfs2_xattr_header *)
972                  ((void *)di + inode->i_sb->s_blocksize -
973                  le16_to_cpu(di->i_xattr_inline_size));
974
975         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
976
977         return ret;
978 }
979
980 static int ocfs2_xattr_block_list(struct inode *inode,
981                                   struct ocfs2_dinode *di,
982                                   char *buffer,
983                                   size_t buffer_size)
984 {
985         struct buffer_head *blk_bh = NULL;
986         struct ocfs2_xattr_block *xb;
987         int ret = 0;
988
989         if (!di->i_xattr_loc)
990                 return ret;
991
992         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
993                                      &blk_bh);
994         if (ret < 0) {
995                 mlog_errno(ret);
996                 return ret;
997         }
998
999         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1000         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1001                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
1002                 ret = ocfs2_xattr_list_entries(inode, header,
1003                                                buffer, buffer_size);
1004         } else
1005                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1006                                                    buffer, buffer_size);
1007
1008         brelse(blk_bh);
1009
1010         return ret;
1011 }
1012
1013 ssize_t ocfs2_listxattr(struct dentry *dentry,
1014                         char *buffer,
1015                         size_t size)
1016 {
1017         int ret = 0, i_ret = 0, b_ret = 0;
1018         struct buffer_head *di_bh = NULL;
1019         struct ocfs2_dinode *di = NULL;
1020         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
1021
1022         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1023                 return -EOPNOTSUPP;
1024
1025         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1026                 return ret;
1027
1028         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
1029         if (ret < 0) {
1030                 mlog_errno(ret);
1031                 return ret;
1032         }
1033
1034         di = (struct ocfs2_dinode *)di_bh->b_data;
1035
1036         down_read(&oi->ip_xattr_sem);
1037         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
1038         if (i_ret < 0)
1039                 b_ret = 0;
1040         else {
1041                 if (buffer) {
1042                         buffer += i_ret;
1043                         size -= i_ret;
1044                 }
1045                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
1046                                                buffer, size);
1047                 if (b_ret < 0)
1048                         i_ret = 0;
1049         }
1050         up_read(&oi->ip_xattr_sem);
1051         ocfs2_inode_unlock(dentry->d_inode, 0);
1052
1053         brelse(di_bh);
1054
1055         return i_ret + b_ret;
1056 }
1057
1058 static int ocfs2_xattr_find_entry(int name_index,
1059                                   const char *name,
1060                                   struct ocfs2_xattr_search *xs)
1061 {
1062         struct ocfs2_xattr_entry *entry;
1063         size_t name_len;
1064         int i, cmp = 1;
1065
1066         if (name == NULL)
1067                 return -EINVAL;
1068
1069         name_len = strlen(name);
1070         entry = xs->here;
1071         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1072                 cmp = name_index - ocfs2_xattr_get_type(entry);
1073                 if (!cmp)
1074                         cmp = name_len - entry->xe_name_len;
1075                 if (!cmp)
1076                         cmp = memcmp(name, (xs->base +
1077                                      le16_to_cpu(entry->xe_name_offset)),
1078                                      name_len);
1079                 if (cmp == 0)
1080                         break;
1081                 entry += 1;
1082         }
1083         xs->here = entry;
1084
1085         return cmp ? -ENODATA : 0;
1086 }
1087
1088 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1089                                          struct ocfs2_xattr_value_root *xv,
1090                                          void *buffer,
1091                                          size_t len)
1092 {
1093         u32 cpos, p_cluster, num_clusters, bpc, clusters;
1094         u64 blkno;
1095         int i, ret = 0;
1096         size_t cplen, blocksize;
1097         struct buffer_head *bh = NULL;
1098         struct ocfs2_extent_list *el;
1099
1100         el = &xv->xr_list;
1101         clusters = le32_to_cpu(xv->xr_clusters);
1102         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1103         blocksize = inode->i_sb->s_blocksize;
1104
1105         cpos = 0;
1106         while (cpos < clusters) {
1107                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1108                                                &num_clusters, el, NULL);
1109                 if (ret) {
1110                         mlog_errno(ret);
1111                         goto out;
1112                 }
1113
1114                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1115                 /* Copy ocfs2_xattr_value */
1116                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1117                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1118                                                &bh, NULL);
1119                         if (ret) {
1120                                 mlog_errno(ret);
1121                                 goto out;
1122                         }
1123
1124                         cplen = len >= blocksize ? blocksize : len;
1125                         memcpy(buffer, bh->b_data, cplen);
1126                         len -= cplen;
1127                         buffer += cplen;
1128
1129                         brelse(bh);
1130                         bh = NULL;
1131                         if (len == 0)
1132                                 break;
1133                 }
1134                 cpos += num_clusters;
1135         }
1136 out:
1137         return ret;
1138 }
1139
1140 static int ocfs2_xattr_ibody_get(struct inode *inode,
1141                                  int name_index,
1142                                  const char *name,
1143                                  void *buffer,
1144                                  size_t buffer_size,
1145                                  struct ocfs2_xattr_search *xs)
1146 {
1147         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1148         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1149         struct ocfs2_xattr_value_root *xv;
1150         size_t size;
1151         int ret = 0;
1152
1153         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1154                 return -ENODATA;
1155
1156         xs->end = (void *)di + inode->i_sb->s_blocksize;
1157         xs->header = (struct ocfs2_xattr_header *)
1158                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1159         xs->base = (void *)xs->header;
1160         xs->here = xs->header->xh_entries;
1161
1162         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1163         if (ret)
1164                 return ret;
1165         size = le64_to_cpu(xs->here->xe_value_size);
1166         if (buffer) {
1167                 if (size > buffer_size)
1168                         return -ERANGE;
1169                 if (ocfs2_xattr_is_local(xs->here)) {
1170                         memcpy(buffer, (void *)xs->base +
1171                                le16_to_cpu(xs->here->xe_name_offset) +
1172                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1173                 } else {
1174                         xv = (struct ocfs2_xattr_value_root *)
1175                                 (xs->base + le16_to_cpu(
1176                                  xs->here->xe_name_offset) +
1177                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1178                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1179                                                             buffer, size);
1180                         if (ret < 0) {
1181                                 mlog_errno(ret);
1182                                 return ret;
1183                         }
1184                 }
1185         }
1186
1187         return size;
1188 }
1189
1190 static int ocfs2_xattr_block_get(struct inode *inode,
1191                                  int name_index,
1192                                  const char *name,
1193                                  void *buffer,
1194                                  size_t buffer_size,
1195                                  struct ocfs2_xattr_search *xs)
1196 {
1197         struct ocfs2_xattr_block *xb;
1198         struct ocfs2_xattr_value_root *xv;
1199         size_t size;
1200         int ret = -ENODATA, name_offset, name_len, i;
1201         int uninitialized_var(block_off);
1202
1203         xs->bucket = ocfs2_xattr_bucket_new(inode);
1204         if (!xs->bucket) {
1205                 ret = -ENOMEM;
1206                 mlog_errno(ret);
1207                 goto cleanup;
1208         }
1209
1210         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1211         if (ret) {
1212                 mlog_errno(ret);
1213                 goto cleanup;
1214         }
1215
1216         if (xs->not_found) {
1217                 ret = -ENODATA;
1218                 goto cleanup;
1219         }
1220
1221         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1222         size = le64_to_cpu(xs->here->xe_value_size);
1223         if (buffer) {
1224                 ret = -ERANGE;
1225                 if (size > buffer_size)
1226                         goto cleanup;
1227
1228                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1229                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1230                 i = xs->here - xs->header->xh_entries;
1231
1232                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1233                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1234                                                                 bucket_xh(xs->bucket),
1235                                                                 i,
1236                                                                 &block_off,
1237                                                                 &name_offset);
1238                         xs->base = bucket_block(xs->bucket, block_off);
1239                 }
1240                 if (ocfs2_xattr_is_local(xs->here)) {
1241                         memcpy(buffer, (void *)xs->base +
1242                                name_offset + name_len, size);
1243                 } else {
1244                         xv = (struct ocfs2_xattr_value_root *)
1245                                 (xs->base + name_offset + name_len);
1246                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1247                                                             buffer, size);
1248                         if (ret < 0) {
1249                                 mlog_errno(ret);
1250                                 goto cleanup;
1251                         }
1252                 }
1253         }
1254         ret = size;
1255 cleanup:
1256         ocfs2_xattr_bucket_free(xs->bucket);
1257
1258         brelse(xs->xattr_bh);
1259         xs->xattr_bh = NULL;
1260         return ret;
1261 }
1262
1263 int ocfs2_xattr_get_nolock(struct inode *inode,
1264                            struct buffer_head *di_bh,
1265                            int name_index,
1266                            const char *name,
1267                            void *buffer,
1268                            size_t buffer_size)
1269 {
1270         int ret;
1271         struct ocfs2_dinode *di = NULL;
1272         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1273         struct ocfs2_xattr_search xis = {
1274                 .not_found = -ENODATA,
1275         };
1276         struct ocfs2_xattr_search xbs = {
1277                 .not_found = -ENODATA,
1278         };
1279
1280         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1281                 return -EOPNOTSUPP;
1282
1283         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1284                 ret = -ENODATA;
1285
1286         xis.inode_bh = xbs.inode_bh = di_bh;
1287         di = (struct ocfs2_dinode *)di_bh->b_data;
1288
1289         down_read(&oi->ip_xattr_sem);
1290         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1291                                     buffer_size, &xis);
1292         if (ret == -ENODATA && di->i_xattr_loc)
1293                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1294                                             buffer_size, &xbs);
1295         up_read(&oi->ip_xattr_sem);
1296
1297         return ret;
1298 }
1299
1300 /* ocfs2_xattr_get()
1301  *
1302  * Copy an extended attribute into the buffer provided.
1303  * Buffer is NULL to compute the size of buffer required.
1304  */
1305 static int ocfs2_xattr_get(struct inode *inode,
1306                            int name_index,
1307                            const char *name,
1308                            void *buffer,
1309                            size_t buffer_size)
1310 {
1311         int ret;
1312         struct buffer_head *di_bh = NULL;
1313
1314         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1315         if (ret < 0) {
1316                 mlog_errno(ret);
1317                 return ret;
1318         }
1319         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1320                                      name, buffer, buffer_size);
1321
1322         ocfs2_inode_unlock(inode, 0);
1323
1324         brelse(di_bh);
1325
1326         return ret;
1327 }
1328
1329 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1330                                            handle_t *handle,
1331                                            struct ocfs2_xattr_value_buf *vb,
1332                                            const void *value,
1333                                            int value_len)
1334 {
1335         int ret = 0, i, cp_len;
1336         u16 blocksize = inode->i_sb->s_blocksize;
1337         u32 p_cluster, num_clusters;
1338         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1339         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1340         u64 blkno;
1341         struct buffer_head *bh = NULL;
1342         unsigned int ext_flags;
1343         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1344
1345         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1346
1347         while (cpos < clusters) {
1348                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1349                                                &num_clusters, &xv->xr_list,
1350                                                &ext_flags);
1351                 if (ret) {
1352                         mlog_errno(ret);
1353                         goto out;
1354                 }
1355
1356                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1357
1358                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1359
1360                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1361                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1362                                                &bh, NULL);
1363                         if (ret) {
1364                                 mlog_errno(ret);
1365                                 goto out;
1366                         }
1367
1368                         ret = ocfs2_journal_access(handle,
1369                                                    INODE_CACHE(inode),
1370                                                    bh,
1371                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1372                         if (ret < 0) {
1373                                 mlog_errno(ret);
1374                                 goto out;
1375                         }
1376
1377                         cp_len = value_len > blocksize ? blocksize : value_len;
1378                         memcpy(bh->b_data, value, cp_len);
1379                         value_len -= cp_len;
1380                         value += cp_len;
1381                         if (cp_len < blocksize)
1382                                 memset(bh->b_data + cp_len, 0,
1383                                        blocksize - cp_len);
1384
1385                         ret = ocfs2_journal_dirty(handle, bh);
1386                         if (ret < 0) {
1387                                 mlog_errno(ret);
1388                                 goto out;
1389                         }
1390                         brelse(bh);
1391                         bh = NULL;
1392
1393                         /*
1394                          * XXX: do we need to empty all the following
1395                          * blocks in this cluster?
1396                          */
1397                         if (!value_len)
1398                                 break;
1399                 }
1400                 cpos += num_clusters;
1401         }
1402 out:
1403         brelse(bh);
1404
1405         return ret;
1406 }
1407
1408 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1409                                        int num_entries)
1410 {
1411         int free_space;
1412
1413         if (!needed_space)
1414                 return 0;
1415
1416         free_space = free_start -
1417                 sizeof(struct ocfs2_xattr_header) -
1418                 (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1419                 OCFS2_XATTR_HEADER_GAP;
1420         if (free_space < 0)
1421                 return -EIO;
1422         if (free_space < needed_space)
1423                 return -ENOSPC;
1424
1425         return 0;
1426 }
1427
1428 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1429                                    int type)
1430 {
1431         return loc->xl_ops->xlo_journal_access(handle, loc, type);
1432 }
1433
1434 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1435 {
1436         loc->xl_ops->xlo_journal_dirty(handle, loc);
1437 }
1438
1439 /* Give a pointer into the storage for the given offset */
1440 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1441 {
1442         BUG_ON(offset >= loc->xl_size);
1443         return loc->xl_ops->xlo_offset_pointer(loc, offset);
1444 }
1445
1446 /*
1447  * Wipe the name+value pair and allow the storage to reclaim it.  This
1448  * must be followed by either removal of the entry or a call to
1449  * ocfs2_xa_add_namevalue().
1450  */
1451 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1452 {
1453         loc->xl_ops->xlo_wipe_namevalue(loc);
1454 }
1455
1456 /*
1457  * Find lowest offset to a name+value pair.  This is the start of our
1458  * downward-growing free space.
1459  */
1460 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1461 {
1462         return loc->xl_ops->xlo_get_free_start(loc);
1463 }
1464
1465 /* Can we reuse loc->xl_entry for xi? */
1466 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1467                                     struct ocfs2_xattr_info *xi)
1468 {
1469         return loc->xl_ops->xlo_can_reuse(loc, xi);
1470 }
1471
1472 /* How much free space is needed to set the new value */
1473 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1474                                 struct ocfs2_xattr_info *xi)
1475 {
1476         return loc->xl_ops->xlo_check_space(loc, xi);
1477 }
1478
1479 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1480 {
1481         loc->xl_ops->xlo_add_entry(loc, name_hash);
1482         loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1483         /*
1484          * We can't leave the new entry's xe_name_offset at zero or
1485          * add_namevalue() will go nuts.  We set it to the size of our
1486          * storage so that it can never be less than any other entry.
1487          */
1488         loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1489 }
1490
1491 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1492                                    struct ocfs2_xattr_info *xi)
1493 {
1494         int size = namevalue_size_xi(xi);
1495         int nameval_offset;
1496         char *nameval_buf;
1497
1498         loc->xl_ops->xlo_add_namevalue(loc, size);
1499         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1500         loc->xl_entry->xe_name_len = xi->xi_name_len;
1501         ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1502         ocfs2_xattr_set_local(loc->xl_entry,
1503                               xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1504
1505         nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1506         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1507         memset(nameval_buf, 0, size);
1508         memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1509 }
1510
1511 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1512                                     struct ocfs2_xattr_value_buf *vb)
1513 {
1514         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1515         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1516
1517         /* Value bufs are for value trees */
1518         BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1519         BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1520                (name_size + OCFS2_XATTR_ROOT_SIZE));
1521
1522         loc->xl_ops->xlo_fill_value_buf(loc, vb);
1523         vb->vb_xv =
1524                 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1525                                                         nameval_offset +
1526                                                         name_size);
1527 }
1528
1529 static int ocfs2_xa_block_journal_access(handle_t *handle,
1530                                          struct ocfs2_xa_loc *loc, int type)
1531 {
1532         struct buffer_head *bh = loc->xl_storage;
1533         ocfs2_journal_access_func access;
1534
1535         if (loc->xl_size == (bh->b_size -
1536                              offsetof(struct ocfs2_xattr_block,
1537                                       xb_attrs.xb_header)))
1538                 access = ocfs2_journal_access_xb;
1539         else
1540                 access = ocfs2_journal_access_di;
1541         return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1542 }
1543
1544 static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1545                                          struct ocfs2_xa_loc *loc)
1546 {
1547         struct buffer_head *bh = loc->xl_storage;
1548
1549         ocfs2_journal_dirty(handle, bh);
1550 }
1551
1552 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1553                                            int offset)
1554 {
1555         return (char *)loc->xl_header + offset;
1556 }
1557
1558 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1559                                     struct ocfs2_xattr_info *xi)
1560 {
1561         /*
1562          * Block storage is strict.  If the sizes aren't exact, we will
1563          * remove the old one and reinsert the new.
1564          */
1565         return namevalue_size_xe(loc->xl_entry) ==
1566                 namevalue_size_xi(xi);
1567 }
1568
1569 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1570 {
1571         struct ocfs2_xattr_header *xh = loc->xl_header;
1572         int i, count = le16_to_cpu(xh->xh_count);
1573         int offset, free_start = loc->xl_size;
1574
1575         for (i = 0; i < count; i++) {
1576                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1577                 if (offset < free_start)
1578                         free_start = offset;
1579         }
1580
1581         return free_start;
1582 }
1583
1584 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1585                                       struct ocfs2_xattr_info *xi)
1586 {
1587         int count = le16_to_cpu(loc->xl_header->xh_count);
1588         int free_start = ocfs2_xa_get_free_start(loc);
1589         int needed_space = ocfs2_xi_entry_usage(xi);
1590
1591         /*
1592          * Block storage will reclaim the original entry before inserting
1593          * the new value, so we only need the difference.  If the new
1594          * entry is smaller than the old one, we don't need anything.
1595          */
1596         if (loc->xl_entry) {
1597                 /* Don't need space if we're reusing! */
1598                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1599                         needed_space = 0;
1600                 else
1601                         needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1602         }
1603         if (needed_space < 0)
1604                 needed_space = 0;
1605         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1606 }
1607
1608 /*
1609  * Block storage for xattrs keeps the name+value pairs compacted.  When
1610  * we remove one, we have to shift any that preceded it towards the end.
1611  */
1612 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1613 {
1614         int i, offset;
1615         int namevalue_offset, first_namevalue_offset, namevalue_size;
1616         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1617         struct ocfs2_xattr_header *xh = loc->xl_header;
1618         int count = le16_to_cpu(xh->xh_count);
1619
1620         namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1621         namevalue_size = namevalue_size_xe(entry);
1622         first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1623
1624         /* Shift the name+value pairs */
1625         memmove((char *)xh + first_namevalue_offset + namevalue_size,
1626                 (char *)xh + first_namevalue_offset,
1627                 namevalue_offset - first_namevalue_offset);
1628         memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1629
1630         /* Now tell xh->xh_entries about it */
1631         for (i = 0; i < count; i++) {
1632                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1633                 if (offset < namevalue_offset)
1634                         le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1635                                      namevalue_size);
1636         }
1637
1638         /*
1639          * Note that we don't update xh_free_start or xh_name_value_len
1640          * because they're not used in block-stored xattrs.
1641          */
1642 }
1643
1644 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1645 {
1646         int count = le16_to_cpu(loc->xl_header->xh_count);
1647         loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1648         le16_add_cpu(&loc->xl_header->xh_count, 1);
1649         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1650 }
1651
1652 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1653 {
1654         int free_start = ocfs2_xa_get_free_start(loc);
1655
1656         loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1657 }
1658
1659 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1660                                           struct ocfs2_xattr_value_buf *vb)
1661 {
1662         struct buffer_head *bh = loc->xl_storage;
1663
1664         if (loc->xl_size == (bh->b_size -
1665                              offsetof(struct ocfs2_xattr_block,
1666                                       xb_attrs.xb_header)))
1667                 vb->vb_access = ocfs2_journal_access_xb;
1668         else
1669                 vb->vb_access = ocfs2_journal_access_di;
1670         vb->vb_bh = bh;
1671 }
1672
1673 /*
1674  * Operations for xattrs stored in blocks.  This includes inline inode
1675  * storage and unindexed ocfs2_xattr_blocks.
1676  */
1677 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1678         .xlo_journal_access     = ocfs2_xa_block_journal_access,
1679         .xlo_journal_dirty      = ocfs2_xa_block_journal_dirty,
1680         .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1681         .xlo_check_space        = ocfs2_xa_block_check_space,
1682         .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1683         .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1684         .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1685         .xlo_add_entry          = ocfs2_xa_block_add_entry,
1686         .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1687         .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1688 };
1689
1690 static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1691                                           struct ocfs2_xa_loc *loc, int type)
1692 {
1693         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1694
1695         return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1696 }
1697
1698 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1699                                           struct ocfs2_xa_loc *loc)
1700 {
1701         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1702
1703         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1704 }
1705
1706 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1707                                             int offset)
1708 {
1709         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1710         int block, block_offset;
1711
1712         /* The header is at the front of the bucket */
1713         block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1714         block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1715
1716         return bucket_block(bucket, block) + block_offset;
1717 }
1718
1719 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1720                                      struct ocfs2_xattr_info *xi)
1721 {
1722         return namevalue_size_xe(loc->xl_entry) >=
1723                 namevalue_size_xi(xi);
1724 }
1725
1726 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1727 {
1728         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1729         return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1730 }
1731
1732 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1733                                          int free_start, int size)
1734 {
1735         /*
1736          * We need to make sure that the name+value pair fits within
1737          * one block.
1738          */
1739         if (((free_start - size) >> sb->s_blocksize_bits) !=
1740             ((free_start - 1) >> sb->s_blocksize_bits))
1741                 free_start -= free_start % sb->s_blocksize;
1742
1743         return free_start;
1744 }
1745
1746 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1747                                        struct ocfs2_xattr_info *xi)
1748 {
1749         int rc;
1750         int count = le16_to_cpu(loc->xl_header->xh_count);
1751         int free_start = ocfs2_xa_get_free_start(loc);
1752         int needed_space = ocfs2_xi_entry_usage(xi);
1753         int size = namevalue_size_xi(xi);
1754         struct super_block *sb = loc->xl_inode->i_sb;
1755
1756         /*
1757          * Bucket storage does not reclaim name+value pairs it cannot
1758          * reuse.  They live as holes until the bucket fills, and then
1759          * the bucket is defragmented.  However, the bucket can reclaim
1760          * the ocfs2_xattr_entry.
1761          */
1762         if (loc->xl_entry) {
1763                 /* Don't need space if we're reusing! */
1764                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1765                         needed_space = 0;
1766                 else
1767                         needed_space -= sizeof(struct ocfs2_xattr_entry);
1768         }
1769         BUG_ON(needed_space < 0);
1770
1771         if (free_start < size) {
1772                 if (needed_space)
1773                         return -ENOSPC;
1774         } else {
1775                 /*
1776                  * First we check if it would fit in the first place.
1777                  * Below, we align the free start to a block.  This may
1778                  * slide us below the minimum gap.  By checking unaligned
1779                  * first, we avoid that error.
1780                  */
1781                 rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1782                                                  count);
1783                 if (rc)
1784                         return rc;
1785                 free_start = ocfs2_bucket_align_free_start(sb, free_start,
1786                                                            size);
1787         }
1788         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1789 }
1790
1791 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1792 {
1793         le16_add_cpu(&loc->xl_header->xh_name_value_len,
1794                      -namevalue_size_xe(loc->xl_entry));
1795 }
1796
1797 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1798 {
1799         struct ocfs2_xattr_header *xh = loc->xl_header;
1800         int count = le16_to_cpu(xh->xh_count);
1801         int low = 0, high = count - 1, tmp;
1802         struct ocfs2_xattr_entry *tmp_xe;
1803
1804         /*
1805          * We keep buckets sorted by name_hash, so we need to find
1806          * our insert place.
1807          */
1808         while (low <= high && count) {
1809                 tmp = (low + high) / 2;
1810                 tmp_xe = &xh->xh_entries[tmp];
1811
1812                 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1813                         low = tmp + 1;
1814                 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1815                         high = tmp - 1;
1816                 else {
1817                         low = tmp;
1818                         break;
1819                 }
1820         }
1821
1822         if (low != count)
1823                 memmove(&xh->xh_entries[low + 1],
1824                         &xh->xh_entries[low],
1825                         ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1826
1827         le16_add_cpu(&xh->xh_count, 1);
1828         loc->xl_entry = &xh->xh_entries[low];
1829         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1830 }
1831
1832 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1833 {
1834         int free_start = ocfs2_xa_get_free_start(loc);
1835         struct ocfs2_xattr_header *xh = loc->xl_header;
1836         struct super_block *sb = loc->xl_inode->i_sb;
1837         int nameval_offset;
1838
1839         free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1840         nameval_offset = free_start - size;
1841         loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1842         xh->xh_free_start = cpu_to_le16(nameval_offset);
1843         le16_add_cpu(&xh->xh_name_value_len, size);
1844
1845 }
1846
1847 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1848                                            struct ocfs2_xattr_value_buf *vb)
1849 {
1850         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1851         struct super_block *sb = loc->xl_inode->i_sb;
1852         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1853         int size = namevalue_size_xe(loc->xl_entry);
1854         int block_offset = nameval_offset >> sb->s_blocksize_bits;
1855
1856         /* Values are not allowed to straddle block boundaries */
1857         BUG_ON(block_offset !=
1858                ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1859         /* We expect the bucket to be filled in */
1860         BUG_ON(!bucket->bu_bhs[block_offset]);
1861
1862         vb->vb_access = ocfs2_journal_access;
1863         vb->vb_bh = bucket->bu_bhs[block_offset];
1864 }
1865
1866 /* Operations for xattrs stored in buckets. */
1867 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1868         .xlo_journal_access     = ocfs2_xa_bucket_journal_access,
1869         .xlo_journal_dirty      = ocfs2_xa_bucket_journal_dirty,
1870         .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1871         .xlo_check_space        = ocfs2_xa_bucket_check_space,
1872         .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1873         .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1874         .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1875         .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1876         .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1877         .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1878 };
1879
1880 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1881                                    struct ocfs2_xattr_set_ctxt *ctxt)
1882 {
1883         int trunc_rc, access_rc;
1884         struct ocfs2_xattr_value_buf vb;
1885
1886         ocfs2_xa_fill_value_buf(loc, &vb);
1887         trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1888                                               ctxt);
1889
1890         /*
1891          * The caller of ocfs2_xa_value_truncate() has already called
1892          * ocfs2_xa_journal_access on the loc.  However, The truncate code
1893          * calls ocfs2_extend_trans().  This may commit the previous
1894          * transaction and open a new one.  If this is a bucket, truncate
1895          * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1896          * the caller is expecting to dirty the entire bucket.  So we must
1897          * reset the journal work.  We do this even if truncate has failed,
1898          * as it could have failed after committing the extend.
1899          */
1900         access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1901                                             OCFS2_JOURNAL_ACCESS_WRITE);
1902
1903         /* Errors in truncate take precedence */
1904         return trunc_rc ? trunc_rc : access_rc;
1905 }
1906
1907 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1908 {
1909         int index, count;
1910         struct ocfs2_xattr_header *xh = loc->xl_header;
1911         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1912
1913         ocfs2_xa_wipe_namevalue(loc);
1914         loc->xl_entry = NULL;
1915
1916         le16_add_cpu(&xh->xh_count, -1);
1917         count = le16_to_cpu(xh->xh_count);
1918
1919         /*
1920          * Only zero out the entry if there are more remaining.  This is
1921          * important for an empty bucket, as it keeps track of the
1922          * bucket's hash value.  It doesn't hurt empty block storage.
1923          */
1924         if (count) {
1925                 index = ((char *)entry - (char *)&xh->xh_entries) /
1926                         sizeof(struct ocfs2_xattr_entry);
1927                 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1928                         (count - index) * sizeof(struct ocfs2_xattr_entry));
1929                 memset(&xh->xh_entries[count], 0,
1930                        sizeof(struct ocfs2_xattr_entry));
1931         }
1932 }
1933
1934 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
1935                            struct ocfs2_xattr_set_ctxt *ctxt)
1936 {
1937         int rc = 0;
1938
1939         if (!ocfs2_xattr_is_local(loc->xl_entry)) {
1940                 rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
1941                 if (rc) {
1942                         mlog_errno(rc);
1943                         goto out;
1944                 }
1945         }
1946
1947         ocfs2_xa_remove_entry(loc);
1948
1949 out:
1950         return rc;
1951 }
1952
1953 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
1954 {
1955         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1956         char *nameval_buf;
1957
1958         nameval_buf = ocfs2_xa_offset_pointer(loc,
1959                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
1960         memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
1961 }
1962
1963 /*
1964  * Take an existing entry and make it ready for the new value.  This
1965  * won't allocate space, but it may free space.  It should be ready for
1966  * ocfs2_xa_prepare_entry() to finish the work.
1967  */
1968 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
1969                                 struct ocfs2_xattr_info *xi,
1970                                 struct ocfs2_xattr_set_ctxt *ctxt)
1971 {
1972         int rc = 0;
1973         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
1974         char *nameval_buf;
1975         int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
1976         int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
1977
1978         BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
1979                name_size);
1980
1981         nameval_buf = ocfs2_xa_offset_pointer(loc,
1982                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
1983         if (xe_local) {
1984                 memset(nameval_buf + name_size, 0,
1985                        namevalue_size_xe(loc->xl_entry) - name_size);
1986                 if (!xi_local)
1987                         ocfs2_xa_install_value_root(loc);
1988         } else {
1989                 if (xi_local) {
1990                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
1991                         if (rc < 0) {
1992                                 mlog_errno(rc);
1993                                 goto out;
1994                         }
1995                         memset(nameval_buf + name_size, 0,
1996                                namevalue_size_xe(loc->xl_entry) -
1997                                name_size);
1998                 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
1999                            xi->xi_value_len) {
2000                         rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2001                                                      ctxt);
2002                         if (rc < 0) {
2003                                 mlog_errno(rc);
2004                                 goto out;
2005                         }
2006                 }
2007         }
2008
2009         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2010         ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2011
2012 out:
2013         return rc;
2014 }
2015
2016 /*
2017  * Prepares loc->xl_entry to receive the new xattr.  This includes
2018  * properly setting up the name+value pair region.  If loc->xl_entry
2019  * already exists, it will take care of modifying it appropriately.
2020  *
2021  * Note that this modifies the data.  You did journal_access already,
2022  * right?
2023  */
2024 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2025                                   struct ocfs2_xattr_info *xi,
2026                                   u32 name_hash,
2027                                   struct ocfs2_xattr_set_ctxt *ctxt)
2028 {
2029         int rc = 0;
2030
2031         rc = ocfs2_xa_check_space(loc, xi);
2032         if (rc)
2033                 goto out;
2034
2035         if (loc->xl_entry) {
2036                 if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2037                         rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2038                         if (rc)
2039                                 goto out;
2040                         goto alloc_value;
2041                 }
2042
2043                 if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2044                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2045                         if (rc) {
2046                                 mlog_errno(rc);
2047                                 goto out;
2048                         }
2049                 }
2050                 ocfs2_xa_wipe_namevalue(loc);
2051         } else
2052                 ocfs2_xa_add_entry(loc, name_hash);
2053
2054         /*
2055          * If we get here, we have a blank entry.  Fill it.  We grow our
2056          * name+value pair back from the end.
2057          */
2058         ocfs2_xa_add_namevalue(loc, xi);
2059         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2060                 ocfs2_xa_install_value_root(loc);
2061
2062 alloc_value:
2063         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2064                 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2065                 if (rc < 0)
2066                         mlog_errno(rc);
2067         }
2068
2069 out:
2070         return rc;
2071 }
2072
2073 /*
2074  * Store the value portion of the name+value pair.  This will skip
2075  * values that are stored externally.  Their tree roots were set up
2076  * by ocfs2_xa_prepare_entry().
2077  */
2078 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2079                                 struct ocfs2_xattr_info *xi,
2080                                 struct ocfs2_xattr_set_ctxt *ctxt)
2081 {
2082         int rc = 0;
2083         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2084         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2085         char *nameval_buf;
2086         struct ocfs2_xattr_value_buf vb;
2087
2088         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2089         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2090                 ocfs2_xa_fill_value_buf(loc, &vb);
2091                 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2092                                                      ctxt->handle, &vb,
2093                                                      xi->xi_value,
2094                                                      xi->xi_value_len);
2095         } else
2096                 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2097
2098         return rc;
2099 }
2100
2101 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2102                         struct ocfs2_xattr_info *xi,
2103                         struct ocfs2_xattr_set_ctxt *ctxt)
2104 {
2105         int ret;
2106         u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2107                                               xi->xi_name_len);
2108
2109         ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2110                                       OCFS2_JOURNAL_ACCESS_WRITE);
2111         if (ret) {
2112                 mlog_errno(ret);
2113                 goto out;
2114         }
2115
2116         /* Don't worry, we are never called with !xi_value and !xl_entry */
2117         if (!xi->xi_value) {
2118                 ret = ocfs2_xa_remove(loc, ctxt);
2119                 goto out;
2120         }
2121
2122         ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2123         if (ret) {
2124                 if (ret != -ENOSPC)
2125                         mlog_errno(ret);
2126                 goto out;
2127         }
2128
2129         ret = ocfs2_xa_store_value(loc, xi, ctxt);
2130         if (ret) {
2131                 mlog_errno(ret);
2132                 goto out;
2133         }
2134
2135         ocfs2_xa_journal_dirty(ctxt->handle, loc);
2136
2137 out:
2138         return ret;
2139 }
2140
2141 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2142                                      struct inode *inode,
2143                                      struct buffer_head *bh,
2144                                      struct ocfs2_xattr_entry *entry)
2145 {
2146         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2147
2148         loc->xl_inode = inode;
2149         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2150         loc->xl_storage = bh;
2151         loc->xl_entry = entry;
2152
2153         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2154                 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2155         else {
2156                 BUG_ON(entry);
2157                 loc->xl_size = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2158         }
2159         loc->xl_header =
2160                 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2161                                               loc->xl_size);
2162 }
2163
2164 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2165                                           struct inode *inode,
2166                                           struct buffer_head *bh,
2167                                           struct ocfs2_xattr_entry *entry)
2168 {
2169         struct ocfs2_xattr_block *xb =
2170                 (struct ocfs2_xattr_block *)bh->b_data;
2171
2172         BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2173
2174         loc->xl_inode = inode;
2175         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2176         loc->xl_storage = bh;
2177         loc->xl_header = &(xb->xb_attrs.xb_header);
2178         loc->xl_entry = entry;
2179         loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2180                                              xb_attrs.xb_header);
2181 }
2182
2183 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2184                                            struct ocfs2_xattr_bucket *bucket,
2185                                            struct ocfs2_xattr_entry *entry)
2186 {
2187         loc->xl_inode = bucket->bu_inode;
2188         loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2189         loc->xl_storage = bucket;
2190         loc->xl_header = bucket_xh(bucket);
2191         loc->xl_entry = entry;
2192         loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2193 }
2194
2195
2196 /*
2197  * ocfs2_xattr_set_entry()
2198  *
2199  * Set extended attribute entry into inode or block.
2200  *
2201  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
2202  * We first insert tree root(ocfs2_xattr_value_root) like a normal value,
2203  * then set value in B tree with set_value_outside().
2204  */
2205 static int ocfs2_xattr_set_entry(struct inode *inode,
2206                                  struct ocfs2_xattr_info *xi,
2207                                  struct ocfs2_xattr_search *xs,
2208                                  struct ocfs2_xattr_set_ctxt *ctxt,
2209                                  int flag)
2210 {
2211         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2212         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2213         handle_t *handle = ctxt->handle;
2214         int ret;
2215         struct ocfs2_xa_loc loc;
2216
2217         if (!(flag & OCFS2_INLINE_XATTR_FL))
2218                 BUG_ON(xs->xattr_bh == xs->inode_bh);
2219         else
2220                 BUG_ON(xs->xattr_bh != xs->inode_bh);
2221
2222         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
2223                                       OCFS2_JOURNAL_ACCESS_WRITE);
2224         if (ret) {
2225                 mlog_errno(ret);
2226                 goto out;
2227         }
2228
2229         if (xs->xattr_bh == xs->inode_bh)
2230                 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2231                                          xs->not_found ? NULL : xs->here);
2232         else
2233                 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2234                                               xs->not_found ? NULL : xs->here);
2235
2236         ret = ocfs2_xa_set(&loc, xi, ctxt);
2237         if (ret) {
2238                 if (ret != -ENOSPC)
2239                         mlog_errno(ret);
2240                 goto out;
2241         }
2242         xs->here = loc.xl_entry;
2243
2244         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
2245             (flag & OCFS2_INLINE_XATTR_FL)) {
2246                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2247                 unsigned int xattrsize = osb->s_xattr_inline_size;
2248
2249                 /*
2250                  * Adjust extent record count or inline data size
2251                  * to reserve space for extended attribute.
2252                  */
2253                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2254                         struct ocfs2_inline_data *idata = &di->id2.i_data;
2255                         le16_add_cpu(&idata->id_count, -xattrsize);
2256                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2257                         struct ocfs2_extent_list *el = &di->id2.i_list;
2258                         le16_add_cpu(&el->l_count, -(xattrsize /
2259                                         sizeof(struct ocfs2_extent_rec)));
2260                 }
2261                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2262         }
2263         /* Update xattr flag */
2264         spin_lock(&oi->ip_lock);
2265         oi->ip_dyn_features |= flag;
2266         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2267         spin_unlock(&oi->ip_lock);
2268
2269         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
2270         if (ret < 0)
2271                 mlog_errno(ret);
2272
2273 out:
2274         return ret;
2275 }
2276
2277 /*
2278  * In xattr remove, if it is stored outside and refcounted, we may have
2279  * the chance to split the refcount tree. So need the allocators.
2280  */
2281 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2282                                         struct ocfs2_xattr_value_root *xv,
2283                                         struct ocfs2_caching_info *ref_ci,
2284                                         struct buffer_head *ref_root_bh,
2285                                         struct ocfs2_alloc_context **meta_ac,
2286                                         int *ref_credits)
2287 {
2288         int ret, meta_add = 0;
2289         u32 p_cluster, num_clusters;
2290         unsigned int ext_flags;
2291
2292         *ref_credits = 0;
2293         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2294                                        &num_clusters,
2295                                        &xv->xr_list,
2296                                        &ext_flags);
2297         if (ret) {
2298                 mlog_errno(ret);
2299                 goto out;
2300         }
2301
2302         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2303                 goto out;
2304
2305         ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2306                                                  ref_root_bh, xv,
2307                                                  &meta_add, ref_credits);
2308         if (ret) {
2309                 mlog_errno(ret);
2310                 goto out;
2311         }
2312
2313         ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2314                                                 meta_add, meta_ac);
2315         if (ret)
2316                 mlog_errno(ret);
2317
2318 out:
2319         return ret;
2320 }
2321
2322 static int ocfs2_remove_value_outside(struct inode*inode,
2323                                       struct ocfs2_xattr_value_buf *vb,
2324                                       struct ocfs2_xattr_header *header,
2325                                       struct ocfs2_caching_info *ref_ci,
2326                                       struct buffer_head *ref_root_bh)
2327 {
2328         int ret = 0, i, ref_credits;
2329         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2330         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2331         void *val;
2332
2333         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2334
2335         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2336                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2337
2338                 if (ocfs2_xattr_is_local(entry))
2339                         continue;
2340
2341                 val = (void *)header +
2342                         le16_to_cpu(entry->xe_name_offset);
2343                 vb->vb_xv = (struct ocfs2_xattr_value_root *)
2344                         (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2345
2346                 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2347                                                          ref_ci, ref_root_bh,
2348                                                          &ctxt.meta_ac,
2349                                                          &ref_credits);
2350
2351                 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2352                                         ocfs2_remove_extent_credits(osb->sb));
2353                 if (IS_ERR(ctxt.handle)) {
2354                         ret = PTR_ERR(ctxt.handle);
2355                         mlog_errno(ret);
2356                         break;
2357                 }
2358
2359                 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2360                 if (ret < 0) {
2361                         mlog_errno(ret);
2362                         break;
2363                 }
2364
2365                 ocfs2_commit_trans(osb, ctxt.handle);
2366                 if (ctxt.meta_ac) {
2367                         ocfs2_free_alloc_context(ctxt.meta_ac);
2368                         ctxt.meta_ac = NULL;
2369                 }
2370         }
2371
2372         if (ctxt.meta_ac)
2373                 ocfs2_free_alloc_context(ctxt.meta_ac);
2374         ocfs2_schedule_truncate_log_flush(osb, 1);
2375         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2376         return ret;
2377 }
2378
2379 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2380                                     struct buffer_head *di_bh,
2381                                     struct ocfs2_caching_info *ref_ci,
2382                                     struct buffer_head *ref_root_bh)
2383 {
2384
2385         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2386         struct ocfs2_xattr_header *header;
2387         int ret;
2388         struct ocfs2_xattr_value_buf vb = {
2389                 .vb_bh = di_bh,
2390                 .vb_access = ocfs2_journal_access_di,
2391         };
2392
2393         header = (struct ocfs2_xattr_header *)
2394                  ((void *)di + inode->i_sb->s_blocksize -
2395                  le16_to_cpu(di->i_xattr_inline_size));
2396
2397         ret = ocfs2_remove_value_outside(inode, &vb, header,
2398                                          ref_ci, ref_root_bh);
2399
2400         return ret;
2401 }
2402
/*
 * Argument bundle handed to the per-cluster callback when an indexed
 * xattr block is iterated for removal.
 */
struct ocfs2_rm_xattr_bucket_para {
	/* caching info passed through from the remove path (may be NULL) */
	struct ocfs2_caching_info *ref_ci;
	/* refcount root buffer passed through from the remove path */
	struct buffer_head *ref_root_bh;
};
2407
2408 static int ocfs2_xattr_block_remove(struct inode *inode,
2409                                     struct buffer_head *blk_bh,
2410                                     struct ocfs2_caching_info *ref_ci,
2411                                     struct buffer_head *ref_root_bh)
2412 {
2413         struct ocfs2_xattr_block *xb;
2414         int ret = 0;
2415         struct ocfs2_xattr_value_buf vb = {
2416                 .vb_bh = blk_bh,
2417                 .vb_access = ocfs2_journal_access_xb,
2418         };
2419         struct ocfs2_rm_xattr_bucket_para args = {
2420                 .ref_ci = ref_ci,
2421                 .ref_root_bh = ref_root_bh,
2422         };
2423
2424         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2425         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2426                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2427                 ret = ocfs2_remove_value_outside(inode, &vb, header,
2428                                                  ref_ci, ref_root_bh);
2429         } else
2430                 ret = ocfs2_iterate_xattr_index_block(inode,
2431                                                 blk_bh,
2432                                                 ocfs2_rm_xattr_cluster,
2433                                                 &args);
2434
2435         return ret;
2436 }
2437
2438 static int ocfs2_xattr_free_block(struct inode *inode,
2439                                   u64 block,
2440                                   struct ocfs2_caching_info *ref_ci,
2441                                   struct buffer_head *ref_root_bh)
2442 {
2443         struct inode *xb_alloc_inode;
2444         struct buffer_head *xb_alloc_bh = NULL;
2445         struct buffer_head *blk_bh = NULL;
2446         struct ocfs2_xattr_block *xb;
2447         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2448         handle_t *handle;
2449         int ret = 0;
2450         u64 blk, bg_blkno;
2451         u16 bit;
2452
2453         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2454         if (ret < 0) {
2455                 mlog_errno(ret);
2456                 goto out;
2457         }
2458
2459         ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2460         if (ret < 0) {
2461                 mlog_errno(ret);
2462                 goto out;
2463         }
2464
2465         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2466         blk = le64_to_cpu(xb->xb_blkno);
2467         bit = le16_to_cpu(xb->xb_suballoc_bit);
2468         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2469
2470         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2471                                 EXTENT_ALLOC_SYSTEM_INODE,
2472                                 le16_to_cpu(xb->xb_suballoc_slot));
2473         if (!xb_alloc_inode) {
2474                 ret = -ENOMEM;
2475                 mlog_errno(ret);
2476                 goto out;
2477         }
2478         mutex_lock(&xb_alloc_inode->i_mutex);
2479
2480         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2481         if (ret < 0) {
2482                 mlog_errno(ret);
2483                 goto out_mutex;
2484         }
2485
2486         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2487         if (IS_ERR(handle)) {
2488                 ret = PTR_ERR(handle);
2489                 mlog_errno(ret);
2490                 goto out_unlock;
2491         }
2492
2493         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2494                                        bit, bg_blkno, 1);
2495         if (ret < 0)
2496                 mlog_errno(ret);
2497
2498         ocfs2_commit_trans(osb, handle);
2499 out_unlock:
2500         ocfs2_inode_unlock(xb_alloc_inode, 1);
2501         brelse(xb_alloc_bh);
2502 out_mutex:
2503         mutex_unlock(&xb_alloc_inode->i_mutex);
2504         iput(xb_alloc_inode);
2505 out:
2506         brelse(blk_bh);
2507         return ret;
2508 }
2509
2510 /*
2511  * ocfs2_xattr_remove()
2512  *
2513  * Free extended attribute resources associated with this inode.
2514  */
2515 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2516 {
2517         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2518         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2519         struct ocfs2_refcount_tree *ref_tree = NULL;
2520         struct buffer_head *ref_root_bh = NULL;
2521         struct ocfs2_caching_info *ref_ci = NULL;
2522         handle_t *handle;
2523         int ret;
2524
2525         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2526                 return 0;
2527
2528         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2529                 return 0;
2530
2531         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2532                 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2533                                                le64_to_cpu(di->i_refcount_loc),
2534                                                1, &ref_tree, &ref_root_bh);
2535                 if (ret) {
2536                         mlog_errno(ret);
2537                         goto out;
2538                 }
2539                 ref_ci = &ref_tree->rf_ci;
2540
2541         }
2542
2543         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2544                 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2545                                                ref_ci, ref_root_bh);
2546                 if (ret < 0) {
2547                         mlog_errno(ret);
2548                         goto out;
2549                 }
2550         }
2551
2552         if (di->i_xattr_loc) {
2553                 ret = ocfs2_xattr_free_block(inode,
2554                                              le64_to_cpu(di->i_xattr_loc),
2555                                              ref_ci, ref_root_bh);
2556                 if (ret < 0) {
2557                         mlog_errno(ret);
2558                         goto out;
2559                 }
2560         }
2561
2562         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2563                                    OCFS2_INODE_UPDATE_CREDITS);
2564         if (IS_ERR(handle)) {
2565                 ret = PTR_ERR(handle);
2566                 mlog_errno(ret);
2567                 goto out;
2568         }
2569         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2570                                       OCFS2_JOURNAL_ACCESS_WRITE);
2571         if (ret) {
2572                 mlog_errno(ret);
2573                 goto out_commit;
2574         }
2575
2576         di->i_xattr_loc = 0;
2577
2578         spin_lock(&oi->ip_lock);
2579         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2580         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2581         spin_unlock(&oi->ip_lock);
2582
2583         ret = ocfs2_journal_dirty(handle, di_bh);
2584         if (ret < 0)
2585                 mlog_errno(ret);
2586 out_commit:
2587         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2588 out:
2589         if (ref_tree)
2590                 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2591         brelse(ref_root_bh);
2592         return ret;
2593 }
2594
2595 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2596                                         struct ocfs2_dinode *di)
2597 {
2598         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2599         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2600         int free;
2601
2602         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2603                 return 0;
2604
2605         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2606                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2607                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2608         } else if (ocfs2_inode_is_fast_symlink(inode)) {
2609                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2610                         le64_to_cpu(di->i_size);
2611         } else {
2612                 struct ocfs2_extent_list *el = &di->id2.i_list;
2613                 free = (le16_to_cpu(el->l_count) -
2614                         le16_to_cpu(el->l_next_free_rec)) *
2615                         sizeof(struct ocfs2_extent_rec);
2616         }
2617         if (free >= xattrsize)
2618                 return 1;
2619
2620         return 0;
2621 }
2622
2623 /*
2624  * ocfs2_xattr_ibody_find()
2625  *
2626  * Find extended attribute in inode block and
2627  * fill search info into struct ocfs2_xattr_search.
2628  */
2629 static int ocfs2_xattr_ibody_find(struct inode *inode,
2630                                   int name_index,
2631                                   const char *name,
2632                                   struct ocfs2_xattr_search *xs)
2633 {
2634         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2635         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2636         int ret;
2637         int has_space = 0;
2638
2639         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2640                 return 0;
2641
2642         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2643                 down_read(&oi->ip_alloc_sem);
2644                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2645                 up_read(&oi->ip_alloc_sem);
2646                 if (!has_space)
2647                         return 0;
2648         }
2649
2650         xs->xattr_bh = xs->inode_bh;
2651         xs->end = (void *)di + inode->i_sb->s_blocksize;
2652         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2653                 xs->header = (struct ocfs2_xattr_header *)
2654                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2655         else
2656                 xs->header = (struct ocfs2_xattr_header *)
2657                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2658         xs->base = (void *)xs->header;
2659         xs->here = xs->header->xh_entries;
2660
2661         /* Find the named attribute. */
2662         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2663                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2664                 if (ret && ret != -ENODATA)
2665                         return ret;
2666                 xs->not_found = ret;
2667         }
2668
2669         return 0;
2670 }
2671
2672 /*
2673  * ocfs2_xattr_ibody_set()
2674  *
2675  * Set, replace or remove an extended attribute into inode block.
2676  *
2677  */
2678 static int ocfs2_xattr_ibody_set(struct inode *inode,
2679                                  struct ocfs2_xattr_info *xi,
2680                                  struct ocfs2_xattr_search *xs,
2681                                  struct ocfs2_xattr_set_ctxt *ctxt)
2682 {
2683         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2684         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2685         int ret;
2686
2687         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2688                 return -ENOSPC;
2689
2690         down_write(&oi->ip_alloc_sem);
2691         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2692                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2693                         ret = -ENOSPC;
2694                         goto out;
2695                 }
2696         }
2697
2698         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2699                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2700 out:
2701         up_write(&oi->ip_alloc_sem);
2702
2703         return ret;
2704 }
2705
2706 /*
2707  * ocfs2_xattr_block_find()
2708  *
2709  * Find extended attribute in external block and
2710  * fill search info into struct ocfs2_xattr_search.
2711  */
2712 static int ocfs2_xattr_block_find(struct inode *inode,
2713                                   int name_index,
2714                                   const char *name,
2715                                   struct ocfs2_xattr_search *xs)
2716 {
2717         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2718         struct buffer_head *blk_bh = NULL;
2719         struct ocfs2_xattr_block *xb;
2720         int ret = 0;
2721
2722         if (!di->i_xattr_loc)
2723                 return ret;
2724
2725         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2726                                      &blk_bh);
2727         if (ret < 0) {
2728                 mlog_errno(ret);
2729                 return ret;
2730         }
2731
2732         xs->xattr_bh = blk_bh;
2733         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2734
2735         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2736                 xs->header = &xb->xb_attrs.xb_header;
2737                 xs->base = (void *)xs->header;
2738                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2739                 xs->here = xs->header->xh_entries;
2740
2741                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2742         } else
2743                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2744                                                    name_index,
2745                                                    name, xs);
2746
2747         if (ret && ret != -ENODATA) {
2748                 xs->xattr_bh = NULL;
2749                 goto cleanup;
2750         }
2751         xs->not_found = ret;
2752         return 0;
2753 cleanup:
2754         brelse(blk_bh);
2755
2756         return ret;
2757 }
2758
2759 static int ocfs2_create_xattr_block(handle_t *handle,
2760                                     struct inode *inode,
2761                                     struct buffer_head *inode_bh,
2762                                     struct ocfs2_alloc_context *meta_ac,
2763                                     struct buffer_head **ret_bh,
2764                                     int indexed)
2765 {
2766         int ret;
2767         u16 suballoc_bit_start;
2768         u32 num_got;
2769         u64 first_blkno;
2770         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2771         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2772         struct buffer_head *new_bh = NULL;
2773         struct ocfs2_xattr_block *xblk;
2774
2775         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2776                                       OCFS2_JOURNAL_ACCESS_CREATE);
2777         if (ret < 0) {
2778                 mlog_errno(ret);
2779                 goto end;
2780         }
2781
2782         ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2783                                    &suballoc_bit_start, &num_got,
2784                                    &first_blkno);
2785         if (ret < 0) {
2786                 mlog_errno(ret);
2787                 goto end;
2788         }
2789
2790         new_bh = sb_getblk(inode->i_sb, first_blkno);
2791         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2792
2793         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2794                                       new_bh,
2795                                       OCFS2_JOURNAL_ACCESS_CREATE);
2796         if (ret < 0) {
2797                 mlog_errno(ret);
2798                 goto end;
2799         }
2800
2801         /* Initialize ocfs2_xattr_block */
2802         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2803         memset(xblk, 0, inode->i_sb->s_blocksize);
2804         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2805         xblk->xb_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2806         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2807         xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2808         xblk->xb_blkno = cpu_to_le64(first_blkno);
2809
2810         if (indexed) {
2811                 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2812                 xr->xt_clusters = cpu_to_le32(1);
2813                 xr->xt_last_eb_blk = 0;
2814                 xr->xt_list.l_tree_depth = 0;
2815                 xr->xt_list.l_count = cpu_to_le16(
2816                                         ocfs2_xattr_recs_per_xb(inode->i_sb));
2817                 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2818                 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2819         }
2820
2821         ret = ocfs2_journal_dirty(handle, new_bh);
2822         if (ret < 0) {
2823                 mlog_errno(ret);
2824                 goto end;
2825         }
2826         di->i_xattr_loc = cpu_to_le64(first_blkno);
2827         ocfs2_journal_dirty(handle, inode_bh);
2828
2829         *ret_bh = new_bh;
2830         new_bh = NULL;
2831
2832 end:
2833         brelse(new_bh);
2834         return ret;
2835 }
2836
2837 /*
2838  * ocfs2_xattr_block_set()
2839  *
2840  * Set, replace or remove an extended attribute into external block.
2841  *
2842  */
2843 static int ocfs2_xattr_block_set(struct inode *inode,
2844                                  struct ocfs2_xattr_info *xi,
2845                                  struct ocfs2_xattr_search *xs,
2846                                  struct ocfs2_xattr_set_ctxt *ctxt)
2847 {
2848         struct buffer_head *new_bh = NULL;
2849         handle_t *handle = ctxt->handle;
2850         struct ocfs2_xattr_block *xblk = NULL;
2851         int ret;
2852
2853         if (!xs->xattr_bh) {
2854                 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2855                                                ctxt->meta_ac, &new_bh, 0);
2856                 if (ret) {
2857                         mlog_errno(ret);
2858                         goto end;
2859                 }
2860
2861                 xs->xattr_bh = new_bh;
2862                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2863                 xs->header = &xblk->xb_attrs.xb_header;
2864                 xs->base = (void *)xs->header;
2865                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2866                 xs->here = xs->header->xh_entries;
2867         } else
2868                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2869
2870         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2871                 /* Set extended attribute into external block */
2872                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2873                                             OCFS2_HAS_XATTR_FL);
2874                 if (!ret || ret != -ENOSPC)
2875                         goto end;
2876
2877                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2878                 if (ret)
2879                         goto end;
2880         }
2881
2882         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2883
2884 end:
2885
2886         return ret;
2887 }
2888
2889 /* Check whether the new xattr can be inserted into the inode. */
2890 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2891                                        struct ocfs2_xattr_info *xi,
2892                                        struct ocfs2_xattr_search *xs)
2893 {
2894         struct ocfs2_xattr_entry *last;
2895         int free, i;
2896         size_t min_offs = xs->end - xs->base;
2897
2898         if (!xs->header)
2899                 return 0;
2900
2901         last = xs->header->xh_entries;
2902
2903         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2904                 size_t offs = le16_to_cpu(last->xe_name_offset);
2905                 if (offs < min_offs)
2906                         min_offs = offs;
2907                 last += 1;
2908         }
2909
2910         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2911         if (free < 0)
2912                 return 0;
2913
2914         BUG_ON(!xs->not_found);
2915
2916         if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
2917                 return 1;
2918
2919         return 0;
2920 }
2921
2922 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2923                                      struct ocfs2_dinode *di,
2924                                      struct ocfs2_xattr_info *xi,
2925                                      struct ocfs2_xattr_search *xis,
2926                                      struct ocfs2_xattr_search *xbs,
2927                                      int *clusters_need,
2928                                      int *meta_need,
2929                                      int *credits_need)
2930 {
2931         int ret = 0, old_in_xb = 0;
2932         int clusters_add = 0, meta_add = 0, credits = 0;
2933         struct buffer_head *bh = NULL;
2934         struct ocfs2_xattr_block *xb = NULL;
2935         struct ocfs2_xattr_entry *xe = NULL;
2936         struct ocfs2_xattr_value_root *xv = NULL;
2937         char *base = NULL;
2938         int name_offset, name_len = 0;
2939         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2940                                                     xi->xi_value_len);
2941         u64 value_size;
2942
2943         /*
2944          * Calculate the clusters we need to write.
2945          * No matter whether we replace an old one or add a new one,
2946          * we need this for writing.
2947          */
2948         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2949                 credits += new_clusters *
2950                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2951
2952         if (xis->not_found && xbs->not_found) {
2953                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2954
2955                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2956                         clusters_add += new_clusters;
2957                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2958                                                         &def_xv.xv.xr_list,
2959                                                         new_clusters);
2960                 }
2961
2962                 goto meta_guess;
2963         }
2964
2965         if (!xis->not_found) {
2966                 xe = xis->here;
2967                 name_offset = le16_to_cpu(xe->xe_name_offset);
2968                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2969                 base = xis->base;
2970                 credits += OCFS2_INODE_UPDATE_CREDITS;
2971         } else {
2972                 int i, block_off = 0;
2973                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2974                 xe = xbs->here;
2975                 name_offset = le16_to_cpu(xe->xe_name_offset);
2976                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2977                 i = xbs->here - xbs->header->xh_entries;
2978                 old_in_xb = 1;
2979
2980                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2981                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2982                                                         bucket_xh(xbs->bucket),
2983                                                         i, &block_off,
2984                                                         &name_offset);
2985                         base = bucket_block(xbs->bucket, block_off);
2986                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2987                 } else {
2988                         base = xbs->base;
2989                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2990                 }
2991         }
2992
2993         /*
2994          * delete a xattr doesn't need metadata and cluster allocation.
2995          * so just calculate the credits and return.
2996          *
2997          * The credits for removing the value tree will be extended
2998          * by ocfs2_remove_extent itself.
2999          */
3000         if (!xi->xi_value) {
3001                 if (!ocfs2_xattr_is_local(xe))
3002                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3003
3004                 goto out;
3005         }
3006
3007         /* do cluster allocation guess first. */
3008         value_size = le64_to_cpu(xe->xe_value_size);
3009
3010         if (old_in_xb) {
3011                 /*
3012                  * In xattr set, we always try to set the xe in inode first,
3013                  * so if it can be inserted into inode successfully, the old
3014                  * one will be removed from the xattr block, and this xattr
3015                  * will be inserted into inode as a new xattr in inode.
3016                  */
3017                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3018                         clusters_add += new_clusters;
3019                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
3020                                     OCFS2_INODE_UPDATE_CREDITS;
3021                         if (!ocfs2_xattr_is_local(xe))
3022                                 credits += ocfs2_calc_extend_credits(
3023                                                         inode->i_sb,
3024                                                         &def_xv.xv.xr_list,
3025                                                         new_clusters);
3026                         goto out;
3027                 }
3028         }
3029
3030         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3031                 /* the new values will be stored outside. */
3032                 u32 old_clusters = 0;
3033
3034                 if (!ocfs2_xattr_is_local(xe)) {
3035                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3036                                                                  value_size);
3037                         xv = (struct ocfs2_xattr_value_root *)
3038                              (base + name_offset + name_len);
3039                         value_size = OCFS2_XATTR_ROOT_SIZE;
3040                 } else
3041                         xv = &def_xv.xv;
3042
3043                 if (old_clusters >= new_clusters) {
3044                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3045                         goto out;
3046                 } else {
3047                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3048                         clusters_add += new_clusters - old_clusters;
3049                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3050                                                              &xv->xr_list,
3051                                                              new_clusters -
3052                                                              old_clusters);
3053                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3054                                 goto out;
3055                 }
3056         } else {
3057                 /*
3058                  * Now the new value will be stored inside. So if the new
3059                  * value is smaller than the size of value root or the old
3060                  * value, we don't need any allocation, otherwise we have
3061                  * to guess metadata allocation.
3062                  */
3063                 if ((ocfs2_xattr_is_local(xe) &&
3064                      (value_size >= xi->xi_value_len)) ||
3065                     (!ocfs2_xattr_is_local(xe) &&
3066                      OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3067                         goto out;
3068         }
3069
3070 meta_guess:
3071         /* calculate metadata allocation. */
3072         if (di->i_xattr_loc) {
3073                 if (!xbs->xattr_bh) {
3074                         ret = ocfs2_read_xattr_block(inode,
3075                                                      le64_to_cpu(di->i_xattr_loc),
3076                                                      &bh);
3077                         if (ret) {
3078                                 mlog_errno(ret);
3079                                 goto out;
3080                         }
3081
3082                         xb = (struct ocfs2_xattr_block *)bh->b_data;
3083                 } else
3084                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3085
3086                 /*
3087                  * If there is already an xattr tree, good, we can calculate
3088                  * like other b-trees. Otherwise we may have the chance of
3089                  * create a tree, the credit calculation is borrowed from
3090                  * ocfs2_calc_extend_credits with root_el = NULL. And the
3091                  * new tree will be cluster based, so no meta is needed.
3092                  */
3093                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3094                         struct ocfs2_extent_list *el =
3095                                  &xb->xb_attrs.xb_root.xt_list;
3096                         meta_add += ocfs2_extend_meta_needed(el);
3097                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3098                                                              el, 1);
3099                 } else
3100                         credits += OCFS2_SUBALLOC_ALLOC + 1;
3101
3102                 /*
3103                  * This cluster will be used either for new bucket or for
3104                  * new xattr block.
3105                  * If the cluster size is the same as the bucket size, one
3106                  * more is needed since we may need to extend the bucket
3107                  * also.
3108                  */
3109                 clusters_add += 1;
3110                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3111                 if (OCFS2_XATTR_BUCKET_SIZE ==
3112                         OCFS2_SB(inode->i_sb)->s_clustersize) {
3113                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3114                         clusters_add += 1;
3115                 }
3116         } else {
3117                 meta_add += 1;
3118                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3119         }
3120 out:
3121         if (clusters_need)
3122                 *clusters_need = clusters_add;
3123         if (meta_need)
3124                 *meta_need = meta_add;
3125         if (credits_need)
3126                 *credits_need = credits;
3127         brelse(bh);
3128         return ret;
3129 }
3130
3131 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3132                                      struct ocfs2_dinode *di,
3133                                      struct ocfs2_xattr_info *xi,
3134                                      struct ocfs2_xattr_search *xis,
3135                                      struct ocfs2_xattr_search *xbs,
3136                                      struct ocfs2_xattr_set_ctxt *ctxt,
3137                                      int extra_meta,
3138                                      int *credits)
3139 {
3140         int clusters_add, meta_add, ret;
3141         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3142
3143         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3144
3145         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3146
3147         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3148                                         &clusters_add, &meta_add, credits);
3149         if (ret) {
3150                 mlog_errno(ret);
3151                 return ret;
3152         }
3153
3154         meta_add += extra_meta;
3155         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3156              "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3157
3158         if (meta_add) {
3159                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3160                                                         &ctxt->meta_ac);
3161                 if (ret) {
3162                         mlog_errno(ret);
3163                         goto out;
3164                 }
3165         }
3166
3167         if (clusters_add) {
3168                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3169                 if (ret)
3170                         mlog_errno(ret);
3171         }
3172 out:
3173         if (ret) {
3174                 if (ctxt->meta_ac) {
3175                         ocfs2_free_alloc_context(ctxt->meta_ac);
3176                         ctxt->meta_ac = NULL;
3177                 }
3178
3179                 /*
3180                  * We cannot have an error and a non null ctxt->data_ac.
3181                  */
3182         }
3183
3184         return ret;
3185 }
3186
3187 static int __ocfs2_xattr_set_handle(struct inode *inode,
3188                                     struct ocfs2_dinode *di,
3189                                     struct ocfs2_xattr_info *xi,
3190                                     struct ocfs2_xattr_search *xis,
3191                                     struct ocfs2_xattr_search *xbs,
3192                                     struct ocfs2_xattr_set_ctxt *ctxt)
3193 {
3194         int ret = 0, credits, old_found;
3195
3196         if (!xi->xi_value) {
3197                 /* Remove existing extended attribute */
3198                 if (!xis->not_found)
3199                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3200                 else if (!xbs->not_found)
3201                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3202         } else {
3203                 /* We always try to set extended attribute into inode first*/
3204                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3205                 if (!ret && !xbs->not_found) {
3206                         /*
3207                          * If succeed and that extended attribute existing in
3208                          * external block, then we will remove it.
3209                          */
3210                         xi->xi_value = NULL;
3211                         xi->xi_value_len = 0;
3212
3213                         old_found = xis->not_found;
3214                         xis->not_found = -ENODATA;
3215                         ret = ocfs2_calc_xattr_set_need(inode,
3216                                                         di,
3217                                                         xi,
3218                                                         xis,
3219                                                         xbs,
3220                                                         NULL,
3221                                                         NULL,
3222                                                         &credits);
3223                         xis->not_found = old_found;
3224                         if (ret) {
3225                                 mlog_errno(ret);
3226                                 goto out;
3227                         }
3228
3229                         ret = ocfs2_extend_trans(ctxt->handle, credits +
3230                                         ctxt->handle->h_buffer_credits);
3231                         if (ret) {
3232                                 mlog_errno(ret);
3233                                 goto out;
3234                         }
3235                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3236                 } else if (ret == -ENOSPC) {
3237                         if (di->i_xattr_loc && !xbs->xattr_bh) {
3238                                 ret = ocfs2_xattr_block_find(inode,
3239                                                              xi->xi_name_index,
3240                                                              xi->xi_name, xbs);
3241                                 if (ret)
3242                                         goto out;
3243
3244                                 old_found = xis->not_found;
3245                                 xis->not_found = -ENODATA;
3246                                 ret = ocfs2_calc_xattr_set_need(inode,
3247                                                                 di,
3248                                                                 xi,
3249                                                                 xis,
3250                                                                 xbs,
3251                                                                 NULL,
3252                                                                 NULL,
3253                                                                 &credits);
3254                                 xis->not_found = old_found;
3255                                 if (ret) {
3256                                         mlog_errno(ret);
3257                                         goto out;
3258                                 }
3259
3260                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3261                                         ctxt->handle->h_buffer_credits);
3262                                 if (ret) {
3263                                         mlog_errno(ret);
3264                                         goto out;
3265                                 }
3266                         }
3267                         /*
3268                          * If no space in inode, we will set extended attribute
3269                          * into external block.
3270                          */
3271                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3272                         if (ret)
3273                                 goto out;
3274                         if (!xis->not_found) {
3275                                 /*
3276                                  * If succeed and that extended attribute
3277                                  * existing in inode, we will remove it.
3278                                  */
3279                                 xi->xi_value = NULL;
3280                                 xi->xi_value_len = 0;
3281                                 xbs->not_found = -ENODATA;
3282                                 ret = ocfs2_calc_xattr_set_need(inode,
3283                                                                 di,
3284                                                                 xi,
3285                                                                 xis,
3286                                                                 xbs,
3287                                                                 NULL,
3288                                                                 NULL,
3289                                                                 &credits);
3290                                 if (ret) {
3291                                         mlog_errno(ret);
3292                                         goto out;
3293                                 }
3294
3295                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3296                                                 ctxt->handle->h_buffer_credits);
3297                                 if (ret) {
3298                                         mlog_errno(ret);
3299                                         goto out;
3300                                 }
3301                                 ret = ocfs2_xattr_ibody_set(inode, xi,
3302                                                             xis, ctxt);
3303                         }
3304                 }
3305         }
3306
3307         if (!ret) {
3308                 /* Update inode ctime. */
3309                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3310                                               xis->inode_bh,
3311                                               OCFS2_JOURNAL_ACCESS_WRITE);
3312                 if (ret) {
3313                         mlog_errno(ret);
3314                         goto out;
3315                 }
3316
3317                 inode->i_ctime = CURRENT_TIME;
3318                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3319                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3320                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3321         }
3322 out:
3323         return ret;
3324 }
3325
3326 /*
3327  * This function only called duing creating inode
3328  * for init security/acl xattrs of the new inode.
3329  * All transanction credits have been reserved in mknod.
3330  */
3331 int ocfs2_xattr_set_handle(handle_t *handle,
3332                            struct inode *inode,
3333                            struct buffer_head *di_bh,
3334                            int name_index,
3335                            const char *name,
3336                            const void *value,
3337                            size_t value_len,
3338                            int flags,
3339                            struct ocfs2_alloc_context *meta_ac,
3340                            struct ocfs2_alloc_context *data_ac)
3341 {
3342         struct ocfs2_dinode *di;
3343         int ret;
3344
3345         struct ocfs2_xattr_info xi = {
3346                 .xi_name_index = name_index,
3347                 .xi_name = name,
3348                 .xi_name_len = strlen(name),
3349                 .xi_value = value,
3350                 .xi_value_len = value_len,
3351         };
3352
3353         struct ocfs2_xattr_search xis = {
3354                 .not_found = -ENODATA,
3355         };
3356
3357         struct ocfs2_xattr_search xbs = {
3358                 .not_found = -ENODATA,
3359         };
3360
3361         struct ocfs2_xattr_set_ctxt ctxt = {
3362                 .handle = handle,
3363                 .meta_ac = meta_ac,
3364                 .data_ac = data_ac,
3365         };
3366
3367         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3368                 return -EOPNOTSUPP;
3369
3370         /*
3371          * In extreme situation, may need xattr bucket when
3372          * block size is too small. And we have already reserved
3373          * the credits for bucket in mknod.
3374          */
3375         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3376                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
3377                 if (!xbs.bucket) {
3378                         mlog_errno(-ENOMEM);
3379                         return -ENOMEM;
3380                 }
3381         }
3382
3383         xis.inode_bh = xbs.inode_bh = di_bh;
3384         di = (struct ocfs2_dinode *)di_bh->b_data;
3385
3386         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3387
3388         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3389         if (ret)
3390                 goto cleanup;
3391         if (xis.not_found) {
3392                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3393                 if (ret)
3394                         goto cleanup;
3395         }
3396
3397         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3398
3399 cleanup:
3400         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3401         brelse(xbs.xattr_bh);
3402         ocfs2_xattr_bucket_free(xbs.bucket);
3403
3404         return ret;
3405 }
3406
3407 /*
3408  * ocfs2_xattr_set()
3409  *
3410  * Set, replace or remove an extended attribute for this inode.
3411  * value is NULL to remove an existing extended attribute, else either
3412  * create or replace an extended attribute.
3413  */
3414 int ocfs2_xattr_set(struct inode *inode,
3415                     int name_index,
3416                     const char *name,
3417                     const void *value,
3418                     size_t value_len,
3419                     int flags)
3420 {
3421         struct buffer_head *di_bh = NULL;
3422         struct ocfs2_dinode *di;
3423         int ret, credits, ref_meta = 0, ref_credits = 0;
3424         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3425         struct inode *tl_inode = osb->osb_tl_inode;
3426         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3427         struct ocfs2_refcount_tree *ref_tree = NULL;
3428
3429         struct ocfs2_xattr_info xi = {
3430                 .xi_name_index = name_index,
3431                 .xi_name = name,
3432                 .xi_name_len = strlen(name),
3433                 .xi_value = value,
3434                 .xi_value_len = value_len,
3435         };
3436
3437         struct ocfs2_xattr_search xis = {
3438                 .not_found = -ENODATA,
3439         };
3440
3441         struct ocfs2_xattr_search xbs = {
3442                 .not_found = -ENODATA,
3443         };
3444
3445         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3446                 return -EOPNOTSUPP;
3447
3448         /*
3449          * Only xbs will be used on indexed trees.  xis doesn't need a
3450          * bucket.
3451          */
3452         xbs.bucket = ocfs2_xattr_bucket_new(inode);
3453         if (!xbs.bucket) {
3454                 mlog_errno(-ENOMEM);
3455                 return -ENOMEM;
3456         }
3457
3458         ret = ocfs2_inode_lock(inode, &di_bh, 1);
3459         if (ret < 0) {
3460                 mlog_errno(ret);
3461                 goto cleanup_nolock;
3462         }
3463         xis.inode_bh = xbs.inode_bh = di_bh;
3464         di = (struct ocfs2_dinode *)di_bh->b_data;
3465
3466         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3467         /*
3468          * Scan inode and external block to find the same name
3469          * extended attribute and collect search infomation.
3470          */
3471         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3472         if (ret)
3473                 goto cleanup;
3474         if (xis.not_found) {
3475                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3476                 if (ret)
3477                         goto cleanup;
3478         }
3479
3480         if (xis.not_found && xbs.not_found) {
3481                 ret = -ENODATA;
3482                 if (flags & XATTR_REPLACE)
3483                         goto cleanup;
3484                 ret = 0;
3485                 if (!value)
3486                         goto cleanup;
3487         } else {
3488                 ret = -EEXIST;
3489                 if (flags & XATTR_CREATE)
3490                         goto cleanup;
3491         }
3492
3493         /* Check whether the value is refcounted and do some prepartion. */
3494         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3495             (!xis.not_found || !xbs.not_found)) {
3496                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3497                                                    &xis, &xbs, &ref_tree,
3498                                                    &ref_meta, &ref_credits);
3499                 if (ret) {
3500                         mlog_errno(ret);
3501                         goto cleanup;
3502                 }
3503         }
3504
3505         mutex_lock(&tl_inode->i_mutex);
3506
3507         if (ocfs2_truncate_log_needs_flush(osb)) {
3508                 ret = __ocfs2_flush_truncate_log(osb);
3509                 if (ret < 0) {
3510                         mutex_unlock(&tl_inode->i_mutex);
3511                         mlog_errno(ret);
3512                         goto cleanup;
3513                 }
3514         }
3515         mutex_unlock(&tl_inode->i_mutex);
3516
3517         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3518                                         &xbs, &ctxt, ref_meta, &credits);
3519         if (ret) {
3520                 mlog_errno(ret);
3521                 goto cleanup;
3522         }
3523
3524         /* we need to update inode's ctime field, so add credit for it. */
3525         credits += OCFS2_INODE_UPDATE_CREDITS;
3526         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3527         if (IS_ERR(ctxt.handle)) {
3528                 ret = PTR_ERR(ctxt.handle);
3529                 mlog_errno(ret);
3530                 goto cleanup;
3531         }
3532
3533         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3534
3535         ocfs2_commit_trans(osb, ctxt.handle);
3536
3537         if (ctxt.data_ac)
3538                 ocfs2_free_alloc_context(ctxt.data_ac);
3539         if (ctxt.meta_ac)
3540                 ocfs2_free_alloc_context(ctxt.meta_ac);
3541         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3542                 ocfs2_schedule_truncate_log_flush(osb, 1);
3543         ocfs2_run_deallocs(osb, &ctxt.dealloc);
3544
3545 cleanup:
3546         if (ref_tree)
3547                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3548         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3549         if (!value && !ret) {
3550                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3551                 if (ret)
3552                         mlog_errno(ret);
3553         }
3554         ocfs2_inode_unlock(inode, 1);
3555 cleanup_nolock:
3556         brelse(di_bh);
3557         brelse(xbs.xattr_bh);
3558         ocfs2_xattr_bucket_free(xbs.bucket);
3559
3560         return ret;
3561 }
3562
3563 /*
3564  * Find the xattr extent rec which may contains name_hash.
3565  * e_cpos will be the first name hash of the xattr rec.
3566  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3567  */
3568 static int ocfs2_xattr_get_rec(struct inode *inode,
3569                                u32 name_hash,
3570                                u64 *p_blkno,
3571                                u32 *e_cpos,
3572                                u32 *num_clusters,
3573                                struct ocfs2_extent_list *el)
3574 {
3575         int ret = 0, i;
3576         struct buffer_head *eb_bh = NULL;
3577         struct ocfs2_extent_block *eb;
3578         struct ocfs2_extent_rec *rec = NULL;
3579         u64 e_blkno = 0;
3580
3581         if (el->l_tree_depth) {
3582                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3583                                       &eb_bh);
3584                 if (ret) {
3585                         mlog_errno(ret);
3586                         goto out;
3587                 }
3588
3589                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3590                 el = &eb->h_list;
3591
3592                 if (el->l_tree_depth) {
3593                         ocfs2_error(inode->i_sb,
3594                                     "Inode %lu has non zero tree depth in "
3595                                     "xattr tree block %llu\n", inode->i_ino,
3596                                     (unsigned long long)eb_bh->b_blocknr);
3597                         ret = -EROFS;
3598                         goto out;
3599                 }
3600         }
3601
3602         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3603                 rec = &el->l_recs[i];
3604
3605                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3606                         e_blkno = le64_to_cpu(rec->e_blkno);
3607                         break;
3608                 }
3609         }
3610
3611         if (!e_blkno) {
3612                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3613                             "record (%u, %u, 0) in xattr", inode->i_ino,
3614                             le32_to_cpu(rec->e_cpos),
3615                             ocfs2_rec_clusters(el, rec));
3616                 ret = -EROFS;
3617                 goto out;
3618         }
3619
3620         *p_blkno = le64_to_cpu(rec->e_blkno);
3621         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3622         if (e_cpos)
3623                 *e_cpos = le32_to_cpu(rec->e_cpos);
3624 out:
3625         brelse(eb_bh);
3626         return ret;
3627 }
3628
/*
 * Callback applied to a single xattr bucket.
 * ocfs2_iterate_xattr_buckets() invokes it once per bucket, handing
 * through the caller's opaque para pointer, and stops iterating at the
 * first non-zero return value.
 */
3629 typedef int (xattr_bucket_func)(struct inode *inode,
3630                                 struct ocfs2_xattr_bucket *bucket,
3631                                 void *para);
3632
3633 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3634                                    struct ocfs2_xattr_bucket *bucket,
3635                                    int name_index,
3636                                    const char *name,
3637                                    u32 name_hash,
3638                                    u16 *xe_index,
3639                                    int *found)
3640 {
3641         int i, ret = 0, cmp = 1, block_off, new_offset;
3642         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3643         size_t name_len = strlen(name);
3644         struct ocfs2_xattr_entry *xe = NULL;
3645         char *xe_name;
3646
3647         /*
3648          * We don't use binary search in the bucket because there
3649          * may be multiple entries with the same name hash.
3650          */
3651         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3652                 xe = &xh->xh_entries[i];
3653
3654                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3655                         continue;
3656                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3657                         break;
3658
3659                 cmp = name_index - ocfs2_xattr_get_type(xe);
3660                 if (!cmp)
3661                         cmp = name_len - xe->xe_name_len;
3662                 if (cmp)
3663                         continue;
3664
3665                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3666                                                         xh,
3667                                                         i,
3668                                                         &block_off,
3669                                                         &new_offset);
3670                 if (ret) {
3671                         mlog_errno(ret);
3672                         break;
3673                 }
3674
3675
3676                 xe_name = bucket_block(bucket, block_off) + new_offset;
3677                 if (!memcmp(name, xe_name, name_len)) {
3678                         *xe_index = i;
3679                         *found = 1;
3680                         ret = 0;
3681                         break;
3682                 }
3683         }
3684
3685         return ret;
3686 }
3687
3688 /*
3689  * Find the specified xattr entry in a series of buckets.
3690  * This series start from p_blkno and last for num_clusters.
3691  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3692  * the num of the valid buckets.
3693  *
3694  * Return the buffer_head this xattr should reside in. And if the xattr's
3695  * hash is in the gap of 2 buckets, return the lower bucket.
3696  */
3697 static int ocfs2_xattr_bucket_find(struct inode *inode,
3698                                    int name_index,
3699                                    const char *name,
3700                                    u32 name_hash,
3701                                    u64 p_blkno,
3702                                    u32 first_hash,
3703                                    u32 num_clusters,
3704                                    struct ocfs2_xattr_search *xs)
3705 {
3706         int ret, found = 0;
3707         struct ocfs2_xattr_header *xh = NULL;
3708         struct ocfs2_xattr_entry *xe = NULL;
3709         u16 index = 0;
3710         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3711         int low_bucket = 0, bucket, high_bucket;
3712         struct ocfs2_xattr_bucket *search;
3713         u32 last_hash;
3714         u64 blkno, lower_blkno = 0;
3715
3716         search = ocfs2_xattr_bucket_new(inode);
3717         if (!search) {
3718                 ret = -ENOMEM;
3719                 mlog_errno(ret);
3720                 goto out;
3721         }
3722
3723         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3724         if (ret) {
3725                 mlog_errno(ret);
3726                 goto out;
3727         }
3728
3729         xh = bucket_xh(search);
3730         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3731         while (low_bucket <= high_bucket) {
3732                 ocfs2_xattr_bucket_relse(search);
3733
3734                 bucket = (low_bucket + high_bucket) / 2;
3735                 blkno = p_blkno + bucket * blk_per_bucket;
3736                 ret = ocfs2_read_xattr_bucket(search, blkno);
3737                 if (ret) {
3738                         mlog_errno(ret);
3739                         goto out;
3740                 }
3741
3742                 xh = bucket_xh(search);
3743                 xe = &xh->xh_entries[0];
3744                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3745                         high_bucket = bucket - 1;
3746                         continue;
3747                 }
3748
3749                 /*
3750                  * Check whether the hash of the last entry in our
3751                  * bucket is larger than the search one. for an empty
3752                  * bucket, the last one is also the first one.
3753                  */
3754                 if (xh->xh_count)
3755                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3756
3757                 last_hash = le32_to_cpu(xe->xe_name_hash);
3758
3759                 /* record lower_blkno which may be the insert place. */
3760                 lower_blkno = blkno;
3761
3762                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3763                         low_bucket = bucket + 1;
3764                         continue;
3765                 }
3766
3767                 /* the searched xattr should reside in this bucket if exists. */
3768                 ret = ocfs2_find_xe_in_bucket(inode, search,
3769                                               name_index, name, name_hash,
3770                                               &index, &found);
3771                 if (ret) {
3772                         mlog_errno(ret);
3773                         goto out;
3774                 }
3775                 break;
3776         }
3777
3778         /*
3779          * Record the bucket we have found.
3780          * When the xattr's hash value is in the gap of 2 buckets, we will
3781          * always set it to the previous bucket.
3782          */
3783         if (!lower_blkno)
3784                 lower_blkno = p_blkno;
3785
3786         /* This should be in cache - we just read it during the search */
3787         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3788         if (ret) {
3789                 mlog_errno(ret);
3790                 goto out;
3791         }
3792
3793         xs->header = bucket_xh(xs->bucket);
3794         xs->base = bucket_block(xs->bucket, 0);
3795         xs->end = xs->base + inode->i_sb->s_blocksize;
3796
3797         if (found) {
3798                 xs->here = &xs->header->xh_entries[index];
3799                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3800                      (unsigned long long)bucket_blkno(xs->bucket), index);
3801         } else
3802                 ret = -ENODATA;
3803
3804 out:
3805         ocfs2_xattr_bucket_free(search);
3806         return ret;
3807 }
3808
3809 static int ocfs2_xattr_index_block_find(struct inode *inode,
3810                                         struct buffer_head *root_bh,
3811                                         int name_index,
3812                                         const char *name,
3813                                         struct ocfs2_xattr_search *xs)
3814 {
3815         int ret;
3816         struct ocfs2_xattr_block *xb =
3817                         (struct ocfs2_xattr_block *)root_bh->b_data;
3818         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3819         struct ocfs2_extent_list *el = &xb_root->xt_list;
3820         u64 p_blkno = 0;
3821         u32 first_hash, num_clusters = 0;
3822         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3823
3824         if (le16_to_cpu(el->l_next_free_rec) == 0)
3825                 return -ENODATA;
3826
3827         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3828              name, name_hash, name_index);
3829
3830         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3831                                   &num_clusters, el);
3832         if (ret) {
3833                 mlog_errno(ret);
3834                 goto out;
3835         }
3836
3837         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3838
3839         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3840              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3841              first_hash);
3842
3843         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3844                                       p_blkno, first_hash, num_clusters, xs);
3845
3846 out:
3847         return ret;
3848 }
3849
3850 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3851                                        u64 blkno,
3852                                        u32 clusters,
3853                                        xattr_bucket_func *func,
3854                                        void *para)
3855 {
3856         int i, ret = 0;
3857         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3858         u32 num_buckets = clusters * bpc;
3859         struct ocfs2_xattr_bucket *bucket;
3860
3861         bucket = ocfs2_xattr_bucket_new(inode);
3862         if (!bucket) {
3863                 mlog_errno(-ENOMEM);
3864                 return -ENOMEM;
3865         }
3866
3867         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3868              clusters, (unsigned long long)blkno);
3869
3870         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3871                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3872                 if (ret) {
3873                         mlog_errno(ret);
3874                         break;
3875                 }
3876
3877                 /*
3878                  * The real bucket num in this series of blocks is stored
3879                  * in the 1st bucket.
3880                  */
3881                 if (i == 0)
3882                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3883
3884                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3885                      (unsigned long long)blkno,
3886                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3887                 if (func) {
3888                         ret = func(inode, bucket, para);
3889                         if (ret && ret != -ERANGE)
3890                                 mlog_errno(ret);
3891                         /* Fall through to bucket_relse() */
3892                 }
3893
3894                 ocfs2_xattr_bucket_relse(bucket);
3895                 if (ret)
3896                         break;
3897         }
3898
3899         ocfs2_xattr_bucket_free(bucket);
3900         return ret;
3901 }
3902
/*
 * Listing state handed, as the opaque "para" pointer, to the callbacks
 * that list the names stored in an xattr index tree.
 */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* caller's buffer, passed on to ocfs2_xattr_list_entry() */
	size_t buffer_size;	/* size of @buffer as supplied by the caller */
	size_t result;		/* updated by ocfs2_xattr_list_entry(); returned as the listing result */
};
3908
3909 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3910                                              struct ocfs2_xattr_header *xh,
3911                                              int index,
3912                                              int *block_off,
3913                                              int *new_offset)
3914 {
3915         u16 name_offset;
3916
3917         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3918                 return -EINVAL;
3919
3920         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3921
3922         *block_off = name_offset >> sb->s_blocksize_bits;
3923         *new_offset = name_offset % sb->s_blocksize;
3924
3925         return 0;
3926 }
3927
3928 static int ocfs2_list_xattr_bucket(struct inode *inode,
3929                                    struct ocfs2_xattr_bucket *bucket,
3930                                    void *para)
3931 {
3932         int ret = 0, type;
3933         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3934         int i, block_off, new_offset;
3935         const char *prefix, *name;
3936
3937         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3938                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3939                 type = ocfs2_xattr_get_type(entry);
3940                 prefix = ocfs2_xattr_prefix(type);
3941
3942                 if (prefix) {
3943                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3944                                                                 bucket_xh(bucket),
3945                                                                 i,
3946                                                                 &block_off,
3947                                                                 &new_offset);
3948                         if (ret)
3949                                 break;
3950
3951                         name = (const char *)bucket_block(bucket, block_off) +
3952                                 new_offset;
3953                         ret = ocfs2_xattr_list_entry(xl->buffer,
3954                                                      xl->buffer_size,
3955                                                      &xl->result,
3956                                                      prefix, name,
3957                                                      entry->xe_name_len);
3958                         if (ret)
3959                                 break;
3960                 }
3961         }
3962
3963         return ret;
3964 }
3965
3966 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3967                                            struct buffer_head *blk_bh,
3968                                            xattr_tree_rec_func *rec_func,
3969                                            void *para)
3970 {
3971         struct ocfs2_xattr_block *xb =
3972                         (struct ocfs2_xattr_block *)blk_bh->b_data;
3973         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3974         int ret = 0;
3975         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3976         u64 p_blkno = 0;
3977
3978         if (!el->l_next_free_rec || !rec_func)
3979                 return 0;
3980
3981         while (name_hash > 0) {
3982                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3983                                           &e_cpos, &num_clusters, el);
3984                 if (ret) {
3985                         mlog_errno(ret);
3986                         break;
3987                 }
3988
3989                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3990                                num_clusters, para);
3991                 if (ret) {
3992                         if (ret != -ERANGE)
3993                                 mlog_errno(ret);
3994                         break;
3995                 }
3996
3997                 if (e_cpos == 0)
3998                         break;
3999
4000                 name_hash = e_cpos - 1;
4001         }
4002
4003         return ret;
4004
4005 }
4006
4007 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4008                                      struct buffer_head *root_bh,
4009                                      u64 blkno, u32 cpos, u32 len, void *para)
4010 {
4011         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4012                                            ocfs2_list_xattr_bucket, para);
4013 }
4014
4015 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4016                                              struct buffer_head *blk_bh,
4017                                              char *buffer,
4018                                              size_t buffer_size)
4019 {
4020         int ret;
4021         struct ocfs2_xattr_tree_list xl = {
4022                 .buffer = buffer,
4023                 .buffer_size = buffer_size,
4024                 .result = 0,
4025         };
4026
4027         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4028                                               ocfs2_list_xattr_tree_rec, &xl);
4029         if (ret) {
4030                 mlog_errno(ret);
4031                 goto out;
4032         }
4033
4034         ret = xl.result;
4035 out:
4036         return ret;
4037 }
4038
4039 static int cmp_xe(const void *a, const void *b)
4040 {
4041         const struct ocfs2_xattr_entry *l = a, *r = b;
4042         u32 l_hash = le32_to_cpu(l->xe_name_hash);
4043         u32 r_hash = le32_to_cpu(r->xe_name_hash);
4044
4045         if (l_hash > r_hash)
4046                 return 1;
4047         if (l_hash < r_hash)
4048                 return -1;
4049         return 0;
4050 }
4051
4052 static void swap_xe(void *a, void *b, int size)
4053 {
4054         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4055
4056         tmp = *l;
4057         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4058         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4059 }
4060
4061 /*
4062  * When the ocfs2_xattr_block is filled up, new bucket will be created
4063  * and all the xattr entries will be moved to the new bucket.
4064  * The header goes at the start of the bucket, and the names+values are
4065  * filled from the end.  This is why *target starts as the last buffer.
4066  * Note: we need to sort the entries since they are not saved in order
4067  * in the ocfs2_xattr_block.
4068  */
4069 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4070                                            struct buffer_head *xb_bh,
4071                                            struct ocfs2_xattr_bucket *bucket)
4072 {
4073         int i, blocksize = inode->i_sb->s_blocksize;
4074         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4075         u16 offset, size, off_change;
4076         struct ocfs2_xattr_entry *xe;
4077         struct ocfs2_xattr_block *xb =
4078                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
4079         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4080         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4081         u16 count = le16_to_cpu(xb_xh->xh_count);
4082         char *src = xb_bh->b_data;
4083         char *target = bucket_block(bucket, blks - 1);
4084
4085         mlog(0, "cp xattr from block %llu to bucket %llu\n",
4086              (unsigned long long)xb_bh->b_blocknr,
4087              (unsigned long long)bucket_blkno(bucket));
4088
4089         for (i = 0; i < blks; i++)
4090                 memset(bucket_block(bucket, i), 0, blocksize);
4091
4092         /*
4093          * Since the xe_name_offset is based on ocfs2_xattr_header,
4094          * there is a offset change corresponding to the change of
4095          * ocfs2_xattr_header's position.
4096          */
4097         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4098         xe = &xb_xh->xh_entries[count - 1];
4099         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4100         size = blocksize - offset;
4101
4102         /* copy all the names and values. */
4103         memcpy(target + offset, src + offset, size);
4104
4105         /* Init new header now. */
4106         xh->xh_count = xb_xh->xh_count;
4107         xh->xh_num_buckets = cpu_to_le16(1);
4108         xh->xh_name_value_len = cpu_to_le16(size);
4109         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4110
4111         /* copy all the entries. */
4112         target = bucket_block(bucket, 0);
4113         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4114         size = count * sizeof(struct ocfs2_xattr_entry);
4115         memcpy(target + offset, (char *)xb_xh + offset, size);
4116
4117         /* Change the xe offset for all the xe because of the move. */
4118         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4119                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4120         for (i = 0; i < count; i++)
4121                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4122
4123         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4124              offset, size, off_change);
4125
4126         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4127              cmp_xe, swap_xe);
4128 }
4129
4130 /*
4131  * After we move xattr from block to index btree, we have to
4132  * update ocfs2_xattr_search to the new xe and base.
4133  *
4134  * When the entry is in xattr block, xattr_bh indicates the storage place.
4135  * While if the entry is in index b-tree, "bucket" indicates the
4136  * real place of the xattr.
4137  */
4138 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4139                                             struct ocfs2_xattr_search *xs,
4140                                             struct buffer_head *old_bh)
4141 {
4142         char *buf = old_bh->b_data;
4143         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4144         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4145         int i;
4146
4147         xs->header = bucket_xh(xs->bucket);
4148         xs->base = bucket_block(xs->bucket, 0);
4149         xs->end = xs->base + inode->i_sb->s_blocksize;
4150
4151         if (xs->not_found)
4152                 return;
4153
4154         i = xs->here - old_xh->xh_entries;
4155         xs->here = &xs->header->xh_entries[i];
4156 }
4157
4158 static int ocfs2_xattr_create_index_block(struct inode *inode,
4159                                           struct ocfs2_xattr_search *xs,
4160                                           struct ocfs2_xattr_set_ctxt *ctxt)
4161 {
4162         int ret;
4163         u32 bit_off, len;
4164         u64 blkno;
4165         handle_t *handle = ctxt->handle;
4166         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4167         struct ocfs2_inode_info *oi = OCFS2_I(inode);
4168         struct buffer_head *xb_bh = xs->xattr_bh;
4169         struct ocfs2_xattr_block *xb =
4170                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4171         struct ocfs2_xattr_tree_root *xr;
4172         u16 xb_flags = le16_to_cpu(xb->xb_flags);
4173
4174         mlog(0, "create xattr index block for %llu\n",
4175              (unsigned long long)xb_bh->b_blocknr);
4176
4177         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4178         BUG_ON(!xs->bucket);
4179
4180         /*
4181          * XXX:
4182          * We can use this lock for now, and maybe move to a dedicated mutex
4183          * if performance becomes a problem later.
4184          */
4185         down_write(&oi->ip_alloc_sem);
4186
4187         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4188                                       OCFS2_JOURNAL_ACCESS_WRITE);
4189         if (ret) {
4190                 mlog_errno(ret);
4191                 goto out;
4192         }
4193
4194         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
4195                                      1, 1, &bit_off, &len);
4196         if (ret) {
4197                 mlog_errno(ret);
4198                 goto out;
4199         }
4200
4201         /*
4202          * The bucket may spread in many blocks, and
4203          * we will only touch the 1st block and the last block
4204          * in the whole bucket(one for entry and one for data).
4205          */
4206         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4207
4208         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
4209              (unsigned long long)blkno);
4210
4211         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4212         if (ret) {
4213                 mlog_errno(ret);
4214                 goto out;
4215         }
4216
4217         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4218                                                 OCFS2_JOURNAL_ACCESS_CREATE);
4219         if (ret) {
4220                 mlog_errno(ret);
4221                 goto out;
4222         }
4223
4224         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4225         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4226
4227         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4228
4229         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4230         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4231                offsetof(struct ocfs2_xattr_block, xb_attrs));
4232
4233         xr = &xb->xb_attrs.xb_root;
4234         xr->xt_clusters = cpu_to_le32(1);
4235         xr->xt_last_eb_blk = 0;
4236         xr->xt_list.l_tree_depth = 0;
4237         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4238         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4239
4240         xr->xt_list.l_recs[0].e_cpos = 0;
4241         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4242         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4243
4244         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4245
4246         ocfs2_journal_dirty(handle, xb_bh);
4247
4248 out:
4249         up_write(&oi->ip_alloc_sem);
4250
4251         return ret;
4252 }
4253
4254 static int cmp_xe_offset(const void *a, const void *b)
4255 {
4256         const struct ocfs2_xattr_entry *l = a, *r = b;
4257         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4258         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4259
4260         if (l_name_offset < r_name_offset)
4261                 return 1;
4262         if (l_name_offset > r_name_offset)
4263                 return -1;
4264         return 0;
4265 }
4266
4267 /*
4268  * defrag a xattr bucket if we find that the bucket has some
4269  * holes beteen name/value pairs.
4270  * We will move all the name/value pairs to the end of the bucket
4271  * so that we can spare some space for insertion.
4272  */
4273 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4274                                      handle_t *handle,
4275                                      struct ocfs2_xattr_bucket *bucket)
4276 {
4277         int ret, i;
4278         size_t end, offset, len;
4279         struct ocfs2_xattr_header *xh;
4280         char *entries, *buf, *bucket_buf = NULL;
4281         u64 blkno = bucket_blkno(bucket);
4282         u16 xh_free_start;
4283         size_t blocksize = inode->i_sb->s_blocksize;
4284         struct ocfs2_xattr_entry *xe;
4285
4286         /*
4287          * In order to make the operation more efficient and generic,
4288          * we copy all the blocks into a contiguous memory and do the
4289          * defragment there, so if anything is error, we will not touch
4290          * the real block.
4291          */
4292         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4293         if (!bucket_buf) {
4294                 ret = -EIO;
4295                 goto out;
4296         }
4297
4298         buf = bucket_buf;
4299         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4300                 memcpy(buf, bucket_block(bucket, i), blocksize);
4301
4302         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4303                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4304         if (ret < 0) {
4305                 mlog_errno(ret);
4306                 goto out;
4307         }
4308
4309         xh = (struct ocfs2_xattr_header *)bucket_buf;
4310         entries = (char *)xh->xh_entries;
4311         xh_free_start = le16_to_cpu(xh->xh_free_start);
4312
4313         mlog(0, "adjust xattr bucket in %llu, count = %u, "
4314              "xh_free_start = %u, xh_name_value_len = %u.\n",
4315              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4316              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4317
4318         /*
4319          * sort all the entries by their offset.
4320          * the largest will be the first, so that we can
4321          * move them to the end one by one.
4322          */
4323         sort(entries, le16_to_cpu(xh->xh_count),
4324              sizeof(struct ocfs2_xattr_entry),
4325              cmp_xe_offset, swap_xe);
4326
4327         /* Move all name/values to the end of the bucket. */
4328         xe = xh->xh_entries;
4329         end = OCFS2_XATTR_BUCKET_SIZE;
4330         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4331                 offset = le16_to_cpu(xe->xe_name_offset);
4332                 len = namevalue_size_xe(xe);
4333
4334                 /*
4335                  * We must make sure that the name/value pair
4336                  * exist in the same block. So adjust end to
4337                  * the previous block end if needed.
4338                  */
4339                 if (((end - len) / blocksize !=
4340                         (end - 1) / blocksize))
4341                         end = end - end % blocksize;
4342
4343                 if (end > offset + len) {
4344                         memmove(bucket_buf + end - len,
4345                                 bucket_buf + offset, len);
4346                         xe->xe_name_offset = cpu_to_le16(end - len);
4347                 }
4348
4349                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4350                                 "bucket %llu\n", (unsigned long long)blkno);
4351
4352                 end -= len;
4353         }
4354
4355         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4356                         "bucket %llu\n", (unsigned long long)blkno);
4357
4358         if (xh_free_start == end)
4359                 goto out;
4360
4361         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4362         xh->xh_free_start = cpu_to_le16(end);
4363
4364         /* sort the entries by their name_hash. */
4365         sort(entries, le16_to_cpu(xh->xh_count),
4366              sizeof(struct ocfs2_xattr_entry),
4367              cmp_xe, swap_xe);
4368
4369         buf = bucket_buf;
4370         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4371                 memcpy(bucket_block(bucket, i), buf, blocksize);
4372         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4373
4374 out:
4375         kfree(bucket_buf);
4376         return ret;
4377 }
4378
4379 /*
4380  * prev_blkno points to the start of an existing extent.  new_blkno
4381  * points to a newly allocated extent.  Because we know each of our
4382  * clusters contains more than bucket, we can easily split one cluster
4383  * at a bucket boundary.  So we take the last cluster of the existing
4384  * extent and split it down the middle.  We move the last half of the
4385  * buckets in the last cluster of the existing extent over to the new
4386  * extent.
4387  *
4388  * first_bh is the buffer at prev_blkno so we can update the existing
4389  * extent's bucket count.  header_bh is the bucket were we were hoping
4390  * to insert our xattr.  If the bucket move places the target in the new
4391  * extent, we'll update first_bh and header_bh after modifying the old
4392  * extent.
4393  *
4394  * first_hash will be set as the 1st xe's name_hash in the new extent.
4395  */
4396 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4397                                                handle_t *handle,
4398                                                struct ocfs2_xattr_bucket *first,
4399                                                struct ocfs2_xattr_bucket *target,
4400                                                u64 new_blkno,
4401                                                u32 num_clusters,
4402                                                u32 *first_hash)
4403 {
4404         int ret;
4405         struct super_block *sb = inode->i_sb;
4406         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4407         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4408         int to_move = num_buckets / 2;
4409         u64 src_blkno;
4410         u64 last_cluster_blkno = bucket_blkno(first) +
4411                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4412
4413         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4414         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4415
4416         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4417              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4418
4419         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4420                                      last_cluster_blkno, new_blkno,
4421                                      to_move, first_hash);
4422         if (ret) {
4423                 mlog_errno(ret);
4424                 goto out;
4425         }
4426
4427         /* This is the first bucket that got moved */
4428         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4429
4430         /*
4431          * If the target bucket was part of the moved buckets, we need to
4432          * update first and target.
4433          */
4434         if (bucket_blkno(target) >= src_blkno) {
4435                 /* Find the block for the new target bucket */
4436                 src_blkno = new_blkno +
4437                         (bucket_blkno(target) - src_blkno);
4438
4439                 ocfs2_xattr_bucket_relse(first);
4440                 ocfs2_xattr_bucket_relse(target);
4441
4442                 /*
4443                  * These shouldn't fail - the buffers are in the
4444                  * journal from ocfs2_cp_xattr_bucket().
4445                  */
4446                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
4447                 if (ret) {
4448                         mlog_errno(ret);
4449                         goto out;
4450                 }
4451                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
4452                 if (ret)
4453                         mlog_errno(ret);
4454
4455         }
4456
4457 out:
4458         return ret;
4459 }
4460
4461 /*
4462  * Find the suitable pos when we divide a bucket into 2.
4463  * We have to make sure the xattrs with the same hash value exist
4464  * in the same bucket.
4465  *
4466  * If this ocfs2_xattr_header covers more than one hash value, find a
4467  * place where the hash value changes.  Try to find the most even split.
4468  * The most common case is that all entries have different hash values,
4469  * and the first check we make will find a place to split.
4470  */
4471 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4472 {
4473         struct ocfs2_xattr_entry *entries = xh->xh_entries;
4474         int count = le16_to_cpu(xh->xh_count);
4475         int delta, middle = count / 2;
4476
4477         /*
4478          * We start at the middle.  Each step gets farther away in both
4479          * directions.  We therefore hit the change in hash value
4480          * nearest to the middle.  Note that this loop does not execute for
4481          * count < 2.
4482          */
4483         for (delta = 0; delta < middle; delta++) {
4484                 /* Let's check delta earlier than middle */
4485                 if (cmp_xe(&entries[middle - delta - 1],
4486                            &entries[middle - delta]))
4487                         return middle - delta;
4488
4489                 /* For even counts, don't walk off the end */
4490                 if ((middle + delta + 1) == count)
4491                         continue;
4492
4493                 /* Now try delta past middle */
4494                 if (cmp_xe(&entries[middle + delta],
4495                            &entries[middle + delta + 1]))
4496                         return middle + delta + 1;
4497         }
4498
4499         /* Every entry had the same hash */
4500         return count;
4501 }
4502
4503 /*
4504  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4505  * first_hash will record the 1st hash of the new bucket.
4506  *
4507  * Normally half of the xattrs will be moved.  But we have to make
4508  * sure that the xattrs with the same hash value are stored in the
4509  * same bucket. If all the xattrs in this bucket have the same hash
4510  * value, the new bucket will be initialized as an empty one and the
4511  * first_hash will be initialized as (hash_value+1).
4512  */
4513 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4514                                     handle_t *handle,
4515                                     u64 blk,
4516                                     u64 new_blk,
4517                                     u32 *first_hash,
4518                                     int new_bucket_head)
4519 {
4520         int ret, i;
4521         int count, start, len, name_value_len = 0, name_offset = 0;
4522         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4523         struct ocfs2_xattr_header *xh;
4524         struct ocfs2_xattr_entry *xe;
4525         int blocksize = inode->i_sb->s_blocksize;
4526
4527         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4528              (unsigned long long)blk, (unsigned long long)new_blk);
4529
4530         s_bucket = ocfs2_xattr_bucket_new(inode);
4531         t_bucket = ocfs2_xattr_bucket_new(inode);
4532         if (!s_bucket || !t_bucket) {
4533                 ret = -ENOMEM;
4534                 mlog_errno(ret);
4535                 goto out;
4536         }
4537
4538         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4539         if (ret) {
4540                 mlog_errno(ret);
4541                 goto out;
4542         }
4543
4544         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4545                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4546         if (ret) {
4547                 mlog_errno(ret);
4548                 goto out;
4549         }
4550
4551         /*
4552          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4553          * there's no need to read it.
4554          */
4555         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4556         if (ret) {
4557                 mlog_errno(ret);
4558                 goto out;
4559         }
4560
4561         /*
4562          * Hey, if we're overwriting t_bucket, what difference does
4563          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4564          * same part of ocfs2_cp_xattr_bucket().
4565          */
4566         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4567                                                 new_bucket_head ?
4568                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4569                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4570         if (ret) {
4571                 mlog_errno(ret);
4572                 goto out;
4573         }
4574
4575         xh = bucket_xh(s_bucket);
4576         count = le16_to_cpu(xh->xh_count);
4577         start = ocfs2_xattr_find_divide_pos(xh);
4578
4579         if (start == count) {
4580                 xe = &xh->xh_entries[start-1];
4581
4582                 /*
4583                  * initialized a new empty bucket here.
4584                  * The hash value is set as one larger than
4585                  * that of the last entry in the previous bucket.
4586                  */
4587                 for (i = 0; i < t_bucket->bu_blocks; i++)
4588                         memset(bucket_block(t_bucket, i), 0, blocksize);
4589
4590                 xh = bucket_xh(t_bucket);
4591                 xh->xh_free_start = cpu_to_le16(blocksize);
4592                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4593                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4594
4595                 goto set_num_buckets;
4596         }
4597
4598         /* copy the whole bucket to the new first. */
4599         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4600
4601         /* update the new bucket. */
4602         xh = bucket_xh(t_bucket);
4603
4604         /*
4605          * Calculate the total name/value len and xh_free_start for
4606          * the old bucket first.
4607          */
4608         name_offset = OCFS2_XATTR_BUCKET_SIZE;
4609         name_value_len = 0;
4610         for (i = 0; i < start; i++) {
4611                 xe = &xh->xh_entries[i];
4612                 name_value_len += namevalue_size_xe(xe);
4613                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4614                         name_offset = le16_to_cpu(xe->xe_name_offset);
4615         }
4616
4617         /*
4618          * Now begin the modification to the new bucket.
4619          *
4620          * In the new bucket, We just move the xattr entry to the beginning
4621          * and don't touch the name/value. So there will be some holes in the
4622          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4623          * called.
4624          */
4625         xe = &xh->xh_entries[start];
4626         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4627         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4628              (int)((char *)xe - (char *)xh),
4629              (int)((char *)xh->xh_entries - (char *)xh));
4630         memmove((char *)xh->xh_entries, (char *)xe, len);
4631         xe = &xh->xh_entries[count - start];
4632         len = sizeof(struct ocfs2_xattr_entry) * start;
4633         memset((char *)xe, 0, len);
4634
4635         le16_add_cpu(&xh->xh_count, -start);
4636         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4637
4638         /* Calculate xh_free_start for the new bucket. */
4639         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4640         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4641                 xe = &xh->xh_entries[i];
4642                 if (le16_to_cpu(xe->xe_name_offset) <
4643                     le16_to_cpu(xh->xh_free_start))
4644                         xh->xh_free_start = xe->xe_name_offset;
4645         }
4646
4647 set_num_buckets:
4648         /* set xh->xh_num_buckets for the new xh. */
4649         if (new_bucket_head)
4650                 xh->xh_num_buckets = cpu_to_le16(1);
4651         else
4652                 xh->xh_num_buckets = 0;
4653
4654         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4655
4656         /* store the first_hash of the new bucket. */
4657         if (first_hash)
4658                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4659
4660         /*
4661          * Now only update the 1st block of the old bucket.  If we
4662          * just added a new empty bucket, there is no need to modify
4663          * it.
4664          */
4665         if (start == count)
4666                 goto out;
4667
4668         xh = bucket_xh(s_bucket);
4669         memset(&xh->xh_entries[start], 0,
4670                sizeof(struct ocfs2_xattr_entry) * (count - start));
4671         xh->xh_count = cpu_to_le16(start);
4672         xh->xh_free_start = cpu_to_le16(name_offset);
4673         xh->xh_name_value_len = cpu_to_le16(name_value_len);
4674
4675         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4676
4677 out:
4678         ocfs2_xattr_bucket_free(s_bucket);
4679         ocfs2_xattr_bucket_free(t_bucket);
4680
4681         return ret;
4682 }
4683
4684 /*
4685  * Copy xattr from one bucket to another bucket.
4686  *
4687  * The caller must make sure that the journal transaction
4688  * has enough space for journaling.
4689  */
4690 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4691                                  handle_t *handle,
4692                                  u64 s_blkno,
4693                                  u64 t_blkno,
4694                                  int t_is_new)
4695 {
4696         int ret;
4697         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4698
4699         BUG_ON(s_blkno == t_blkno);
4700
4701         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4702              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4703              t_is_new);
4704
4705         s_bucket = ocfs2_xattr_bucket_new(inode);
4706         t_bucket = ocfs2_xattr_bucket_new(inode);
4707         if (!s_bucket || !t_bucket) {
4708                 ret = -ENOMEM;
4709                 mlog_errno(ret);
4710                 goto out;
4711         }
4712
4713         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4714         if (ret)
4715                 goto out;
4716
4717         /*
4718          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4719          * there's no need to read it.
4720          */
4721         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4722         if (ret)
4723                 goto out;
4724
4725         /*
4726          * Hey, if we're overwriting t_bucket, what difference does
4727          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4728          * cluster to fill, we came here from
4729          * ocfs2_mv_xattr_buckets(), and it is really new -
4730          * ACCESS_CREATE is required.  But we also might have moved data
4731          * out of t_bucket before extending back into it.
4732          * ocfs2_add_new_xattr_bucket() can do this - its call to
4733          * ocfs2_add_new_xattr_cluster() may have created a new extent
4734          * and copied out the end of the old extent.  Then it re-extends
4735          * the old extent back to create space for new xattrs.  That's
4736          * how we get here, and the bucket isn't really new.
4737          */
4738         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4739                                                 t_is_new ?
4740                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4741                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4742         if (ret)
4743                 goto out;
4744
4745         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4746         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4747
4748 out:
4749         ocfs2_xattr_bucket_free(t_bucket);
4750         ocfs2_xattr_bucket_free(s_bucket);
4751
4752         return ret;
4753 }
4754
4755 /*
4756  * src_blk points to the start of an existing extent.  last_blk points to
4757  * last cluster in that extent.  to_blk points to a newly allocated
4758  * extent.  We copy the buckets from the cluster at last_blk to the new
4759  * extent.  If start_bucket is non-zero, we skip that many buckets before
4760  * we start copying.  The new extent's xh_num_buckets gets set to the
4761  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4762  * by the same amount.
4763  */
4764 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4765                                   u64 src_blk, u64 last_blk, u64 to_blk,
4766                                   unsigned int start_bucket,
4767                                   u32 *first_hash)
4768 {
4769         int i, ret, credits;
4770         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4771         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4772         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4773         struct ocfs2_xattr_bucket *old_first, *new_first;
4774
4775         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4776              (unsigned long long)last_blk, (unsigned long long)to_blk);
4777
4778         BUG_ON(start_bucket >= num_buckets);
4779         if (start_bucket) {
4780                 num_buckets -= start_bucket;
4781                 last_blk += (start_bucket * blks_per_bucket);
4782         }
4783
4784         /* The first bucket of the original extent */
4785         old_first = ocfs2_xattr_bucket_new(inode);
4786         /* The first bucket of the new extent */
4787         new_first = ocfs2_xattr_bucket_new(inode);
4788         if (!old_first || !new_first) {
4789                 ret = -ENOMEM;
4790                 mlog_errno(ret);
4791                 goto out;
4792         }
4793
4794         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4795         if (ret) {
4796                 mlog_errno(ret);
4797                 goto out;
4798         }
4799
4800         /*
4801          * We need to update the first bucket of the old extent and all
4802          * the buckets going to the new extent.
4803          */
4804         credits = ((num_buckets + 1) * blks_per_bucket) +
4805                 handle->h_buffer_credits;
4806         ret = ocfs2_extend_trans(handle, credits);
4807         if (ret) {
4808                 mlog_errno(ret);
4809                 goto out;
4810         }
4811
4812         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4813                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4814         if (ret) {
4815                 mlog_errno(ret);
4816                 goto out;
4817         }
4818
4819         for (i = 0; i < num_buckets; i++) {
4820                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4821                                             last_blk + (i * blks_per_bucket),
4822                                             to_blk + (i * blks_per_bucket),
4823                                             1);
4824                 if (ret) {
4825                         mlog_errno(ret);
4826                         goto out;
4827                 }
4828         }
4829
4830         /*
4831          * Get the new bucket ready before we dirty anything
4832          * (This actually shouldn't fail, because we already dirtied
4833          * it once in ocfs2_cp_xattr_bucket()).
4834          */
4835         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4836         if (ret) {
4837                 mlog_errno(ret);
4838                 goto out;
4839         }
4840         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4841                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4842         if (ret) {
4843                 mlog_errno(ret);
4844                 goto out;
4845         }
4846
4847         /* Now update the headers */
4848         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4849         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4850
4851         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4852         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4853
4854         if (first_hash)
4855                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4856
4857 out:
4858         ocfs2_xattr_bucket_free(new_first);
4859         ocfs2_xattr_bucket_free(old_first);
4860         return ret;
4861 }
4862
4863 /*
4864  * Move some xattrs in this cluster to the new cluster.
4865  * This function should only be called when bucket size == cluster size.
4866  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4867  */
4868 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4869                                       handle_t *handle,
4870                                       u64 prev_blk,
4871                                       u64 new_blk,
4872                                       u32 *first_hash)
4873 {
4874         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4875         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4876
4877         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4878
4879         ret = ocfs2_extend_trans(handle, credits);
4880         if (ret) {
4881                 mlog_errno(ret);
4882                 return ret;
4883         }
4884
4885         /* Move half of the xattr in start_blk to the next bucket. */
4886         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4887                                           new_blk, first_hash, 1);
4888 }
4889
4890 /*
4891  * Move some xattrs from the old cluster to the new one since they are not
4892  * contiguous in ocfs2 xattr tree.
4893  *
4894  * new_blk starts a new separate cluster, and we will move some xattrs from
4895  * prev_blk to it. v_start will be set as the first name hash value in this
4896  * new cluster so that it can be used as e_cpos during tree insertion and
4897  * don't collide with our original b-tree operations. first_bh and header_bh
4898  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4899  * to extend the insert bucket.
4900  *
4901  * The problem is how much xattr should we move to the new one and when should
4902  * we update first_bh and header_bh?
4903  * 1. If cluster size > bucket size, that means the previous cluster has more
4904  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4905  *    update the first_bh and header_bh if the insert bucket has been moved
4906  *    to the new cluster.
4907  * 2. If cluster_size == bucket_size:
4908  *    a) If the previous extent rec has more than one cluster and the insert
4909  *       place isn't in the last cluster, copy the entire last cluster to the
4910  *       new one. This time, we don't need to upate the first_bh and header_bh
4911  *       since they will not be moved into the new cluster.
4912  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4913  *       the new one. And we set the extend flag to zero if the insert place is
4914  *       moved into the new allocated cluster since no extend is needed.
4915  */
4916 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4917                                             handle_t *handle,
4918                                             struct ocfs2_xattr_bucket *first,
4919                                             struct ocfs2_xattr_bucket *target,
4920                                             u64 new_blk,
4921                                             u32 prev_clusters,
4922                                             u32 *v_start,
4923                                             int *extend)
4924 {
4925         int ret;
4926
4927         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4928              (unsigned long long)bucket_blkno(first), prev_clusters,
4929              (unsigned long long)new_blk);
4930
4931         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4932                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4933                                                           handle,
4934                                                           first, target,
4935                                                           new_blk,
4936                                                           prev_clusters,
4937                                                           v_start);
4938                 if (ret)
4939                         mlog_errno(ret);
4940         } else {
4941                 /* The start of the last cluster in the first extent */
4942                 u64 last_blk = bucket_blkno(first) +
4943                         ((prev_clusters - 1) *
4944                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4945
4946                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4947                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4948                                                      bucket_blkno(first),
4949                                                      last_blk, new_blk, 0,
4950                                                      v_start);
4951                         if (ret)
4952                                 mlog_errno(ret);
4953                 } else {
4954                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4955                                                          last_blk, new_blk,
4956                                                          v_start);
4957                         if (ret)
4958                                 mlog_errno(ret);
4959
4960                         if ((bucket_blkno(target) == last_blk) && extend)
4961                                 *extend = 0;
4962                 }
4963         }
4964
4965         return ret;
4966 }
4967
4968 /*
4969  * Add a new cluster for xattr storage.
4970  *
4971  * If the new cluster is contiguous with the previous one, it will be
4972  * appended to the same extent record, and num_clusters will be updated.
4973  * If not, we will insert a new extent for it and move some xattrs in
4974  * the last cluster into the new allocated one.
4975  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4976  * lose the benefits of hashing because we'll have to search large leaves.
4977  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4978  * if it's bigger).
4979  *
4980  * first_bh is the first block of the previous extent rec and header_bh
4981  * indicates the bucket we will insert the new xattrs. They will be updated
4982  * when the header_bh is moved into the new cluster.
4983  */
4984 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4985                                        struct buffer_head *root_bh,
4986                                        struct ocfs2_xattr_bucket *first,
4987                                        struct ocfs2_xattr_bucket *target,
4988                                        u32 *num_clusters,
4989                                        u32 prev_cpos,
4990                                        int *extend,
4991                                        struct ocfs2_xattr_set_ctxt *ctxt)
4992 {
4993         int ret;
4994         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4995         u32 prev_clusters = *num_clusters;
4996         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4997         u64 block;
4998         handle_t *handle = ctxt->handle;
4999         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5000         struct ocfs2_extent_tree et;
5001
5002         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
5003              "previous xattr blkno = %llu\n",
5004              (unsigned long long)OCFS2_I(inode)->ip_blkno,
5005              prev_cpos, (unsigned long long)bucket_blkno(first));
5006
5007         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5008
5009         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5010                                       OCFS2_JOURNAL_ACCESS_WRITE);
5011         if (ret < 0) {
5012                 mlog_errno(ret);
5013                 goto leave;
5014         }
5015
5016         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
5017                                      clusters_to_add, &bit_off, &num_bits);
5018         if (ret < 0) {
5019                 if (ret != -ENOSPC)
5020                         mlog_errno(ret);
5021                 goto leave;
5022         }
5023
5024         BUG_ON(num_bits > clusters_to_add);
5025
5026         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5027         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
5028              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5029
5030         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5031             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5032              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5033                 /*
5034                  * If this cluster is contiguous with the old one and
5035                  * adding this new cluster, we don't surpass the limit of
5036                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5037                  * initialized and used like other buckets in the previous
5038                  * cluster.
5039                  * So add it as a contiguous one. The caller will handle
5040                  * its init process.
5041                  */
5042                 v_start = prev_cpos + prev_clusters;
5043                 *num_clusters = prev_clusters + num_bits;
5044                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5045                      num_bits);
5046         } else {
5047                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5048                                                        handle,
5049                                                        first,
5050                                                        target,
5051                                                        block,
5052                                                        prev_clusters,
5053                                                        &v_start,
5054                                                        extend);
5055                 if (ret) {
5056                         mlog_errno(ret);
5057                         goto leave;
5058                 }
5059         }
5060
5061         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
5062              num_bits, (unsigned long long)block, v_start);
5063         ret = ocfs2_insert_extent(handle, &et, v_start, block,
5064                                   num_bits, 0, ctxt->meta_ac);
5065         if (ret < 0) {
5066                 mlog_errno(ret);
5067                 goto leave;
5068         }
5069
5070         ret = ocfs2_journal_dirty(handle, root_bh);
5071         if (ret < 0)
5072                 mlog_errno(ret);
5073
5074 leave:
5075         return ret;
5076 }
5077
5078 /*
5079  * We are given an extent.  'first' is the bucket at the very front of
5080  * the extent.  The extent has space for an additional bucket past
5081  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5082  * of the target bucket.  We wish to shift every bucket past the target
5083  * down one, filling in that additional space.  When we get back to the
5084  * target, we split the target between itself and the now-empty bucket
5085  * at target+1 (aka, target_blkno + blks_per_bucket).
5086  */
5087 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5088                                      handle_t *handle,
5089                                      struct ocfs2_xattr_bucket *first,
5090                                      u64 target_blk,
5091                                      u32 num_clusters)
5092 {
5093         int ret, credits;
5094         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5095         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5096         u64 end_blk;
5097         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5098
5099         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5100              "from %llu, len = %u\n", (unsigned long long)target_blk,
5101              (unsigned long long)bucket_blkno(first), num_clusters);
5102
5103         /* The extent must have room for an additional bucket */
5104         BUG_ON(new_bucket >=
5105                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5106
5107         /* end_blk points to the last existing bucket */
5108         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5109
5110         /*
5111          * end_blk is the start of the last existing bucket.
5112          * Thus, (end_blk - target_blk) covers the target bucket and
5113          * every bucket after it up to, but not including, the last
5114          * existing bucket.  Then we add the last existing bucket, the
5115          * new bucket, and the first bucket (3 * blk_per_bucket).
5116          */
5117         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
5118                   handle->h_buffer_credits;
5119         ret = ocfs2_extend_trans(handle, credits);
5120         if (ret) {
5121                 mlog_errno(ret);
5122                 goto out;
5123         }
5124
5125         ret = ocfs2_xattr_bucket_journal_access(handle, first,
5126                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5127         if (ret) {
5128                 mlog_errno(ret);
5129                 goto out;
5130         }
5131
5132         while (end_blk != target_blk) {
5133                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5134                                             end_blk + blk_per_bucket, 0);
5135                 if (ret)
5136                         goto out;
5137                 end_blk -= blk_per_bucket;
5138         }
5139
5140         /* Move half of the xattr in target_blkno to the next bucket. */
5141         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5142                                         target_blk + blk_per_bucket, NULL, 0);
5143
5144         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5145         ocfs2_xattr_bucket_journal_dirty(handle, first);
5146
5147 out:
5148         return ret;
5149 }
5150
5151 /*
5152  * Add new xattr bucket in an extent record and adjust the buckets
5153  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5154  * bucket we want to insert into.
5155  *
5156  * In the easy case, we will move all the buckets after target down by
5157  * one. Half of target's xattrs will be moved to the next bucket.
5158  *
5159  * If current cluster is full, we'll allocate a new one.  This may not
5160  * be contiguous.  The underlying calls will make sure that there is
5161  * space for the insert, shifting buckets around if necessary.
5162  * 'target' may be moved by those calls.
5163  */
5164 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5165                                       struct buffer_head *xb_bh,
5166                                       struct ocfs2_xattr_bucket *target,
5167                                       struct ocfs2_xattr_set_ctxt *ctxt)
5168 {
5169         struct ocfs2_xattr_block *xb =
5170                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5171         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5172         struct ocfs2_extent_list *el = &xb_root->xt_list;
5173         u32 name_hash =
5174                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5175         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5176         int ret, num_buckets, extend = 1;
5177         u64 p_blkno;
5178         u32 e_cpos, num_clusters;
5179         /* The bucket at the front of the extent */
5180         struct ocfs2_xattr_bucket *first;
5181
5182         mlog(0, "Add new xattr bucket starting from %llu\n",
5183              (unsigned long long)bucket_blkno(target));
5184
5185         /* The first bucket of the original extent */
5186         first = ocfs2_xattr_bucket_new(inode);
5187         if (!first) {
5188                 ret = -ENOMEM;
5189                 mlog_errno(ret);
5190                 goto out;
5191         }
5192
5193         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5194                                   &num_clusters, el);
5195         if (ret) {
5196                 mlog_errno(ret);
5197                 goto out;
5198         }
5199
5200         ret = ocfs2_read_xattr_bucket(first, p_blkno);
5201         if (ret) {
5202                 mlog_errno(ret);
5203                 goto out;
5204         }
5205
5206         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5207         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5208                 /*
5209                  * This can move first+target if the target bucket moves
5210                  * to the new extent.
5211                  */
5212                 ret = ocfs2_add_new_xattr_cluster(inode,
5213                                                   xb_bh,
5214                                                   first,
5215                                                   target,
5216                                                   &num_clusters,
5217                                                   e_cpos,
5218                                                   &extend,
5219                                                   ctxt);
5220                 if (ret) {
5221                         mlog_errno(ret);
5222                         goto out;
5223                 }
5224         }
5225
5226         if (extend) {
5227                 ret = ocfs2_extend_xattr_bucket(inode,
5228                                                 ctxt->handle,
5229                                                 first,
5230                                                 bucket_blkno(target),
5231                                                 num_clusters);
5232                 if (ret)
5233                         mlog_errno(ret);
5234         }
5235
5236 out:
5237         ocfs2_xattr_bucket_free(first);
5238
5239         return ret;
5240 }
5241
5242 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5243                                         struct ocfs2_xattr_bucket *bucket,
5244                                         int offs)
5245 {
5246         int block_off = offs >> inode->i_sb->s_blocksize_bits;
5247
5248         offs = offs % inode->i_sb->s_blocksize;
5249         return bucket_block(bucket, block_off) + offs;
5250 }
5251
5252 /*
5253  * Truncate the specified xe_off entry in xattr bucket.
5254  * bucket is indicated by header_bh and len is the new length.
5255  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5256  *
5257  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5258  */
5259 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5260                                              struct ocfs2_xattr_bucket *bucket,
5261                                              int xe_off,
5262                                              int len,
5263                                              struct ocfs2_xattr_set_ctxt *ctxt)
5264 {
5265         int ret, offset;
5266         u64 value_blk;
5267         struct ocfs2_xattr_entry *xe;
5268         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5269         size_t blocksize = inode->i_sb->s_blocksize;
5270         struct ocfs2_xattr_value_buf vb = {
5271                 .vb_access = ocfs2_journal_access,
5272         };
5273
5274         xe = &xh->xh_entries[xe_off];
5275
5276         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5277
5278         offset = le16_to_cpu(xe->xe_name_offset) +
5279                  OCFS2_XATTR_SIZE(xe->xe_name_len);
5280
5281         value_blk = offset / blocksize;
5282
5283         /* We don't allow ocfs2_xattr_value to be stored in different block. */
5284         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5285
5286         vb.vb_bh = bucket->bu_bhs[value_blk];
5287         BUG_ON(!vb.vb_bh);
5288
5289         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5290                 (vb.vb_bh->b_data + offset % blocksize);
5291
5292         /*
5293          * From here on out we have to dirty the bucket.  The generic
5294          * value calls only modify one of the bucket's bhs, but we need
5295          * to send the bucket at once.  So if they error, they *could* have
5296          * modified something.  We have to assume they did, and dirty
5297          * the whole bucket.  This leaves us in a consistent state.
5298          */
5299         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5300              xe_off, (unsigned long long)bucket_blkno(bucket), len);
5301         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5302         if (ret) {
5303                 mlog_errno(ret);
5304                 goto out;
5305         }
5306
5307         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5308                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5309         if (ret) {
5310                 mlog_errno(ret);
5311                 goto out;
5312         }
5313
5314         xe->xe_value_size = cpu_to_le64(len);
5315
5316         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5317
5318 out:
5319         return ret;
5320 }
5321
5322 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5323                                   struct buffer_head *root_bh,
5324                                   u64 blkno,
5325                                   u32 cpos,
5326                                   u32 len,
5327                                   void *para)
5328 {
5329         int ret;
5330         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5331         struct inode *tl_inode = osb->osb_tl_inode;
5332         handle_t *handle;
5333         struct ocfs2_xattr_block *xb =
5334                         (struct ocfs2_xattr_block *)root_bh->b_data;
5335         struct ocfs2_alloc_context *meta_ac = NULL;
5336         struct ocfs2_cached_dealloc_ctxt dealloc;
5337         struct ocfs2_extent_tree et;
5338
5339         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5340                                           ocfs2_delete_xattr_in_bucket, para);
5341         if (ret) {
5342                 mlog_errno(ret);
5343                 return ret;
5344         }
5345
5346         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5347
5348         ocfs2_init_dealloc_ctxt(&dealloc);
5349
5350         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5351              cpos, len, (unsigned long long)blkno);
5352
5353         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5354                                                len);
5355
5356         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5357         if (ret) {
5358                 mlog_errno(ret);
5359                 return ret;
5360         }
5361
5362         mutex_lock(&tl_inode->i_mutex);
5363
5364         if (ocfs2_truncate_log_needs_flush(osb)) {
5365                 ret = __ocfs2_flush_truncate_log(osb);
5366                 if (ret < 0) {
5367                         mlog_errno(ret);
5368                         goto out;
5369                 }
5370         }
5371
5372         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5373         if (IS_ERR(handle)) {
5374                 ret = -ENOMEM;
5375                 mlog_errno(ret);
5376                 goto out;
5377         }
5378
5379         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5380                                       OCFS2_JOURNAL_ACCESS_WRITE);
5381         if (ret) {
5382                 mlog_errno(ret);
5383                 goto out_commit;
5384         }
5385
5386         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5387                                   &dealloc);
5388         if (ret) {
5389                 mlog_errno(ret);
5390                 goto out_commit;
5391         }
5392
5393         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5394
5395         ret = ocfs2_journal_dirty(handle, root_bh);
5396         if (ret) {
5397                 mlog_errno(ret);
5398                 goto out_commit;
5399         }
5400
5401         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5402         if (ret)
5403                 mlog_errno(ret);
5404
5405 out_commit:
5406         ocfs2_commit_trans(osb, handle);
5407 out:
5408         ocfs2_schedule_truncate_log_flush(osb, 1);
5409
5410         mutex_unlock(&tl_inode->i_mutex);
5411
5412         if (meta_ac)
5413                 ocfs2_free_alloc_context(meta_ac);
5414
5415         ocfs2_run_deallocs(osb, &dealloc);
5416
5417         return ret;
5418 }
5419
5420 /*
5421  * Set the xattr name/value in the bucket specified in xs.
5422  */
5423 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5424                                      struct ocfs2_xattr_info *xi,
5425                                      struct ocfs2_xattr_search *xs,
5426                                      struct ocfs2_xattr_set_ctxt *ctxt)
5427 {
5428         int ret;
5429         u64 blkno;
5430         struct ocfs2_xa_loc loc;
5431
5432         if (!xs->bucket->bu_bhs[1]) {
5433                 blkno = bucket_blkno(xs->bucket);
5434                 ocfs2_xattr_bucket_relse(xs->bucket);
5435                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
5436                 if (ret) {
5437                         mlog_errno(ret);
5438                         goto out;
5439                 }
5440         }
5441
5442         ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5443                                        xs->not_found ? NULL : xs->here);
5444         ret = ocfs2_xa_set(&loc, xi, ctxt);
5445         if (ret) {
5446                 if (ret != -ENOSPC)
5447                         mlog_errno(ret);
5448                 goto out;
5449         }
5450         xs->here = loc.xl_entry;
5451
5452 out:
5453         return ret;
5454 }
5455
5456 /*
5457  * check whether the xattr bucket is filled up with the same hash value.
5458  * If we want to insert the xattr with the same hash, return -ENOSPC.
5459  * If we want to insert a xattr with different hash value, go ahead
5460  * and ocfs2_divide_xattr_bucket will handle this.
5461  */
5462 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5463                                               struct ocfs2_xattr_bucket *bucket,
5464                                               const char *name)
5465 {
5466         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5467         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5468
5469         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5470                 return 0;
5471
5472         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5473             xh->xh_entries[0].xe_name_hash) {
5474                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5475                      "hash = %u\n",
5476                      (unsigned long long)bucket_blkno(bucket),
5477                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5478                 return -ENOSPC;
5479         }
5480
5481         return 0;
5482 }
5483
5484 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5485                                              struct ocfs2_xattr_info *xi,
5486                                              struct ocfs2_xattr_search *xs,
5487                                              struct ocfs2_xattr_set_ctxt *ctxt)
5488 {
5489         struct ocfs2_xattr_header *xh;
5490         struct ocfs2_xattr_entry *xe;
5491         u16 count, header_size, xh_free_start;
5492         int free, max_free, need, old;
5493         size_t value_size = 0;
5494         size_t blocksize = inode->i_sb->s_blocksize;
5495         int ret, allocation = 0;
5496
5497         mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5498
5499 try_again:
5500         xh = xs->header;
5501         count = le16_to_cpu(xh->xh_count);
5502         xh_free_start = le16_to_cpu(xh->xh_free_start);
5503         header_size = sizeof(struct ocfs2_xattr_header) +
5504                         count * sizeof(struct ocfs2_xattr_entry);
5505         max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5506                 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5507
5508         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5509                         "of %u which exceed block size\n",
5510                         (unsigned long long)bucket_blkno(xs->bucket),
5511                         header_size);
5512
5513         if (xi->xi_value && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
5514                 value_size = OCFS2_XATTR_ROOT_SIZE;
5515         else if (xi->xi_value)
5516                 value_size = OCFS2_XATTR_SIZE(xi->xi_value_len);
5517
5518         if (xs->not_found)
5519                 need = sizeof(struct ocfs2_xattr_entry) +
5520                         OCFS2_XATTR_SIZE(xi->xi_name_len) + value_size;
5521         else {
5522                 need = value_size + OCFS2_XATTR_SIZE(xi->xi_name_len);
5523
5524                 /*
5525                  * We only replace the old value if the new length is smaller
5526                  * than the old one. Otherwise we will allocate new space in the
5527                  * bucket to store it.
5528                  */
5529                 xe = xs->here;
5530                 if (ocfs2_xattr_is_local(xe))
5531                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5532                 else
5533                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5534
5535                 if (old >= value_size)
5536                         need = 0;
5537         }
5538
5539         free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5540         /*
5541          * We need to make sure the new name/value pair
5542          * can exist in the same block.
5543          */
5544         if (xh_free_start % blocksize < need)
5545                 free -= xh_free_start % blocksize;
5546
5547         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5548              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5549              " %u\n", xs->not_found,
5550              (unsigned long long)bucket_blkno(xs->bucket),
5551              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5552              le16_to_cpu(xh->xh_name_value_len));
5553
5554         if (free < need ||
5555             (xs->not_found &&
5556              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5557                 if (need <= max_free &&
5558                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5559                         /*
5560                          * We can create the space by defragment. Since only the
5561                          * name/value will be moved, the xe shouldn't be changed
5562                          * in xs.
5563                          */
5564                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5565                                                         xs->bucket);
5566                         if (ret) {
5567                                 mlog_errno(ret);
5568                                 goto out;
5569                         }
5570
5571                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5572                         free = xh_free_start - header_size
5573                                 - OCFS2_XATTR_HEADER_GAP;
5574                         if (xh_free_start % blocksize < need)
5575                                 free -= xh_free_start % blocksize;
5576
5577                         if (free >= need)
5578                                 goto xattr_set;
5579
5580                         mlog(0, "Can't get enough space for xattr insert by "
5581                              "defragment. Need %u bytes, but we have %d, so "
5582                              "allocate new bucket for it.\n", need, free);
5583                 }
5584
5585                 /*
5586                  * We have to add new buckets or clusters and one
5587                  * allocation should leave us enough space for insert.
5588                  */
5589                 BUG_ON(allocation);
5590
5591                 /*
5592                  * We do not allow for overlapping ranges between buckets. And
5593                  * the maximum number of collisions we will allow for then is
5594                  * one bucket's worth, so check it here whether we need to
5595                  * add a new bucket for the insert.
5596                  */
5597                 ret = ocfs2_check_xattr_bucket_collision(inode,
5598                                                          xs->bucket,
5599                                                          xi->xi_name);
5600                 if (ret) {
5601                         mlog_errno(ret);
5602                         goto out;
5603                 }
5604
5605                 ret = ocfs2_add_new_xattr_bucket(inode,
5606                                                  xs->xattr_bh,
5607                                                  xs->bucket,
5608                                                  ctxt);
5609                 if (ret) {
5610                         mlog_errno(ret);
5611                         goto out;
5612                 }
5613
5614                 /*
5615                  * ocfs2_add_new_xattr_bucket() will have updated
5616                  * xs->bucket if it moved, but it will not have updated
5617                  * any of the other search fields.  Thus, we drop it and
5618                  * re-search.  Everything should be cached, so it'll be
5619                  * quick.
5620                  */
5621                 ocfs2_xattr_bucket_relse(xs->bucket);
5622                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5623                                                    xi->xi_name_index,
5624                                                    xi->xi_name, xs);
5625                 if (ret && ret != -ENODATA)
5626                         goto out;
5627                 xs->not_found = ret;
5628                 allocation = 1;
5629                 goto try_again;
5630         }
5631
5632 xattr_set:
5633         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5634 out:
5635         mlog_exit(ret);
5636         return ret;
5637 }
5638
5639 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5640                                         struct ocfs2_xattr_bucket *bucket,
5641                                         void *para)
5642 {
5643         int ret = 0, ref_credits;
5644         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5645         u16 i;
5646         struct ocfs2_xattr_entry *xe;
5647         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5648         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5649         int credits = ocfs2_remove_extent_credits(osb->sb) +
5650                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5651         struct ocfs2_xattr_value_root *xv;
5652         struct ocfs2_rm_xattr_bucket_para *args =
5653                         (struct ocfs2_rm_xattr_bucket_para *)para;
5654
5655         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5656
5657         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5658                 xe = &xh->xh_entries[i];
5659                 if (ocfs2_xattr_is_local(xe))
5660                         continue;
5661
5662                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5663                                                       i, &xv, NULL);
5664
5665                 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5666                                                          args->ref_ci,
5667                                                          args->ref_root_bh,
5668                                                          &ctxt.meta_ac,
5669                                                          &ref_credits);
5670
5671                 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5672                 if (IS_ERR(ctxt.handle)) {
5673                         ret = PTR_ERR(ctxt.handle);
5674                         mlog_errno(ret);
5675                         break;
5676                 }
5677
5678                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5679                                                         i, 0, &ctxt);
5680
5681                 ocfs2_commit_trans(osb, ctxt.handle);
5682                 if (ctxt.meta_ac) {
5683                         ocfs2_free_alloc_context(ctxt.meta_ac);
5684                         ctxt.meta_ac = NULL;
5685                 }
5686                 if (ret) {
5687                         mlog_errno(ret);
5688                         break;
5689                 }
5690         }
5691
5692         if (ctxt.meta_ac)
5693                 ocfs2_free_alloc_context(ctxt.meta_ac);
5694         ocfs2_schedule_truncate_log_flush(osb, 1);
5695         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5696         return ret;
5697 }
5698
5699 /*
5700  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5701  * or change the extent record flag), we need to recalculate
5702  * the metaecc for the whole bucket. So it is done here.
5703  *
5704  * Note:
5705  * We have to give the extra credits for the caller.
5706  */
5707 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5708                                             handle_t *handle,
5709                                             void *para)
5710 {
5711         int ret;
5712         struct ocfs2_xattr_bucket *bucket =
5713                         (struct ocfs2_xattr_bucket *)para;
5714
5715         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5716                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5717         if (ret) {
5718                 mlog_errno(ret);
5719                 return ret;
5720         }
5721
5722         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5723
5724         return 0;
5725 }
5726
5727 /*
5728  * Special action we need if the xattr value is refcounted.
5729  *
5730  * 1. If the xattr is refcounted, lock the tree.
5731  * 2. CoW the xattr if we are setting the new value and the value
5732  *    will be stored outside.
5733  * 3. In other case, decrease_refcount will work for us, so just
5734  *    lock the refcount tree, calculate the meta and credits is OK.
5735  *
5736  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5737  * currently CoW is a completed transaction, while this function
5738  * will also lock the allocators and let us deadlock. So we will
5739  * CoW the whole xattr value.
5740  */
5741 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5742                                         struct ocfs2_dinode *di,
5743                                         struct ocfs2_xattr_info *xi,
5744                                         struct ocfs2_xattr_search *xis,
5745                                         struct ocfs2_xattr_search *xbs,
5746                                         struct ocfs2_refcount_tree **ref_tree,
5747                                         int *meta_add,
5748                                         int *credits)
5749 {
5750         int ret = 0;
5751         struct ocfs2_xattr_block *xb;
5752         struct ocfs2_xattr_entry *xe;
5753         char *base;
5754         u32 p_cluster, num_clusters;
5755         unsigned int ext_flags;
5756         int name_offset, name_len;
5757         struct ocfs2_xattr_value_buf vb;
5758         struct ocfs2_xattr_bucket *bucket = NULL;
5759         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5760         struct ocfs2_post_refcount refcount;
5761         struct ocfs2_post_refcount *p = NULL;
5762         struct buffer_head *ref_root_bh = NULL;
5763
5764         if (!xis->not_found) {
5765                 xe = xis->here;
5766                 name_offset = le16_to_cpu(xe->xe_name_offset);
5767                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5768                 base = xis->base;
5769                 vb.vb_bh = xis->inode_bh;
5770                 vb.vb_access = ocfs2_journal_access_di;
5771         } else {
5772                 int i, block_off = 0;
5773                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5774                 xe = xbs->here;
5775                 name_offset = le16_to_cpu(xe->xe_name_offset);
5776                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5777                 i = xbs->here - xbs->header->xh_entries;
5778
5779                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5780                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5781                                                         bucket_xh(xbs->bucket),
5782                                                         i, &block_off,
5783                                                         &name_offset);
5784                         if (ret) {
5785                                 mlog_errno(ret);
5786                                 goto out;
5787                         }
5788                         base = bucket_block(xbs->bucket, block_off);
5789                         vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5790                         vb.vb_access = ocfs2_journal_access;
5791
5792                         if (ocfs2_meta_ecc(osb)) {
5793                                 /*create parameters for ocfs2_post_refcount. */
5794                                 bucket = xbs->bucket;
5795                                 refcount.credits = bucket->bu_blocks;
5796                                 refcount.para = bucket;
5797                                 refcount.func =
5798                                         ocfs2_xattr_bucket_post_refcount;
5799                                 p = &refcount;
5800                         }
5801                 } else {
5802                         base = xbs->base;
5803                         vb.vb_bh = xbs->xattr_bh;
5804                         vb.vb_access = ocfs2_journal_access_xb;
5805                 }
5806         }
5807
5808         if (ocfs2_xattr_is_local(xe))
5809                 goto out;
5810
5811         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5812                                 (base + name_offset + name_len);
5813
5814         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5815                                        &num_clusters, &vb.vb_xv->xr_list,
5816                                        &ext_flags);
5817         if (ret) {
5818                 mlog_errno(ret);
5819                 goto out;
5820         }
5821
5822         /*
5823          * We just need to check the 1st extent record, since we always
5824          * CoW the whole xattr. So there shouldn't be a xattr with
5825          * some REFCOUNT extent recs after the 1st one.
5826          */
5827         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5828                 goto out;
5829
5830         ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5831                                        1, ref_tree, &ref_root_bh);
5832         if (ret) {
5833                 mlog_errno(ret);
5834                 goto out;
5835         }
5836
5837         /*
5838          * If we are deleting the xattr or the new size will be stored inside,
5839          * cool, leave it there, the xattr truncate process will remove them
5840          * for us(it still needs the refcount tree lock and the meta, credits).
5841          * And the worse case is that every cluster truncate will split the
5842          * refcount tree, and make the original extent become 3. So we will need
5843          * 2 * cluster more extent recs at most.
5844          */
5845         if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5846
5847                 ret = ocfs2_refcounted_xattr_delete_need(inode,
5848                                                          &(*ref_tree)->rf_ci,
5849                                                          ref_root_bh, vb.vb_xv,
5850                                                          meta_add, credits);
5851                 if (ret)
5852                         mlog_errno(ret);
5853                 goto out;
5854         }
5855
5856         ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5857                                        *ref_tree, ref_root_bh, 0,
5858                                        le32_to_cpu(vb.vb_xv->xr_clusters), p);
5859         if (ret)
5860                 mlog_errno(ret);
5861
5862 out:
5863         brelse(ref_root_bh);
5864         return ret;
5865 }
5866
5867 /*
5868  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5869  * The physical clusters will be added to refcount tree.
5870  */
5871 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5872                                 struct ocfs2_xattr_value_root *xv,
5873                                 struct ocfs2_extent_tree *value_et,
5874                                 struct ocfs2_caching_info *ref_ci,
5875                                 struct buffer_head *ref_root_bh,
5876                                 struct ocfs2_cached_dealloc_ctxt *dealloc,
5877                                 struct ocfs2_post_refcount *refcount)
5878 {
5879         int ret = 0;
5880         u32 clusters = le32_to_cpu(xv->xr_clusters);
5881         u32 cpos, p_cluster, num_clusters;
5882         struct ocfs2_extent_list *el = &xv->xr_list;
5883         unsigned int ext_flags;
5884
5885         cpos = 0;
5886         while (cpos < clusters) {
5887                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5888                                                &num_clusters, el, &ext_flags);
5889
5890                 cpos += num_clusters;
5891                 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5892                         continue;
5893
5894                 BUG_ON(!p_cluster);
5895
5896                 ret = ocfs2_add_refcount_flag(inode, value_et,
5897                                               ref_ci, ref_root_bh,
5898                                               cpos - num_clusters,
5899                                               p_cluster, num_clusters,
5900                                               dealloc, refcount);
5901                 if (ret) {
5902                         mlog_errno(ret);
5903                         break;
5904                 }
5905         }
5906
5907         return ret;
5908 }
5909
5910 /*
5911  * Given a normal ocfs2_xattr_header, refcount all the entries which
5912  * have value stored outside.
5913  * Used for xattrs stored in inode and ocfs2_xattr_block.
5914  */
5915 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5916                                 struct ocfs2_xattr_value_buf *vb,
5917                                 struct ocfs2_xattr_header *header,
5918                                 struct ocfs2_caching_info *ref_ci,
5919                                 struct buffer_head *ref_root_bh,
5920                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5921 {
5922
5923         struct ocfs2_xattr_entry *xe;
5924         struct ocfs2_xattr_value_root *xv;
5925         struct ocfs2_extent_tree et;
5926         int i, ret = 0;
5927
5928         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5929                 xe = &header->xh_entries[i];
5930
5931                 if (ocfs2_xattr_is_local(xe))
5932                         continue;
5933
5934                 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5935                         le16_to_cpu(xe->xe_name_offset) +
5936                         OCFS2_XATTR_SIZE(xe->xe_name_len));
5937
5938                 vb->vb_xv = xv;
5939                 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5940
5941                 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5942                                                         ref_ci, ref_root_bh,
5943                                                         dealloc, NULL);
5944                 if (ret) {
5945                         mlog_errno(ret);
5946                         break;
5947                 }
5948         }
5949
5950         return ret;
5951 }
5952
5953 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5954                                 struct buffer_head *fe_bh,
5955                                 struct ocfs2_caching_info *ref_ci,
5956                                 struct buffer_head *ref_root_bh,
5957                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5958 {
5959         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5960         struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5961                                 (fe_bh->b_data + inode->i_sb->s_blocksize -
5962                                 le16_to_cpu(di->i_xattr_inline_size));
5963         struct ocfs2_xattr_value_buf vb = {
5964                 .vb_bh = fe_bh,
5965                 .vb_access = ocfs2_journal_access_di,
5966         };
5967
5968         return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5969                                                   ref_ci, ref_root_bh, dealloc);
5970 }
5971
/*
 * Arguments handed through the xattr bucket iterators to
 * ocfs2_xattr_bucket_value_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	/* forwarded as ref_ci to ocfs2_xattr_value_attach_refcount() */
	struct ocfs2_caching_info		*ref_ci;
	/* forwarded as ref_root_bh to ocfs2_xattr_value_attach_refcount() */
	struct buffer_head			*ref_root_bh;
	/* forwarded as dealloc to ocfs2_xattr_value_attach_refcount() */
	struct ocfs2_cached_dealloc_ctxt	*dealloc;
};
5977
5978 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5979                                            struct ocfs2_xattr_bucket *bucket,
5980                                            int offset,
5981                                            struct ocfs2_xattr_value_root **xv,
5982                                            struct buffer_head **bh)
5983 {
5984         int ret, block_off, name_offset;
5985         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5986         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5987         void *base;
5988
5989         ret = ocfs2_xattr_bucket_get_name_value(sb,
5990                                                 bucket_xh(bucket),
5991                                                 offset,
5992                                                 &block_off,
5993                                                 &name_offset);
5994         if (ret) {
5995                 mlog_errno(ret);
5996                 goto out;
5997         }
5998
5999         base = bucket_block(bucket, block_off);
6000
6001         *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6002                          OCFS2_XATTR_SIZE(xe->xe_name_len));
6003
6004         if (bh)
6005                 *bh = bucket->bu_bhs[block_off];
6006 out:
6007         return ret;
6008 }
6009
6010 /*
6011  * For a given xattr bucket, refcount all the entries which
6012  * have value stored outside.
6013  */
6014 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6015                                              struct ocfs2_xattr_bucket *bucket,
6016                                              void *para)
6017 {
6018         int i, ret = 0;
6019         struct ocfs2_extent_tree et;
6020         struct ocfs2_xattr_tree_value_refcount_para *ref =
6021                         (struct ocfs2_xattr_tree_value_refcount_para *)para;
6022         struct ocfs2_xattr_header *xh =
6023                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6024         struct ocfs2_xattr_entry *xe;
6025         struct ocfs2_xattr_value_buf vb = {
6026                 .vb_access = ocfs2_journal_access,
6027         };
6028         struct ocfs2_post_refcount refcount = {
6029                 .credits = bucket->bu_blocks,
6030                 .para = bucket,
6031                 .func = ocfs2_xattr_bucket_post_refcount,
6032         };
6033         struct ocfs2_post_refcount *p = NULL;
6034
6035         /* We only need post_refcount if we support metaecc. */
6036         if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6037                 p = &refcount;
6038
6039         mlog(0, "refcount bucket %llu, count = %u\n",
6040              (unsigned long long)bucket_blkno(bucket),
6041              le16_to_cpu(xh->xh_count));
6042         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6043                 xe = &xh->xh_entries[i];
6044
6045                 if (ocfs2_xattr_is_local(xe))
6046                         continue;
6047
6048                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6049                                                       &vb.vb_xv, &vb.vb_bh);
6050                 if (ret) {
6051                         mlog_errno(ret);
6052                         break;
6053                 }
6054
6055                 ocfs2_init_xattr_value_extent_tree(&et,
6056                                                    INODE_CACHE(inode), &vb);
6057
6058                 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6059                                                         &et, ref->ref_ci,
6060                                                         ref->ref_root_bh,
6061                                                         ref->dealloc, p);
6062                 if (ret) {
6063                         mlog_errno(ret);
6064                         break;
6065                 }
6066         }
6067
6068         return ret;
6069
6070 }
6071
6072 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6073                                      struct buffer_head *root_bh,
6074                                      u64 blkno, u32 cpos, u32 len, void *para)
6075 {
6076         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6077                                            ocfs2_xattr_bucket_value_refcount,
6078                                            para);
6079 }
6080
6081 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6082                                 struct buffer_head *blk_bh,
6083                                 struct ocfs2_caching_info *ref_ci,
6084                                 struct buffer_head *ref_root_bh,
6085                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6086 {
6087         int ret = 0;
6088         struct ocfs2_xattr_block *xb =
6089                                 (struct ocfs2_xattr_block *)blk_bh->b_data;
6090
6091         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6092                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6093                 struct ocfs2_xattr_value_buf vb = {
6094                         .vb_bh = blk_bh,
6095                         .vb_access = ocfs2_journal_access_xb,
6096                 };
6097
6098                 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6099                                                          ref_ci, ref_root_bh,
6100                                                          dealloc);
6101         } else {
6102                 struct ocfs2_xattr_tree_value_refcount_para para = {
6103                         .ref_ci = ref_ci,
6104                         .ref_root_bh = ref_root_bh,
6105                         .dealloc = dealloc,
6106                 };
6107
6108                 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6109                                                 ocfs2_refcount_xattr_tree_rec,
6110                                                 &para);
6111         }
6112
6113         return ret;
6114 }
6115
6116 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6117                                      struct buffer_head *fe_bh,
6118                                      struct ocfs2_caching_info *ref_ci,
6119                                      struct buffer_head *ref_root_bh,
6120                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
6121 {
6122         int ret = 0;
6123         struct ocfs2_inode_info *oi = OCFS2_I(inode);
6124         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6125         struct buffer_head *blk_bh = NULL;
6126
6127         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6128                 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6129                                                          ref_ci, ref_root_bh,
6130                                                          dealloc);
6131                 if (ret) {
6132                         mlog_errno(ret);
6133                         goto out;
6134                 }
6135         }
6136
6137         if (!di->i_xattr_loc)
6138                 goto out;
6139
6140         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6141                                      &blk_bh);
6142         if (ret < 0) {
6143                 mlog_errno(ret);
6144                 goto out;
6145         }
6146
6147         ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6148                                                 ref_root_bh, dealloc);
6149         if (ret)
6150                 mlog_errno(ret);
6151
6152         brelse(blk_bh);
6153 out:
6154
6155         return ret;
6156 }
6157
/* Predicate: non-zero when the given xattr entry should be reflinked. */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);

/*
 * Everything the xattr reflink code needs to carry around while copying
 * the xattrs of one inode to another.
 */
struct ocfs2_xattr_reflink {
	struct inode *old_inode;	/* inode the xattrs come from */
	struct inode *new_inode;	/* inode the xattrs are copied to */
	struct buffer_head *old_bh;	/* inode bh of old_inode */
	struct buffer_head *new_bh;	/* inode bh of new_inode */
	/* refcount tree state handed to ocfs2_increase_refcount() */
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	/* optional filter; when NULL every entry is reflinked */
	should_xattr_reflinked *xattr_reflinked;
};
6173
/*
 * Locator callback: given an xattr header and the index of an entry in
 * it, hand back the entry's value root (*xv) and, when ret_bh is not
 * NULL, the buffer head that holds it.
 *
 * Xattrs stored in the inode, in an xattr block and in an xattr tree
 * each have their own implementation; para is implementation private.
 */
typedef int (get_xattr_value_root)(struct super_block *sb,
				   struct buffer_head *bh,
				   struct ocfs2_xattr_header *xh,
				   int offset,
				   struct ocfs2_xattr_value_root **xv,
				   struct buffer_head **ret_bh,
				   void *para);
6186
6187 /*
6188  * Calculate all the xattr value root metadata stored in this xattr header and
6189  * credits we need if we create them from the scratch.
6190  * We use get_xattr_value_root so that all types of xattr container can use it.
6191  */
6192 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6193                                              struct buffer_head *bh,
6194                                              struct ocfs2_xattr_header *xh,
6195                                              int *metas, int *credits,
6196                                              int *num_recs,
6197                                              get_xattr_value_root *func,
6198                                              void *para)
6199 {
6200         int i, ret = 0;
6201         struct ocfs2_xattr_value_root *xv;
6202         struct ocfs2_xattr_entry *xe;
6203
6204         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6205                 xe = &xh->xh_entries[i];
6206                 if (ocfs2_xattr_is_local(xe))
6207                         continue;
6208
6209                 ret = func(sb, bh, xh, i, &xv, NULL, para);
6210                 if (ret) {
6211                         mlog_errno(ret);
6212                         break;
6213                 }
6214
6215                 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6216                           le16_to_cpu(xv->xr_list.l_next_free_rec);
6217
6218                 *credits += ocfs2_calc_extend_credits(sb,
6219                                                 &def_xv.xv.xr_list,
6220                                                 le32_to_cpu(xv->xr_clusters));
6221
6222                 /*
6223                  * If the value is a tree with depth > 1, We don't go deep
6224                  * to the extent block, so just calculate a maximum record num.
6225                  */
6226                 if (!xv->xr_list.l_tree_depth)
6227                         *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6228                 else
6229                         *num_recs += ocfs2_clusters_for_bytes(sb,
6230                                                               XATTR_SIZE_MAX);
6231         }
6232
6233         return ret;
6234 }
6235
6236 /* Used by xattr inode and block to return the right xv and buffer_head. */
6237 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6238                                       struct buffer_head *bh,
6239                                       struct ocfs2_xattr_header *xh,
6240                                       int offset,
6241                                       struct ocfs2_xattr_value_root **xv,
6242                                       struct buffer_head **ret_bh,
6243                                       void *para)
6244 {
6245         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6246
6247         *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6248                 le16_to_cpu(xe->xe_name_offset) +
6249                 OCFS2_XATTR_SIZE(xe->xe_name_len));
6250
6251         if (ret_bh)
6252                 *ret_bh = bh;
6253
6254         return 0;
6255 }
6256
6257 /*
6258  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6259  * It is only used for inline xattr and xattr block.
6260  */
6261 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6262                                         struct ocfs2_xattr_header *xh,
6263                                         struct buffer_head *ref_root_bh,
6264                                         int *credits,
6265                                         struct ocfs2_alloc_context **meta_ac)
6266 {
6267         int ret, meta_add = 0, num_recs = 0;
6268         struct ocfs2_refcount_block *rb =
6269                         (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6270
6271         *credits = 0;
6272
6273         ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6274                                                 &meta_add, credits, &num_recs,
6275                                                 ocfs2_get_xattr_value_root,
6276                                                 NULL);
6277         if (ret) {
6278                 mlog_errno(ret);
6279                 goto out;
6280         }
6281
6282         /*
6283          * We need to add/modify num_recs in refcount tree, so just calculate
6284          * an approximate number we need for refcount tree change.
6285          * Sometimes we need to split the tree, and after split,  half recs
6286          * will be moved to the new block, and a new block can only provide
6287          * half number of recs. So we multiple new blocks by 2.
6288          */
6289         num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6290         meta_add += num_recs;
6291         *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6292         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6293                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6294                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6295         else
6296                 *credits += 1;
6297
6298         ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6299         if (ret)
6300                 mlog_errno(ret);
6301
6302 out:
6303         return ret;
6304 }
6305
6306 /*
6307  * Given a xattr header, reflink all the xattrs in this container.
6308  * It can be used for inode, block and bucket.
6309  *
6310  * NOTE:
6311  * Before we call this function, the caller has memcpy the xattr in
6312  * old_xh to the new_xh.
6313  *
6314  * If args.xattr_reflinked is set, call it to decide whether the xe should
6315  * be reflinked or not. If not, remove it from the new xattr header.
6316  */
6317 static int ocfs2_reflink_xattr_header(handle_t *handle,
6318                                       struct ocfs2_xattr_reflink *args,
6319                                       struct buffer_head *old_bh,
6320                                       struct ocfs2_xattr_header *xh,
6321                                       struct buffer_head *new_bh,
6322                                       struct ocfs2_xattr_header *new_xh,
6323                                       struct ocfs2_xattr_value_buf *vb,
6324                                       struct ocfs2_alloc_context *meta_ac,
6325                                       get_xattr_value_root *func,
6326                                       void *para)
6327 {
6328         int ret = 0, i, j;
6329         struct super_block *sb = args->old_inode->i_sb;
6330         struct buffer_head *value_bh;
6331         struct ocfs2_xattr_entry *xe, *last;
6332         struct ocfs2_xattr_value_root *xv, *new_xv;
6333         struct ocfs2_extent_tree data_et;
6334         u32 clusters, cpos, p_cluster, num_clusters;
6335         unsigned int ext_flags = 0;
6336
6337         mlog(0, "reflink xattr in container %llu, count = %u\n",
6338              (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6339
6340         last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6341         for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6342                 xe = &xh->xh_entries[i];
6343
6344                 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6345                         xe = &new_xh->xh_entries[j];
6346
6347                         le16_add_cpu(&new_xh->xh_count, -1);
6348                         if (new_xh->xh_count) {
6349                                 memmove(xe, xe + 1,
6350                                         (void *)last - (void *)xe);
6351                                 memset(last, 0,
6352                                        sizeof(struct ocfs2_xattr_entry));
6353                         }
6354
6355                         /*
6356                          * We don't want j to increase in the next round since
6357                          * it is already moved ahead.
6358                          */
6359                         j--;
6360                         continue;
6361                 }
6362
6363                 if (ocfs2_xattr_is_local(xe))
6364                         continue;
6365
6366                 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6367                 if (ret) {
6368                         mlog_errno(ret);
6369                         break;
6370                 }
6371
6372                 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6373                 if (ret) {
6374                         mlog_errno(ret);
6375                         break;
6376                 }
6377
6378                 /*
6379                  * For the xattr which has l_tree_depth = 0, all the extent
6380                  * recs have already be copied to the new xh with the
6381                  * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6382                  * increase the refount count int the refcount tree.
6383                  *
6384                  * For the xattr which has l_tree_depth > 0, we need
6385                  * to initialize it to the empty default value root,
6386                  * and then insert the extents one by one.
6387                  */
6388                 if (xv->xr_list.l_tree_depth) {
6389                         memcpy(new_xv, &def_xv, sizeof(def_xv));
6390                         vb->vb_xv = new_xv;
6391                         vb->vb_bh = value_bh;
6392                         ocfs2_init_xattr_value_extent_tree(&data_et,
6393                                         INODE_CACHE(args->new_inode), vb);
6394                 }
6395
6396                 clusters = le32_to_cpu(xv->xr_clusters);
6397                 cpos = 0;
6398                 while (cpos < clusters) {
6399                         ret = ocfs2_xattr_get_clusters(args->old_inode,
6400                                                        cpos,
6401                                                        &p_cluster,
6402                                                        &num_clusters,
6403                                                        &xv->xr_list,
6404                                                        &ext_flags);
6405                         if (ret) {
6406                                 mlog_errno(ret);
6407                                 goto out;
6408                         }
6409
6410                         BUG_ON(!p_cluster);
6411
6412                         if (xv->xr_list.l_tree_depth) {
6413                                 ret = ocfs2_insert_extent(handle,
6414                                                 &data_et, cpos,
6415                                                 ocfs2_clusters_to_blocks(
6416                                                         args->old_inode->i_sb,
6417                                                         p_cluster),
6418                                                 num_clusters, ext_flags,
6419                                                 meta_ac);
6420                                 if (ret) {
6421                                         mlog_errno(ret);
6422                                         goto out;
6423                                 }
6424                         }
6425
6426                         ret = ocfs2_increase_refcount(handle, args->ref_ci,
6427                                                       args->ref_root_bh,
6428                                                       p_cluster, num_clusters,
6429                                                       meta_ac, args->dealloc);
6430                         if (ret) {
6431                                 mlog_errno(ret);
6432                                 goto out;
6433                         }
6434
6435                         cpos += num_clusters;
6436                 }
6437         }
6438
6439 out:
6440         return ret;
6441 }
6442
6443 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6444 {
6445         int ret = 0, credits = 0;
6446         handle_t *handle;
6447         struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6448         struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6449         int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6450         int header_off = osb->sb->s_blocksize - inline_size;
6451         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6452                                         (args->old_bh->b_data + header_off);
6453         struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6454                                         (args->new_bh->b_data + header_off);
6455         struct ocfs2_alloc_context *meta_ac = NULL;
6456         struct ocfs2_inode_info *new_oi;
6457         struct ocfs2_dinode *new_di;
6458         struct ocfs2_xattr_value_buf vb = {
6459                 .vb_bh = args->new_bh,
6460                 .vb_access = ocfs2_journal_access_di,
6461         };
6462
6463         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6464                                                   &credits, &meta_ac);
6465         if (ret) {
6466                 mlog_errno(ret);
6467                 goto out;
6468         }
6469
6470         handle = ocfs2_start_trans(osb, credits);
6471         if (IS_ERR(handle)) {
6472                 ret = PTR_ERR(handle);
6473                 mlog_errno(ret);
6474                 goto out;
6475         }
6476
6477         ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6478                                       args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6479         if (ret) {
6480                 mlog_errno(ret);
6481                 goto out_commit;
6482         }
6483
6484         memcpy(args->new_bh->b_data + header_off,
6485                args->old_bh->b_data + header_off, inline_size);
6486
6487         new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6488         new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6489
6490         ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6491                                          args->new_bh, new_xh, &vb, meta_ac,
6492                                          ocfs2_get_xattr_value_root, NULL);
6493         if (ret) {
6494                 mlog_errno(ret);
6495                 goto out_commit;
6496         }
6497
6498         new_oi = OCFS2_I(args->new_inode);
6499         spin_lock(&new_oi->ip_lock);
6500         new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6501         new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6502         spin_unlock(&new_oi->ip_lock);
6503
6504         ocfs2_journal_dirty(handle, args->new_bh);
6505
6506 out_commit:
6507         ocfs2_commit_trans(osb, handle);
6508
6509 out:
6510         if (meta_ac)
6511                 ocfs2_free_alloc_context(meta_ac);
6512         return ret;
6513 }
6514
6515 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6516                                           struct buffer_head *fe_bh,
6517                                           struct buffer_head **ret_bh,
6518                                           int indexed)
6519 {
6520         int ret;
6521         handle_t *handle;
6522         struct ocfs2_alloc_context *meta_ac;
6523         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6524
6525         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6526         if (ret < 0) {
6527                 mlog_errno(ret);
6528                 return ret;
6529         }
6530
6531         handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6532         if (IS_ERR(handle)) {
6533                 ret = PTR_ERR(handle);
6534                 mlog_errno(ret);
6535                 goto out;
6536         }
6537
6538         mlog(0, "create new xattr block for inode %llu, index = %d\n",
6539              (unsigned long long)fe_bh->b_blocknr, indexed);
6540         ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6541                                        meta_ac, ret_bh, indexed);
6542         if (ret)
6543                 mlog_errno(ret);
6544
6545         ocfs2_commit_trans(osb, handle);
6546 out:
6547         ocfs2_free_alloc_context(meta_ac);
6548         return ret;
6549 }
6550
6551 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6552                                      struct buffer_head *blk_bh,
6553                                      struct buffer_head *new_blk_bh)
6554 {
6555         int ret = 0, credits = 0;
6556         handle_t *handle;
6557         struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6558         struct ocfs2_dinode *new_di;
6559         struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6560         int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6561         struct ocfs2_xattr_block *xb =
6562                         (struct ocfs2_xattr_block *)blk_bh->b_data;
6563         struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6564         struct ocfs2_xattr_block *new_xb =
6565                         (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6566         struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6567         struct ocfs2_alloc_context *meta_ac;
6568         struct ocfs2_xattr_value_buf vb = {
6569                 .vb_bh = new_blk_bh,
6570                 .vb_access = ocfs2_journal_access_xb,
6571         };
6572
6573         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6574                                                   &credits, &meta_ac);
6575         if (ret) {
6576                 mlog_errno(ret);
6577                 return ret;
6578         }
6579
6580         /* One more credits in case we need to add xattr flags in new inode. */
6581         handle = ocfs2_start_trans(osb, credits + 1);
6582         if (IS_ERR(handle)) {
6583                 ret = PTR_ERR(handle);
6584                 mlog_errno(ret);
6585                 goto out;
6586         }
6587
6588         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6589                 ret = ocfs2_journal_access_di(handle,
6590                                               INODE_CACHE(args->new_inode),
6591                                               args->new_bh,
6592                                               OCFS2_JOURNAL_ACCESS_WRITE);
6593                 if (ret) {
6594                         mlog_errno(ret);
6595                         goto out_commit;
6596                 }
6597         }
6598
6599         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6600                                       new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6601         if (ret) {
6602                 mlog_errno(ret);
6603                 goto out_commit;
6604         }
6605
6606         memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6607                osb->sb->s_blocksize - header_off);
6608
6609         ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6610                                          new_blk_bh, new_xh, &vb, meta_ac,
6611                                          ocfs2_get_xattr_value_root, NULL);
6612         if (ret) {
6613                 mlog_errno(ret);
6614                 goto out_commit;
6615         }
6616
6617         ocfs2_journal_dirty(handle, new_blk_bh);
6618
6619         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6620                 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6621                 spin_lock(&new_oi->ip_lock);
6622                 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6623                 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6624                 spin_unlock(&new_oi->ip_lock);
6625
6626                 ocfs2_journal_dirty(handle, args->new_bh);
6627         }
6628
6629 out_commit:
6630         ocfs2_commit_trans(osb, handle);
6631
6632 out:
6633         ocfs2_free_alloc_context(meta_ac);
6634         return ret;
6635 }
6636
/* State shared by the helpers that reflink an indexed xattr tree. */
struct ocfs2_reflink_xattr_tree_args {
	struct ocfs2_xattr_reflink *reflink;	/* overall reflink context */
	struct buffer_head *old_blk_bh;
	struct buffer_head *new_blk_bh;
	/* scratch buckets for the source and the destination side */
	struct ocfs2_xattr_bucket *old_bucket;
	struct ocfs2_xattr_bucket *new_bucket;
};
6644
6645 /*
6646  * NOTE:
6647  * We have to handle the case that both old bucket and new bucket
6648  * will call this function to get the right ret_bh.
6649  * So The caller must give us the right bh.
6650  */
6651 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6652                                         struct buffer_head *bh,
6653                                         struct ocfs2_xattr_header *xh,
6654                                         int offset,
6655                                         struct ocfs2_xattr_value_root **xv,
6656                                         struct buffer_head **ret_bh,
6657                                         void *para)
6658 {
6659         struct ocfs2_reflink_xattr_tree_args *args =
6660                         (struct ocfs2_reflink_xattr_tree_args *)para;
6661         struct ocfs2_xattr_bucket *bucket;
6662
6663         if (bh == args->old_bucket->bu_bhs[0])
6664                 bucket = args->old_bucket;
6665         else
6666                 bucket = args->new_bucket;
6667
6668         return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6669                                                xv, ret_bh);
6670 }
6671
/* Running totals gathered while iterating the buckets of an xattr rec. */
struct ocfs2_value_tree_metas {
	int num_metas;	/* metadata blocks to reserve */
	int credits;	/* journal credits */
	int num_recs;	/* extent records counted for the refcount tree */
};
6677
/*
 * get_xattr_value_root implementation for counting metadata in a bucket:
 * para is the bucket itself, bh and xh are not consulted.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *owner = para;

	return ocfs2_get_xattr_tree_value_root(sb, owner, offset,
					       xv, ret_bh);
}
6692
6693 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6694                                       struct ocfs2_xattr_bucket *bucket,
6695                                       void *para)
6696 {
6697         struct ocfs2_value_tree_metas *metas =
6698                         (struct ocfs2_value_tree_metas *)para;
6699         struct ocfs2_xattr_header *xh =
6700                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6701
6702         /* Add the credits for this bucket first. */
6703         metas->credits += bucket->bu_blocks;
6704         return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6705                                         xh, &metas->num_metas,
6706                                         &metas->credits, &metas->num_recs,
6707                                         ocfs2_value_tree_metas_in_bucket,
6708                                         bucket);
6709 }
6710
6711 /*
6712  * Given a xattr extent rec starting from blkno and having len clusters,
6713  * iterate all the buckets calculate how much metadata we need for reflinking
6714  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6715  */
6716 static int ocfs2_lock_reflink_xattr_rec_allocators(
6717                                 struct ocfs2_reflink_xattr_tree_args *args,
6718                                 struct ocfs2_extent_tree *xt_et,
6719                                 u64 blkno, u32 len, int *credits,
6720                                 struct ocfs2_alloc_context **meta_ac,
6721                                 struct ocfs2_alloc_context **data_ac)
6722 {
6723         int ret, num_free_extents;
6724         struct ocfs2_value_tree_metas metas;
6725         struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6726         struct ocfs2_refcount_block *rb;
6727
6728         memset(&metas, 0, sizeof(metas));
6729
6730         ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6731                                           ocfs2_calc_value_tree_metas, &metas);
6732         if (ret) {
6733                 mlog_errno(ret);
6734                 goto out;
6735         }
6736
6737         *credits = metas.credits;
6738
6739         /*
6740          * Calculate we need for refcount tree change.
6741          *
6742          * We need to add/modify num_recs in refcount tree, so just calculate
6743          * an approximate number we need for refcount tree change.
6744          * Sometimes we need to split the tree, and after split,  half recs
6745          * will be moved to the new block, and a new block can only provide
6746          * half number of recs. So we multiple new blocks by 2.
6747          * In the end, we have to add credits for modifying the already
6748          * existed refcount block.
6749          */
6750         rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6751         metas.num_recs =
6752                 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6753                  ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6754         metas.num_metas += metas.num_recs;
6755         *credits += metas.num_recs +
6756                     metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6757         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6758                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6759                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6760         else
6761                 *credits += 1;
6762
6763         /* count in the xattr tree change. */
6764         num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6765         if (num_free_extents < 0) {
6766                 ret = num_free_extents;
6767                 mlog_errno(ret);
6768                 goto out;
6769         }
6770
6771         if (num_free_extents < len)
6772                 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6773
6774         *credits += ocfs2_calc_extend_credits(osb->sb,
6775                                               xt_et->et_root_el, len);
6776
6777         if (metas.num_metas) {
6778                 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6779                                                         meta_ac);
6780                 if (ret) {
6781                         mlog_errno(ret);
6782                         goto out;
6783                 }
6784         }
6785
6786         if (len) {
6787                 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6788                 if (ret)
6789                         mlog_errno(ret);
6790         }
6791 out:
6792         if (ret) {
6793                 if (*meta_ac) {
6794                         ocfs2_free_alloc_context(*meta_ac);
6795                         meta_ac = NULL;
6796                 }
6797         }
6798
6799         return ret;
6800 }
6801
6802 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6803                                 u64 blkno, u64 new_blkno, u32 clusters,
6804                                 struct ocfs2_alloc_context *meta_ac,
6805                                 struct ocfs2_alloc_context *data_ac,
6806                                 struct ocfs2_reflink_xattr_tree_args *args)
6807 {
6808         int i, j, ret = 0;
6809         struct super_block *sb = args->reflink->old_inode->i_sb;
6810         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6811         u32 num_buckets = clusters * bpc;
6812         int bpb = args->old_bucket->bu_blocks;
6813         struct ocfs2_xattr_value_buf vb = {
6814                 .vb_access = ocfs2_journal_access,
6815         };
6816
6817         for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6818                 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6819                 if (ret) {
6820                         mlog_errno(ret);
6821                         break;
6822                 }
6823
6824                 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6825                 if (ret) {
6826                         mlog_errno(ret);
6827                         break;
6828                 }
6829
6830                 /*
6831                  * The real bucket num in this series of blocks is stored
6832                  * in the 1st bucket.
6833                  */
6834                 if (i == 0)
6835                         num_buckets = le16_to_cpu(
6836                                 bucket_xh(args->old_bucket)->xh_num_buckets);
6837
6838                 ret = ocfs2_xattr_bucket_journal_access(handle,
6839                                                 args->new_bucket,
6840                                                 OCFS2_JOURNAL_ACCESS_CREATE);
6841                 if (ret) {
6842                         mlog_errno(ret);
6843                         break;
6844                 }
6845
6846                 for (j = 0; j < bpb; j++)
6847                         memcpy(bucket_block(args->new_bucket, j),
6848                                bucket_block(args->old_bucket, j),
6849                                sb->s_blocksize);
6850
6851                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6852
6853                 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6854                                         args->old_bucket->bu_bhs[0],
6855                                         bucket_xh(args->old_bucket),
6856                                         args->new_bucket->bu_bhs[0],
6857                                         bucket_xh(args->new_bucket),
6858                                         &vb, meta_ac,
6859                                         ocfs2_get_reflink_xattr_value_root,
6860                                         args);
6861                 if (ret) {
6862                         mlog_errno(ret);
6863                         break;
6864                 }
6865
6866                 /*
6867                  * Re-access and dirty the bucket to calculate metaecc.
6868                  * Because we may extend the transaction in reflink_xattr_header
6869                  * which will let the already accessed block gone.
6870                  */
6871                 ret = ocfs2_xattr_bucket_journal_access(handle,
6872                                                 args->new_bucket,
6873                                                 OCFS2_JOURNAL_ACCESS_WRITE);
6874                 if (ret) {
6875                         mlog_errno(ret);
6876                         break;
6877                 }
6878
6879                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6880                 ocfs2_xattr_bucket_relse(args->old_bucket);
6881                 ocfs2_xattr_bucket_relse(args->new_bucket);
6882         }
6883
6884         ocfs2_xattr_bucket_relse(args->old_bucket);
6885         ocfs2_xattr_bucket_relse(args->new_bucket);
6886         return ret;
6887 }
6888 /*
6889  * Create the same xattr extent record in the new inode's xattr tree.
6890  */
6891 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6892                                    struct buffer_head *root_bh,
6893                                    u64 blkno,
6894                                    u32 cpos,
6895                                    u32 len,
6896                                    void *para)
6897 {
6898         int ret, credits = 0;
6899         u32 p_cluster, num_clusters;
6900         u64 new_blkno;
6901         handle_t *handle;
6902         struct ocfs2_reflink_xattr_tree_args *args =
6903                         (struct ocfs2_reflink_xattr_tree_args *)para;
6904         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6905         struct ocfs2_alloc_context *meta_ac = NULL;
6906         struct ocfs2_alloc_context *data_ac = NULL;
6907         struct ocfs2_extent_tree et;
6908
6909         ocfs2_init_xattr_tree_extent_tree(&et,
6910                                           INODE_CACHE(args->reflink->new_inode),
6911                                           args->new_blk_bh);
6912
6913         ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6914                                                       len, &credits,
6915                                                       &meta_ac, &data_ac);
6916         if (ret) {
6917                 mlog_errno(ret);
6918                 goto out;
6919         }
6920
6921         handle = ocfs2_start_trans(osb, credits);
6922         if (IS_ERR(handle)) {
6923                 ret = PTR_ERR(handle);
6924                 mlog_errno(ret);
6925                 goto out;
6926         }
6927
6928         ret = ocfs2_claim_clusters(osb, handle, data_ac,
6929                                    len, &p_cluster, &num_clusters);
6930         if (ret) {
6931                 mlog_errno(ret);
6932                 goto out_commit;
6933         }
6934
6935         new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6936
6937         mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6938              (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6939         ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6940                                           meta_ac, data_ac, args);
6941         if (ret) {
6942                 mlog_errno(ret);
6943                 goto out_commit;
6944         }
6945
6946         mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6947              (unsigned long long)new_blkno, len, cpos);
6948         ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6949                                   len, 0, meta_ac);
6950         if (ret)
6951                 mlog_errno(ret);
6952
6953 out_commit:
6954         ocfs2_commit_trans(osb, handle);
6955
6956 out:
6957         if (meta_ac)
6958                 ocfs2_free_alloc_context(meta_ac);
6959         if (data_ac)
6960                 ocfs2_free_alloc_context(data_ac);
6961         return ret;
6962 }
6963
6964 /*
6965  * Create reflinked xattr buckets.
6966  * We will add bucket one by one, and refcount all the xattrs in the bucket
6967  * if they are stored outside.
6968  */
6969 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6970                                     struct buffer_head *blk_bh,
6971                                     struct buffer_head *new_blk_bh)
6972 {
6973         int ret;
6974         struct ocfs2_reflink_xattr_tree_args para;
6975
6976         memset(&para, 0, sizeof(para));
6977         para.reflink = args;
6978         para.old_blk_bh = blk_bh;
6979         para.new_blk_bh = new_blk_bh;
6980
6981         para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6982         if (!para.old_bucket) {
6983                 mlog_errno(-ENOMEM);
6984                 return -ENOMEM;
6985         }
6986
6987         para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6988         if (!para.new_bucket) {
6989                 ret = -ENOMEM;
6990                 mlog_errno(ret);
6991                 goto out;
6992         }
6993
6994         ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
6995                                               ocfs2_reflink_xattr_rec,
6996                                               &para);
6997         if (ret)
6998                 mlog_errno(ret);
6999
7000 out:
7001         ocfs2_xattr_bucket_free(para.old_bucket);
7002         ocfs2_xattr_bucket_free(para.new_bucket);
7003         return ret;
7004 }
7005
7006 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7007                                         struct buffer_head *blk_bh)
7008 {
7009         int ret, indexed = 0;
7010         struct buffer_head *new_blk_bh = NULL;
7011         struct ocfs2_xattr_block *xb =
7012                         (struct ocfs2_xattr_block *)blk_bh->b_data;
7013
7014
7015         if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7016                 indexed = 1;
7017
7018         ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7019                                              &new_blk_bh, indexed);
7020         if (ret) {
7021                 mlog_errno(ret);
7022                 goto out;
7023         }
7024
7025         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
7026                 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7027         else
7028                 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7029         if (ret)
7030                 mlog_errno(ret);
7031
7032 out:
7033         brelse(new_blk_bh);
7034         return ret;
7035 }
7036
7037 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7038 {
7039         int type = ocfs2_xattr_get_type(xe);
7040
7041         return type != OCFS2_XATTR_INDEX_SECURITY &&
7042                type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7043                type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7044 }
7045
7046 int ocfs2_reflink_xattrs(struct inode *old_inode,
7047                          struct buffer_head *old_bh,
7048                          struct inode *new_inode,
7049                          struct buffer_head *new_bh,
7050                          bool preserve_security)
7051 {
7052         int ret;
7053         struct ocfs2_xattr_reflink args;
7054         struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7055         struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7056         struct buffer_head *blk_bh = NULL;
7057         struct ocfs2_cached_dealloc_ctxt dealloc;
7058         struct ocfs2_refcount_tree *ref_tree;
7059         struct buffer_head *ref_root_bh = NULL;
7060
7061         ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7062                                        le64_to_cpu(di->i_refcount_loc),
7063                                        1, &ref_tree, &ref_root_bh);
7064         if (ret) {
7065                 mlog_errno(ret);
7066                 goto out;
7067         }
7068
7069         ocfs2_init_dealloc_ctxt(&dealloc);
7070
7071         args.old_inode = old_inode;
7072         args.new_inode = new_inode;
7073         args.old_bh = old_bh;
7074         args.new_bh = new_bh;
7075         args.ref_ci = &ref_tree->rf_ci;
7076         args.ref_root_bh = ref_root_bh;
7077         args.dealloc = &dealloc;
7078         if (preserve_security)
7079                 args.xattr_reflinked = NULL;
7080         else
7081                 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7082
7083         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7084                 ret = ocfs2_reflink_xattr_inline(&args);
7085                 if (ret) {
7086                         mlog_errno(ret);
7087                         goto out_unlock;
7088                 }
7089         }
7090
7091         if (!di->i_xattr_loc)
7092                 goto out_unlock;
7093
7094         ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7095                                      &blk_bh);
7096         if (ret < 0) {
7097                 mlog_errno(ret);
7098                 goto out_unlock;
7099         }
7100
7101         ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7102         if (ret)
7103                 mlog_errno(ret);
7104
7105         brelse(blk_bh);
7106
7107 out_unlock:
7108         ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7109                                    ref_tree, 1);
7110         brelse(ref_root_bh);
7111
7112         if (ocfs2_dealloc_has_cluster(&dealloc)) {
7113                 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7114                 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7115         }
7116
7117 out:
7118         return ret;
7119 }
7120
7121 /*
7122  * Initialize security and acl for a already created inode.
7123  * Used for reflink a non-preserve-security file.
7124  *
7125  * It uses common api like ocfs2_xattr_set, so the caller
7126  * must not hold any lock expect i_mutex.
7127  */
7128 int ocfs2_init_security_and_acl(struct inode *dir,
7129                                 struct inode *inode)
7130 {
7131         int ret = 0;
7132         struct buffer_head *dir_bh = NULL;
7133         struct ocfs2_security_xattr_info si = {
7134                 .enable = 1,
7135         };
7136
7137         ret = ocfs2_init_security_get(inode, dir, &si);
7138         if (!ret) {
7139                 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7140                                       si.name, si.value, si.value_len,
7141                                       XATTR_CREATE);
7142                 if (ret) {
7143                         mlog_errno(ret);
7144                         goto leave;
7145                 }
7146         } else if (ret != -EOPNOTSUPP) {
7147                 mlog_errno(ret);
7148                 goto leave;
7149         }
7150
7151         ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7152         if (ret) {
7153                 mlog_errno(ret);
7154                 goto leave;
7155         }
7156
7157         ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7158         if (ret)
7159                 mlog_errno(ret);
7160
7161         ocfs2_inode_unlock(dir, 0);
7162         brelse(dir_bh);
7163 leave:
7164         return ret;
7165 }
7166 /*
7167  * 'security' attributes support
7168  */
7169 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7170                                         size_t list_size, const char *name,
7171                                         size_t name_len, int type)
7172 {
7173         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7174         const size_t total_len = prefix_len + name_len + 1;
7175
7176         if (list && total_len <= list_size) {
7177                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7178                 memcpy(list + prefix_len, name, name_len);
7179                 list[prefix_len + name_len] = '\0';
7180         }
7181         return total_len;
7182 }
7183
7184 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7185                                     void *buffer, size_t size, int type)
7186 {
7187         if (strcmp(name, "") == 0)
7188                 return -EINVAL;
7189         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7190                                name, buffer, size);
7191 }
7192
7193 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7194                 const void *value, size_t size, int flags, int type)
7195 {
7196         if (strcmp(name, "") == 0)
7197                 return -EINVAL;
7198
7199         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7200                                name, value, size, flags);
7201 }
7202
7203 int ocfs2_init_security_get(struct inode *inode,
7204                             struct inode *dir,
7205                             struct ocfs2_security_xattr_info *si)
7206 {
7207         /* check whether ocfs2 support feature xattr */
7208         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7209                 return -EOPNOTSUPP;
7210         return security_inode_init_security(inode, dir, &si->name, &si->value,
7211                                             &si->value_len);
7212 }
7213
7214 int ocfs2_init_security_set(handle_t *handle,
7215                             struct inode *inode,
7216                             struct buffer_head *di_bh,
7217                             struct ocfs2_security_xattr_info *si,
7218                             struct ocfs2_alloc_context *xattr_ac,
7219                             struct ocfs2_alloc_context *data_ac)
7220 {
7221         return ocfs2_xattr_set_handle(handle, inode, di_bh,
7222                                      OCFS2_XATTR_INDEX_SECURITY,
7223                                      si->name, si->value, si->value_len, 0,
7224                                      xattr_ac, data_ac);
7225 }
7226
7227 struct xattr_handler ocfs2_xattr_security_handler = {
7228         .prefix = XATTR_SECURITY_PREFIX,
7229         .list   = ocfs2_xattr_security_list,
7230         .get    = ocfs2_xattr_security_get,
7231         .set    = ocfs2_xattr_security_set,
7232 };
7233
7234 /*
7235  * 'trusted' attributes support
7236  */
7237 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7238                                        size_t list_size, const char *name,
7239                                        size_t name_len, int type)
7240 {
7241         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7242         const size_t total_len = prefix_len + name_len + 1;
7243
7244         if (list && total_len <= list_size) {
7245                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7246                 memcpy(list + prefix_len, name, name_len);
7247                 list[prefix_len + name_len] = '\0';
7248         }
7249         return total_len;
7250 }
7251
7252 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7253                 void *buffer, size_t size, int type)
7254 {
7255         if (strcmp(name, "") == 0)
7256                 return -EINVAL;
7257         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7258                                name, buffer, size);
7259 }
7260
7261 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7262                 const void *value, size_t size, int flags, int type)
7263 {
7264         if (strcmp(name, "") == 0)
7265                 return -EINVAL;
7266
7267         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7268                                name, value, size, flags);
7269 }
7270
7271 struct xattr_handler ocfs2_xattr_trusted_handler = {
7272         .prefix = XATTR_TRUSTED_PREFIX,
7273         .list   = ocfs2_xattr_trusted_list,
7274         .get    = ocfs2_xattr_trusted_get,
7275         .set    = ocfs2_xattr_trusted_set,
7276 };
7277
7278 /*
7279  * 'user' attributes support
7280  */
7281 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7282                                     size_t list_size, const char *name,
7283                                     size_t name_len, int type)
7284 {
7285         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7286         const size_t total_len = prefix_len + name_len + 1;
7287         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7288
7289         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7290                 return 0;
7291
7292         if (list && total_len <= list_size) {
7293                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7294                 memcpy(list + prefix_len, name, name_len);
7295                 list[prefix_len + name_len] = '\0';
7296         }
7297         return total_len;
7298 }
7299
7300 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7301                 void *buffer, size_t size, int type)
7302 {
7303         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7304
7305         if (strcmp(name, "") == 0)
7306                 return -EINVAL;
7307         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7308                 return -EOPNOTSUPP;
7309         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7310                                buffer, size);
7311 }
7312
7313 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7314                 const void *value, size_t size, int flags, int type)
7315 {
7316         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7317
7318         if (strcmp(name, "") == 0)
7319                 return -EINVAL;
7320         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7321                 return -EOPNOTSUPP;
7322
7323         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7324                                name, value, size, flags);
7325 }
7326
7327 struct xattr_handler ocfs2_xattr_user_handler = {
7328         .prefix = XATTR_USER_PREFIX,
7329         .list   = ocfs2_xattr_user_list,
7330         .get    = ocfs2_xattr_user_get,
7331         .set    = ocfs2_xattr_user_set,
7332 };