ocfs2: Set the xattr name+value pair in one place
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60
/*
 * An ocfs2_xattr_value_root immediately followed by one
 * ocfs2_extent_rec.  sizeof() of this struct is what
 * OCFS2_XATTR_ROOT_SIZE expands to, and def_xv is a static instance
 * of it.
 */
61 struct ocfs2_xattr_def_value_root {
62         struct ocfs2_xattr_value_root   xv;
63         struct ocfs2_extent_rec         er;
64 };
65
/*
 * In-memory handle for one xattr bucket: the owning inode plus the
 * buffer_heads backing the bucket.  Allocated by
 * ocfs2_xattr_bucket_new() and released by ocfs2_xattr_bucket_free().
 */
66 struct ocfs2_xattr_bucket {
67         /* The inode these xattrs are associated with */
68         struct inode *bu_inode;
69
70         /* The actual buffers that make up the bucket */
71         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
72
73         /*
         * How many blocks make up one bucket for this filesystem;
         * only the first bu_blocks slots of bu_bhs are used.
         */
74         int bu_blocks;
75 };
76
/*
 * Context handed to the xattr modification helpers in this file.
 * The helpers read the journal handle from ->handle, pass ->meta_ac
 * and ->data_ac to the extent-tree allocation calls, and pass
 * &->dealloc to the extent removal / cluster dealloc calls.
 */
77 struct ocfs2_xattr_set_ctxt {
78         handle_t *handle;
79         struct ocfs2_alloc_context *meta_ac;
80         struct ocfs2_alloc_context *data_ac;
81         struct ocfs2_cached_dealloc_ctxt dealloc;
82 };
83
/*
 * Size constants used by the space calculations in this file.
 *
 * OCFS2_XATTR_ROOT_SIZE      - sizeof(struct ocfs2_xattr_def_value_root);
 *                              namevalue_size() charges this instead of
 *                              the value length for large values.
 * OCFS2_XATTR_INLINE_SIZE    - values longer than this many bytes are
 *                              treated as "large" by namevalue_size().
 * OCFS2_XATTR_HEADER_GAP     - bytes subtracted as slack in the two
 *                              FREE_IN_* computations below.
 * OCFS2_XATTR_FREE_IN_IBODY  - OCFS2_MIN_XATTR_INLINE_SIZE minus the xattr
 *                              header and the gap.
 * OCFS2_XATTR_FREE_IN_BLOCK  - block size of (ptr)->i_sb minus the xattr
 *                              block struct, the xattr header and the gap.
 *                              (ptr) is dereferenced as ->i_sb, i.e. it is
 *                              expected to be an inode pointer.
 */
84 #define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
85 #define OCFS2_XATTR_INLINE_SIZE 80
86 #define OCFS2_XATTR_HEADER_GAP  4
87 #define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
88                                          - sizeof(struct ocfs2_xattr_header) \
89                                          - OCFS2_XATTR_HEADER_GAP)
90 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
91                                          - sizeof(struct ocfs2_xattr_block) \
92                                          - sizeof(struct ocfs2_xattr_header) \
93                                          - OCFS2_XATTR_HEADER_GAP)
94
/*
 * Static value-root instance whose extent list count (l_count) is
 * pre-set to 1 in on-disk (little-endian) byte order; every other
 * field is zero.  NOTE(review): presumably used as the initial root
 * for values stored outside the entry - confirm against the users.
 */
95 static struct ocfs2_xattr_def_value_root def_xv = {
96         .xv.xr_list.l_count = cpu_to_le16(1),
97 };
98
/*
 * NULL-terminated list of the xattr handlers implemented by ocfs2.
 * Not static, so it is visible outside this file.
 */
99 struct xattr_handler *ocfs2_xattr_handlers[] = {
100         &ocfs2_xattr_user_handler,
101         &ocfs2_xattr_acl_access_handler,
102         &ocfs2_xattr_acl_default_handler,
103         &ocfs2_xattr_trusted_handler,
104         &ocfs2_xattr_security_handler,
105         NULL
106 };
107
/*
 * Lookup table from an OCFS2_XATTR_INDEX_* name index to its handler.
 * Slots without an initializer stay NULL; ocfs2_xattr_prefix() checks
 * for that before dereferencing.
 */
108 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
109         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
110         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
111                                         = &ocfs2_xattr_acl_access_handler,
112         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
113                                         = &ocfs2_xattr_acl_default_handler,
114         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
115         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
116 };
117
/*
 * Describes one xattr name+value pair.  namevalue_size_xi() sizes the
 * pair from xi_name_len and xi_value_len.
 */
118 struct ocfs2_xattr_info {
119         int             xi_name_index;  /* presumably an OCFS2_XATTR_INDEX_* value - confirm */
120         const char      *xi_name;       /* attribute name */
121         int             xi_name_len;    /* length of xi_name, as used for sizing */
122         const void      *xi_value;      /* attribute value bytes */
123         size_t          xi_value_len;   /* length of xi_value, as used for sizing */
124 };
125
/*
 * Search state passed to the xattr find routines declared in this file
 * (e.g. ocfs2_xattr_block_find(), ocfs2_xattr_index_block_find()).
 * NOTE(review): the exact meaning of base/end/here/not_found is set by
 * those routines, which are not visible here - confirm before relying
 * on it.
 */
126 struct ocfs2_xattr_search {
127         struct buffer_head *inode_bh;
128         /*
129          * xattr_bh points to the block buffer head which holds the extended
130          * attribute; when the attribute lives in the inode, xattr_bh equals inode_bh.
131          */
132         struct buffer_head *xattr_bh;
133         struct ocfs2_xattr_header *header;
134         struct ocfs2_xattr_bucket *bucket;
135         void *base;
136         void *end;
137         struct ocfs2_xattr_entry *here;
138         int not_found;
139 };
140
141 /*
 * Operations on struct ocfs2_xa_loc.  Each kind of storage that can
 * back a location supplies its own table via ocfs2_xa_loc.xl_ops.
 */
142 struct ocfs2_xa_loc;
143 struct ocfs2_xa_loc_operations {
144         /*
145          * Return a pointer to the appropriate buffer in loc->xl_storage
146          * at the given offset from loc->xl_header.
147          */
148         void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
149
150         /* Can we reuse the existing entry for the new value? */
151         int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
152                              struct ocfs2_xattr_info *xi);
153
154         /* How much space is needed for the new value? */
155         int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
156                                struct ocfs2_xattr_info *xi);
157
158         /*
159          * Return the offset of the first name+value pair.  This is
160          * the start of our downward-filling free space.
161          */
162         int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
163
164         /*
165          * Remove the name+value at this location.  Do whatever is
166          * appropriate with the remaining name+value pairs.
167          */
168         void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
169
170         /* Fill xl_entry with a new entry */
171         void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
172
173         /* Add name+value storage of the given size to an entry */
174         void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
175 };
176
177 /*
178  * Describes an xattr entry location.  This is a memory structure
179  * tracking the on-disk structure.  All storage-specific behaviour
 * goes through the xl_ops table.
180  */
181 struct ocfs2_xa_loc {
182         /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
183         struct ocfs2_xattr_header *xl_header;
184
185         /* Bytes from xl_header to the end of the storage */
186         int xl_size;
187
188         /*
189          * The ocfs2_xattr_entry this location describes.  If this is
190          * NULL, this location describes the on-disk structure where it
191          * would have been.
192          */
193         struct ocfs2_xattr_entry *xl_entry;
194
195         /*
196          * Internal housekeeping
197          */
198
199         /* Buffer(s) containing this entry; interpreted by xl_ops */
200         void *xl_storage;
201
202         /* Operations on the storage backing this location */
203         const struct ocfs2_xa_loc_operations *xl_ops;
204 };
205
206 /*
207  * Convenience functions to calculate how much space is needed for a
208  * given name+value pair
209  */
210 static int namevalue_size(int name_len, uint64_t value_len)
211 {
212         if (value_len > OCFS2_XATTR_INLINE_SIZE)
213                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
214         else
215                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
216 }
217
218 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
219 {
220         return namevalue_size(xi->xi_name_len, xi->xi_value_len);
221 }
222
223 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
224 {
225         u64 value_len = le64_to_cpu(xe->xe_value_size);
226
227         BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
228                ocfs2_xattr_is_local(xe));
229         return namevalue_size(xe->xe_name_len, value_len);
230 }
231
232
233 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
234                                              struct ocfs2_xattr_header *xh,
235                                              int index,
236                                              int *block_off,
237                                              int *new_offset);
238
239 static int ocfs2_xattr_block_find(struct inode *inode,
240                                   int name_index,
241                                   const char *name,
242                                   struct ocfs2_xattr_search *xs);
243 static int ocfs2_xattr_index_block_find(struct inode *inode,
244                                         struct buffer_head *root_bh,
245                                         int name_index,
246                                         const char *name,
247                                         struct ocfs2_xattr_search *xs);
248
249 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
250                                         struct buffer_head *blk_bh,
251                                         char *buffer,
252                                         size_t buffer_size);
253
254 static int ocfs2_xattr_create_index_block(struct inode *inode,
255                                           struct ocfs2_xattr_search *xs,
256                                           struct ocfs2_xattr_set_ctxt *ctxt);
257
258 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
259                                              struct ocfs2_xattr_info *xi,
260                                              struct ocfs2_xattr_search *xs,
261                                              struct ocfs2_xattr_set_ctxt *ctxt);
262
263 typedef int (xattr_tree_rec_func)(struct inode *inode,
264                                   struct buffer_head *root_bh,
265                                   u64 blkno, u32 cpos, u32 len, void *para);
266 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
267                                            struct buffer_head *root_bh,
268                                            xattr_tree_rec_func *rec_func,
269                                            void *para);
270 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
271                                         struct ocfs2_xattr_bucket *bucket,
272                                         void *para);
273 static int ocfs2_rm_xattr_cluster(struct inode *inode,
274                                   struct buffer_head *root_bh,
275                                   u64 blkno,
276                                   u32 cpos,
277                                   u32 len,
278                                   void *para);
279
280 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
281                                   u64 src_blk, u64 last_blk, u64 to_blk,
282                                   unsigned int start_bucket,
283                                   u32 *first_hash);
284 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
285                                         struct ocfs2_dinode *di,
286                                         struct ocfs2_xattr_info *xi,
287                                         struct ocfs2_xattr_search *xis,
288                                         struct ocfs2_xattr_search *xbs,
289                                         struct ocfs2_refcount_tree **ref_tree,
290                                         int *meta_need,
291                                         int *credits);
292 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
293                                            struct ocfs2_xattr_bucket *bucket,
294                                            int offset,
295                                            struct ocfs2_xattr_value_root **xv,
296                                            struct buffer_head **bh);
297
298 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
299 {
300         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
301 }
302
303 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
304 {
305         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
306 }
307
308 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
309 {
310         u16 len = sb->s_blocksize -
311                  offsetof(struct ocfs2_xattr_header, xh_entries);
312
313         return len / sizeof(struct ocfs2_xattr_entry);
314 }
315
/*
 * Accessors for a struct ocfs2_xattr_bucket whose bu_bhs are populated:
 *   bucket_blkno(b)    - block number of the bucket's first buffer
 *   bucket_block(b, n) - data pointer of the bucket's n-th buffer
 *   bucket_xh(b)       - the first block viewed as an ocfs2_xattr_header
 * None of them checks for a NULL buffer_head.
 */
316 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
317 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
318 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
319
320 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
321 {
322         struct ocfs2_xattr_bucket *bucket;
323         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
324
325         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
326
327         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
328         if (bucket) {
329                 bucket->bu_inode = inode;
330                 bucket->bu_blocks = blks;
331         }
332
333         return bucket;
334 }
335
336 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
337 {
338         int i;
339
340         for (i = 0; i < bucket->bu_blocks; i++) {
341                 brelse(bucket->bu_bhs[i]);
342                 bucket->bu_bhs[i] = NULL;
343         }
344 }
345
346 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
347 {
348         if (bucket) {
349                 ocfs2_xattr_bucket_relse(bucket);
350                 bucket->bu_inode = NULL;
351                 kfree(bucket);
352         }
353 }
354
355 /*
356  * A bucket that has never been written to disk doesn't need to be
357  * read.  We just need the buffer_heads.  Don't call this for
358  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
359  * them fully.
360  */
361 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
362                                    u64 xb_blkno)
363 {
364         int i, rc = 0;
365
366         for (i = 0; i < bucket->bu_blocks; i++) {
367                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
368                                               xb_blkno + i);
369                 if (!bucket->bu_bhs[i]) {
370                         rc = -EIO;
371                         mlog_errno(rc);
372                         break;
373                 }
374
375                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
376                                            bucket->bu_bhs[i]))
377                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
378                                                       bucket->bu_bhs[i]);
379         }
380
381         if (rc)
382                 ocfs2_xattr_bucket_relse(bucket);
383         return rc;
384 }
385
386 /* Read the xattr bucket at xb_blkno */
387 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
388                                    u64 xb_blkno)
389 {
390         int rc;
391
392         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
393                                bucket->bu_blocks, bucket->bu_bhs, 0,
394                                NULL);
395         if (!rc) {
396                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
397                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
398                                                  bucket->bu_bhs,
399                                                  bucket->bu_blocks,
400                                                  &bucket_xh(bucket)->xh_check);
401                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
402                 if (rc)
403                         mlog_errno(rc);
404         }
405
406         if (rc)
407                 ocfs2_xattr_bucket_relse(bucket);
408         return rc;
409 }
410
411 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
412                                              struct ocfs2_xattr_bucket *bucket,
413                                              int type)
414 {
415         int i, rc = 0;
416
417         for (i = 0; i < bucket->bu_blocks; i++) {
418                 rc = ocfs2_journal_access(handle,
419                                           INODE_CACHE(bucket->bu_inode),
420                                           bucket->bu_bhs[i], type);
421                 if (rc) {
422                         mlog_errno(rc);
423                         break;
424                 }
425         }
426
427         return rc;
428 }
429
430 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
431                                              struct ocfs2_xattr_bucket *bucket)
432 {
433         int i;
434
435         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
436         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
437                                    bucket->bu_bhs, bucket->bu_blocks,
438                                    &bucket_xh(bucket)->xh_check);
439         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
440
441         for (i = 0; i < bucket->bu_blocks; i++)
442                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
443 }
444
445 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
446                                          struct ocfs2_xattr_bucket *src)
447 {
448         int i;
449         int blocksize = src->bu_inode->i_sb->s_blocksize;
450
451         BUG_ON(dest->bu_blocks != src->bu_blocks);
452         BUG_ON(dest->bu_inode != src->bu_inode);
453
454         for (i = 0; i < src->bu_blocks; i++) {
455                 memcpy(bucket_block(dest, i), bucket_block(src, i),
456                        blocksize);
457         }
458 }
459
460 static int ocfs2_validate_xattr_block(struct super_block *sb,
461                                       struct buffer_head *bh)
462 {
463         int rc;
464         struct ocfs2_xattr_block *xb =
465                 (struct ocfs2_xattr_block *)bh->b_data;
466
467         mlog(0, "Validating xattr block %llu\n",
468              (unsigned long long)bh->b_blocknr);
469
470         BUG_ON(!buffer_uptodate(bh));
471
472         /*
473          * If the ecc fails, we return the error but otherwise
474          * leave the filesystem running.  We know any error is
475          * local to this block.
476          */
477         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
478         if (rc)
479                 return rc;
480
481         /*
482          * Errors after here are fatal
483          */
484
485         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
486                 ocfs2_error(sb,
487                             "Extended attribute block #%llu has bad "
488                             "signature %.*s",
489                             (unsigned long long)bh->b_blocknr, 7,
490                             xb->xb_signature);
491                 return -EINVAL;
492         }
493
494         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
495                 ocfs2_error(sb,
496                             "Extended attribute block #%llu has an "
497                             "invalid xb_blkno of %llu",
498                             (unsigned long long)bh->b_blocknr,
499                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
500                 return -EINVAL;
501         }
502
503         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
504                 ocfs2_error(sb,
505                             "Extended attribute block #%llu has an invalid "
506                             "xb_fs_generation of #%u",
507                             (unsigned long long)bh->b_blocknr,
508                             le32_to_cpu(xb->xb_fs_generation));
509                 return -EINVAL;
510         }
511
512         return 0;
513 }
514
515 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
516                                   struct buffer_head **bh)
517 {
518         int rc;
519         struct buffer_head *tmp = *bh;
520
521         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
522                               ocfs2_validate_xattr_block);
523
524         /* If ocfs2_read_block() got us a new bh, pass it up. */
525         if (!rc && !*bh)
526                 *bh = tmp;
527
528         return rc;
529 }
530
531 static inline const char *ocfs2_xattr_prefix(int name_index)
532 {
533         struct xattr_handler *handler = NULL;
534
535         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
536                 handler = ocfs2_xattr_handler_map[name_index];
537
538         return handler ? handler->prefix : NULL;
539 }
540
541 static u32 ocfs2_xattr_name_hash(struct inode *inode,
542                                  const char *name,
543                                  int name_len)
544 {
545         /* Get hash value of uuid from super block */
546         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
547         int i;
548
549         /* hash extended attribute name */
550         for (i = 0; i < name_len; i++) {
551                 hash = (hash << OCFS2_HASH_SHIFT) ^
552                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
553                        *name++;
554         }
555
556         return hash;
557 }
558
559 /*
560  * ocfs2_xattr_hash_entry()
561  *
562  * Compute the hash of an extended attribute.
563  */
564 static void ocfs2_xattr_hash_entry(struct inode *inode,
565                                    struct ocfs2_xattr_header *header,
566                                    struct ocfs2_xattr_entry *entry)
567 {
568         u32 hash = 0;
569         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
570
571         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
572         entry->xe_name_hash = cpu_to_le32(hash);
573
574         return;
575 }
576
577 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
578 {
579         return namevalue_size(name_len, value_len) +
580                 sizeof(struct ocfs2_xattr_entry);
581 }
582
583 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
584 {
585         return namevalue_size_xi(xi) +
586                 sizeof(struct ocfs2_xattr_entry);
587 }
588
589 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
590 {
591         return namevalue_size_xe(xe) +
592                 sizeof(struct ocfs2_xattr_entry);
593 }
594
595 int ocfs2_calc_security_init(struct inode *dir,
596                              struct ocfs2_security_xattr_info *si,
597                              int *want_clusters,
598                              int *xattr_credits,
599                              struct ocfs2_alloc_context **xattr_ac)
600 {
601         int ret = 0;
602         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
603         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
604                                                  si->value_len);
605
606         /*
607          * The max space of security xattr taken inline is
608          * 256(name) + 80(value) + 16(entry) = 352 bytes,
609          * So reserve one metadata block for it is ok.
610          */
611         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
612             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
613                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
614                 if (ret) {
615                         mlog_errno(ret);
616                         return ret;
617                 }
618                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
619         }
620
621         /* reserve clusters for xattr value which will be set in B tree*/
622         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
623                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
624                                                             si->value_len);
625
626                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
627                                                            new_clusters);
628                 *want_clusters += new_clusters;
629         }
630         return ret;
631 }
632
633 int ocfs2_calc_xattr_init(struct inode *dir,
634                           struct buffer_head *dir_bh,
635                           int mode,
636                           struct ocfs2_security_xattr_info *si,
637                           int *want_clusters,
638                           int *xattr_credits,
639                           int *want_meta)
640 {
641         int ret = 0;
642         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
643         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
644
645         if (si->enable)
646                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
647                                                      si->value_len);
648
649         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
650                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
651                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
652                                         "", NULL, 0);
653                 if (acl_len > 0) {
654                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
655                         if (S_ISDIR(mode))
656                                 a_size <<= 1;
657                 } else if (acl_len != 0 && acl_len != -ENODATA) {
658                         mlog_errno(ret);
659                         return ret;
660                 }
661         }
662
663         if (!(s_size + a_size))
664                 return ret;
665
666         /*
667          * The max space of security xattr taken inline is
668          * 256(name) + 80(value) + 16(entry) = 352 bytes,
669          * The max space of acl xattr taken inline is
670          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
671          * when blocksize = 512, may reserve one more cluser for
672          * xattr bucket, otherwise reserve one metadata block
673          * for them is ok.
674          * If this is a new directory with inline data,
675          * we choose to reserve the entire inline area for
676          * directory contents and force an external xattr block.
677          */
678         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
679             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
680             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
681                 *want_meta = *want_meta + 1;
682                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
683         }
684
685         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
686             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
687                 *want_clusters += 1;
688                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
689         }
690
691         /*
692          * reserve credits and clusters for xattrs which has large value
693          * and have to be set outside
694          */
695         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
696                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
697                                                         si->value_len);
698                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
699                                                            new_clusters);
700                 *want_clusters += new_clusters;
701         }
702         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
703             acl_len > OCFS2_XATTR_INLINE_SIZE) {
704                 /* for directory, it has DEFAULT and ACCESS two types of acls */
705                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
706                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
707                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
708                                                            new_clusters);
709                 *want_clusters += new_clusters;
710         }
711
712         return ret;
713 }
714
715 static int ocfs2_xattr_extend_allocation(struct inode *inode,
716                                          u32 clusters_to_add,
717                                          struct ocfs2_xattr_value_buf *vb,
718                                          struct ocfs2_xattr_set_ctxt *ctxt)
719 {
720         int status = 0;
721         handle_t *handle = ctxt->handle;
722         enum ocfs2_alloc_restarted why;
723         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
724         struct ocfs2_extent_tree et;
725
726         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
727
728         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
729
730         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
731                               OCFS2_JOURNAL_ACCESS_WRITE);
732         if (status < 0) {
733                 mlog_errno(status);
734                 goto leave;
735         }
736
737         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
738         status = ocfs2_add_clusters_in_btree(handle,
739                                              &et,
740                                              &logical_start,
741                                              clusters_to_add,
742                                              0,
743                                              ctxt->data_ac,
744                                              ctxt->meta_ac,
745                                              &why);
746         if (status < 0) {
747                 mlog_errno(status);
748                 goto leave;
749         }
750
751         status = ocfs2_journal_dirty(handle, vb->vb_bh);
752         if (status < 0) {
753                 mlog_errno(status);
754                 goto leave;
755         }
756
757         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
758
759         /*
760          * We should have already allocated enough space before the transaction,
761          * so no need to restart.
762          */
763         BUG_ON(why != RESTART_NONE || clusters_to_add);
764
765 leave:
766
767         return status;
768 }
769
770 static int __ocfs2_remove_xattr_range(struct inode *inode,
771                                       struct ocfs2_xattr_value_buf *vb,
772                                       u32 cpos, u32 phys_cpos, u32 len,
773                                       unsigned int ext_flags,
774                                       struct ocfs2_xattr_set_ctxt *ctxt)
775 {
776         int ret;
777         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
778         handle_t *handle = ctxt->handle;
779         struct ocfs2_extent_tree et;
780
781         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
782
783         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
784                             OCFS2_JOURNAL_ACCESS_WRITE);
785         if (ret) {
786                 mlog_errno(ret);
787                 goto out;
788         }
789
790         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
791                                   &ctxt->dealloc);
792         if (ret) {
793                 mlog_errno(ret);
794                 goto out;
795         }
796
797         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
798
799         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
800         if (ret) {
801                 mlog_errno(ret);
802                 goto out;
803         }
804
805         if (ext_flags & OCFS2_EXT_REFCOUNTED)
806                 ret = ocfs2_decrease_refcount(inode, handle,
807                                         ocfs2_blocks_to_clusters(inode->i_sb,
808                                                                  phys_blkno),
809                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
810         else
811                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
812                                                   phys_blkno, len);
813         if (ret)
814                 mlog_errno(ret);
815
816 out:
817         return ret;
818 }
819
820 static int ocfs2_xattr_shrink_size(struct inode *inode,
821                                    u32 old_clusters,
822                                    u32 new_clusters,
823                                    struct ocfs2_xattr_value_buf *vb,
824                                    struct ocfs2_xattr_set_ctxt *ctxt)
825 {
826         int ret = 0;
827         unsigned int ext_flags;
828         u32 trunc_len, cpos, phys_cpos, alloc_size;
829         u64 block;
830
831         if (old_clusters <= new_clusters)
832                 return 0;
833
834         cpos = new_clusters;
835         trunc_len = old_clusters - new_clusters;
836         while (trunc_len) {
837                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
838                                                &alloc_size,
839                                                &vb->vb_xv->xr_list, &ext_flags);
840                 if (ret) {
841                         mlog_errno(ret);
842                         goto out;
843                 }
844
845                 if (alloc_size > trunc_len)
846                         alloc_size = trunc_len;
847
848                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
849                                                  phys_cpos, alloc_size,
850                                                  ext_flags, ctxt);
851                 if (ret) {
852                         mlog_errno(ret);
853                         goto out;
854                 }
855
856                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
857                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
858                                                        block, alloc_size);
859                 cpos += alloc_size;
860                 trunc_len -= alloc_size;
861         }
862
863 out:
864         return ret;
865 }
866
867 static int ocfs2_xattr_value_truncate(struct inode *inode,
868                                       struct ocfs2_xattr_value_buf *vb,
869                                       int len,
870                                       struct ocfs2_xattr_set_ctxt *ctxt)
871 {
872         int ret;
873         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
874         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
875
876         if (new_clusters == old_clusters)
877                 return 0;
878
879         if (new_clusters > old_clusters)
880                 ret = ocfs2_xattr_extend_allocation(inode,
881                                                     new_clusters - old_clusters,
882                                                     vb, ctxt);
883         else
884                 ret = ocfs2_xattr_shrink_size(inode,
885                                               old_clusters, new_clusters,
886                                               vb, ctxt);
887
888         return ret;
889 }
890
/*
 * Append one "<prefix><name>\0" record to a listxattr buffer.
 *
 * @buffer:   start of the caller's buffer (may be NULL when @size is 0)
 * @size:     capacity of @buffer; 0 means "only compute the length"
 * @result:   in/out running byte count; always advanced by the record size
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name, not necessarily NUL-terminated
 * @name_len: length of @name in bytes
 *
 * Returns 0 on success (or in size-only mode) and -ERANGE when the record
 * does not fit in @size bytes.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;
	char *p;

	*result += total_len;

	/* we are just looking for how big our buffer needs to be */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	/*
	 * Only form the destination pointer once we know a real buffer is
	 * in play; in size-only mode @buffer may legitimately be NULL.
	 */
	p = buffer + start;
	memcpy(p, prefix, prefix_len);
	memcpy(p + prefix_len, name, name_len);
	p[prefix_len + name_len] = '\0';

	return 0;
}
914
915 static int ocfs2_xattr_list_entries(struct inode *inode,
916                                     struct ocfs2_xattr_header *header,
917                                     char *buffer, size_t buffer_size)
918 {
919         size_t result = 0;
920         int i, type, ret;
921         const char *prefix, *name;
922
923         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
924                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
925                 type = ocfs2_xattr_get_type(entry);
926                 prefix = ocfs2_xattr_prefix(type);
927
928                 if (prefix) {
929                         name = (const char *)header +
930                                 le16_to_cpu(entry->xe_name_offset);
931
932                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
933                                                      &result, prefix, name,
934                                                      entry->xe_name_len);
935                         if (ret)
936                                 return ret;
937                 }
938         }
939
940         return result;
941 }
942
943 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
944                                          struct ocfs2_dinode *di)
945 {
946         struct ocfs2_xattr_header *xh;
947         int i;
948
949         xh = (struct ocfs2_xattr_header *)
950                  ((void *)di + inode->i_sb->s_blocksize -
951                  le16_to_cpu(di->i_xattr_inline_size));
952
953         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
954                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
955                         return 1;
956
957         return 0;
958 }
959
960 static int ocfs2_xattr_ibody_list(struct inode *inode,
961                                   struct ocfs2_dinode *di,
962                                   char *buffer,
963                                   size_t buffer_size)
964 {
965         struct ocfs2_xattr_header *header = NULL;
966         struct ocfs2_inode_info *oi = OCFS2_I(inode);
967         int ret = 0;
968
969         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
970                 return ret;
971
972         header = (struct ocfs2_xattr_header *)
973                  ((void *)di + inode->i_sb->s_blocksize -
974                  le16_to_cpu(di->i_xattr_inline_size));
975
976         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
977
978         return ret;
979 }
980
981 static int ocfs2_xattr_block_list(struct inode *inode,
982                                   struct ocfs2_dinode *di,
983                                   char *buffer,
984                                   size_t buffer_size)
985 {
986         struct buffer_head *blk_bh = NULL;
987         struct ocfs2_xattr_block *xb;
988         int ret = 0;
989
990         if (!di->i_xattr_loc)
991                 return ret;
992
993         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
994                                      &blk_bh);
995         if (ret < 0) {
996                 mlog_errno(ret);
997                 return ret;
998         }
999
1000         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1001         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1002                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
1003                 ret = ocfs2_xattr_list_entries(inode, header,
1004                                                buffer, buffer_size);
1005         } else
1006                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1007                                                    buffer, buffer_size);
1008
1009         brelse(blk_bh);
1010
1011         return ret;
1012 }
1013
1014 ssize_t ocfs2_listxattr(struct dentry *dentry,
1015                         char *buffer,
1016                         size_t size)
1017 {
1018         int ret = 0, i_ret = 0, b_ret = 0;
1019         struct buffer_head *di_bh = NULL;
1020         struct ocfs2_dinode *di = NULL;
1021         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
1022
1023         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1024                 return -EOPNOTSUPP;
1025
1026         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1027                 return ret;
1028
1029         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
1030         if (ret < 0) {
1031                 mlog_errno(ret);
1032                 return ret;
1033         }
1034
1035         di = (struct ocfs2_dinode *)di_bh->b_data;
1036
1037         down_read(&oi->ip_xattr_sem);
1038         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
1039         if (i_ret < 0)
1040                 b_ret = 0;
1041         else {
1042                 if (buffer) {
1043                         buffer += i_ret;
1044                         size -= i_ret;
1045                 }
1046                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
1047                                                buffer, size);
1048                 if (b_ret < 0)
1049                         i_ret = 0;
1050         }
1051         up_read(&oi->ip_xattr_sem);
1052         ocfs2_inode_unlock(dentry->d_inode, 0);
1053
1054         brelse(di_bh);
1055
1056         return i_ret + b_ret;
1057 }
1058
1059 static int ocfs2_xattr_find_entry(int name_index,
1060                                   const char *name,
1061                                   struct ocfs2_xattr_search *xs)
1062 {
1063         struct ocfs2_xattr_entry *entry;
1064         size_t name_len;
1065         int i, cmp = 1;
1066
1067         if (name == NULL)
1068                 return -EINVAL;
1069
1070         name_len = strlen(name);
1071         entry = xs->here;
1072         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1073                 cmp = name_index - ocfs2_xattr_get_type(entry);
1074                 if (!cmp)
1075                         cmp = name_len - entry->xe_name_len;
1076                 if (!cmp)
1077                         cmp = memcmp(name, (xs->base +
1078                                      le16_to_cpu(entry->xe_name_offset)),
1079                                      name_len);
1080                 if (cmp == 0)
1081                         break;
1082                 entry += 1;
1083         }
1084         xs->here = entry;
1085
1086         return cmp ? -ENODATA : 0;
1087 }
1088
1089 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1090                                          struct ocfs2_xattr_value_root *xv,
1091                                          void *buffer,
1092                                          size_t len)
1093 {
1094         u32 cpos, p_cluster, num_clusters, bpc, clusters;
1095         u64 blkno;
1096         int i, ret = 0;
1097         size_t cplen, blocksize;
1098         struct buffer_head *bh = NULL;
1099         struct ocfs2_extent_list *el;
1100
1101         el = &xv->xr_list;
1102         clusters = le32_to_cpu(xv->xr_clusters);
1103         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1104         blocksize = inode->i_sb->s_blocksize;
1105
1106         cpos = 0;
1107         while (cpos < clusters) {
1108                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1109                                                &num_clusters, el, NULL);
1110                 if (ret) {
1111                         mlog_errno(ret);
1112                         goto out;
1113                 }
1114
1115                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1116                 /* Copy ocfs2_xattr_value */
1117                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1118                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1119                                                &bh, NULL);
1120                         if (ret) {
1121                                 mlog_errno(ret);
1122                                 goto out;
1123                         }
1124
1125                         cplen = len >= blocksize ? blocksize : len;
1126                         memcpy(buffer, bh->b_data, cplen);
1127                         len -= cplen;
1128                         buffer += cplen;
1129
1130                         brelse(bh);
1131                         bh = NULL;
1132                         if (len == 0)
1133                                 break;
1134                 }
1135                 cpos += num_clusters;
1136         }
1137 out:
1138         return ret;
1139 }
1140
1141 static int ocfs2_xattr_ibody_get(struct inode *inode,
1142                                  int name_index,
1143                                  const char *name,
1144                                  void *buffer,
1145                                  size_t buffer_size,
1146                                  struct ocfs2_xattr_search *xs)
1147 {
1148         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1149         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1150         struct ocfs2_xattr_value_root *xv;
1151         size_t size;
1152         int ret = 0;
1153
1154         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1155                 return -ENODATA;
1156
1157         xs->end = (void *)di + inode->i_sb->s_blocksize;
1158         xs->header = (struct ocfs2_xattr_header *)
1159                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1160         xs->base = (void *)xs->header;
1161         xs->here = xs->header->xh_entries;
1162
1163         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1164         if (ret)
1165                 return ret;
1166         size = le64_to_cpu(xs->here->xe_value_size);
1167         if (buffer) {
1168                 if (size > buffer_size)
1169                         return -ERANGE;
1170                 if (ocfs2_xattr_is_local(xs->here)) {
1171                         memcpy(buffer, (void *)xs->base +
1172                                le16_to_cpu(xs->here->xe_name_offset) +
1173                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1174                 } else {
1175                         xv = (struct ocfs2_xattr_value_root *)
1176                                 (xs->base + le16_to_cpu(
1177                                  xs->here->xe_name_offset) +
1178                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1179                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1180                                                             buffer, size);
1181                         if (ret < 0) {
1182                                 mlog_errno(ret);
1183                                 return ret;
1184                         }
1185                 }
1186         }
1187
1188         return size;
1189 }
1190
1191 static int ocfs2_xattr_block_get(struct inode *inode,
1192                                  int name_index,
1193                                  const char *name,
1194                                  void *buffer,
1195                                  size_t buffer_size,
1196                                  struct ocfs2_xattr_search *xs)
1197 {
1198         struct ocfs2_xattr_block *xb;
1199         struct ocfs2_xattr_value_root *xv;
1200         size_t size;
1201         int ret = -ENODATA, name_offset, name_len, i;
1202         int uninitialized_var(block_off);
1203
1204         xs->bucket = ocfs2_xattr_bucket_new(inode);
1205         if (!xs->bucket) {
1206                 ret = -ENOMEM;
1207                 mlog_errno(ret);
1208                 goto cleanup;
1209         }
1210
1211         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1212         if (ret) {
1213                 mlog_errno(ret);
1214                 goto cleanup;
1215         }
1216
1217         if (xs->not_found) {
1218                 ret = -ENODATA;
1219                 goto cleanup;
1220         }
1221
1222         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1223         size = le64_to_cpu(xs->here->xe_value_size);
1224         if (buffer) {
1225                 ret = -ERANGE;
1226                 if (size > buffer_size)
1227                         goto cleanup;
1228
1229                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1230                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1231                 i = xs->here - xs->header->xh_entries;
1232
1233                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1234                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1235                                                                 bucket_xh(xs->bucket),
1236                                                                 i,
1237                                                                 &block_off,
1238                                                                 &name_offset);
1239                         xs->base = bucket_block(xs->bucket, block_off);
1240                 }
1241                 if (ocfs2_xattr_is_local(xs->here)) {
1242                         memcpy(buffer, (void *)xs->base +
1243                                name_offset + name_len, size);
1244                 } else {
1245                         xv = (struct ocfs2_xattr_value_root *)
1246                                 (xs->base + name_offset + name_len);
1247                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1248                                                             buffer, size);
1249                         if (ret < 0) {
1250                                 mlog_errno(ret);
1251                                 goto cleanup;
1252                         }
1253                 }
1254         }
1255         ret = size;
1256 cleanup:
1257         ocfs2_xattr_bucket_free(xs->bucket);
1258
1259         brelse(xs->xattr_bh);
1260         xs->xattr_bh = NULL;
1261         return ret;
1262 }
1263
1264 int ocfs2_xattr_get_nolock(struct inode *inode,
1265                            struct buffer_head *di_bh,
1266                            int name_index,
1267                            const char *name,
1268                            void *buffer,
1269                            size_t buffer_size)
1270 {
1271         int ret;
1272         struct ocfs2_dinode *di = NULL;
1273         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1274         struct ocfs2_xattr_search xis = {
1275                 .not_found = -ENODATA,
1276         };
1277         struct ocfs2_xattr_search xbs = {
1278                 .not_found = -ENODATA,
1279         };
1280
1281         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1282                 return -EOPNOTSUPP;
1283
1284         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1285                 ret = -ENODATA;
1286
1287         xis.inode_bh = xbs.inode_bh = di_bh;
1288         di = (struct ocfs2_dinode *)di_bh->b_data;
1289
1290         down_read(&oi->ip_xattr_sem);
1291         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1292                                     buffer_size, &xis);
1293         if (ret == -ENODATA && di->i_xattr_loc)
1294                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1295                                             buffer_size, &xbs);
1296         up_read(&oi->ip_xattr_sem);
1297
1298         return ret;
1299 }
1300
1301 /* ocfs2_xattr_get()
1302  *
1303  * Copy an extended attribute into the buffer provided.
1304  * Buffer is NULL to compute the size of buffer required.
1305  */
1306 static int ocfs2_xattr_get(struct inode *inode,
1307                            int name_index,
1308                            const char *name,
1309                            void *buffer,
1310                            size_t buffer_size)
1311 {
1312         int ret;
1313         struct buffer_head *di_bh = NULL;
1314
1315         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1316         if (ret < 0) {
1317                 mlog_errno(ret);
1318                 return ret;
1319         }
1320         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1321                                      name, buffer, buffer_size);
1322
1323         ocfs2_inode_unlock(inode, 0);
1324
1325         brelse(di_bh);
1326
1327         return ret;
1328 }
1329
1330 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1331                                            handle_t *handle,
1332                                            struct ocfs2_xattr_value_buf *vb,
1333                                            const void *value,
1334                                            int value_len)
1335 {
1336         int ret = 0, i, cp_len;
1337         u16 blocksize = inode->i_sb->s_blocksize;
1338         u32 p_cluster, num_clusters;
1339         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1340         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1341         u64 blkno;
1342         struct buffer_head *bh = NULL;
1343         unsigned int ext_flags;
1344         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1345
1346         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1347
1348         while (cpos < clusters) {
1349                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1350                                                &num_clusters, &xv->xr_list,
1351                                                &ext_flags);
1352                 if (ret) {
1353                         mlog_errno(ret);
1354                         goto out;
1355                 }
1356
1357                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1358
1359                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1360
1361                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1362                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1363                                                &bh, NULL);
1364                         if (ret) {
1365                                 mlog_errno(ret);
1366                                 goto out;
1367                         }
1368
1369                         ret = ocfs2_journal_access(handle,
1370                                                    INODE_CACHE(inode),
1371                                                    bh,
1372                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1373                         if (ret < 0) {
1374                                 mlog_errno(ret);
1375                                 goto out;
1376                         }
1377
1378                         cp_len = value_len > blocksize ? blocksize : value_len;
1379                         memcpy(bh->b_data, value, cp_len);
1380                         value_len -= cp_len;
1381                         value += cp_len;
1382                         if (cp_len < blocksize)
1383                                 memset(bh->b_data + cp_len, 0,
1384                                        blocksize - cp_len);
1385
1386                         ret = ocfs2_journal_dirty(handle, bh);
1387                         if (ret < 0) {
1388                                 mlog_errno(ret);
1389                                 goto out;
1390                         }
1391                         brelse(bh);
1392                         bh = NULL;
1393
1394                         /*
1395                          * XXX: do we need to empty all the following
1396                          * blocks in this cluster?
1397                          */
1398                         if (!value_len)
1399                                 break;
1400                 }
1401                 cpos += num_clusters;
1402         }
1403 out:
1404         brelse(bh);
1405
1406         return ret;
1407 }
1408
1409 static int ocfs2_xattr_cleanup(struct inode *inode,
1410                                handle_t *handle,
1411                                struct ocfs2_xattr_info *xi,
1412                                struct ocfs2_xattr_search *xs,
1413                                struct ocfs2_xattr_value_buf *vb,
1414                                size_t offs)
1415 {
1416         int ret = 0;
1417         void *val = xs->base + offs;
1418         size_t size = namevalue_size_xi(xi);
1419
1420         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1421                             OCFS2_JOURNAL_ACCESS_WRITE);
1422         if (ret) {
1423                 mlog_errno(ret);
1424                 goto out;
1425         }
1426         /* Decrease xattr count */
1427         le16_add_cpu(&xs->header->xh_count, -1);
1428         /* Remove the xattr entry and tree root which has already be set*/
1429         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1430         memset(val, 0, size);
1431
1432         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1433         if (ret < 0)
1434                 mlog_errno(ret);
1435 out:
1436         return ret;
1437 }
1438
1439 static int ocfs2_xattr_update_entry(struct inode *inode,
1440                                     handle_t *handle,
1441                                     struct ocfs2_xattr_info *xi,
1442                                     struct ocfs2_xattr_search *xs,
1443                                     struct ocfs2_xattr_value_buf *vb,
1444                                     size_t offs)
1445 {
1446         int ret;
1447
1448         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1449                             OCFS2_JOURNAL_ACCESS_WRITE);
1450         if (ret) {
1451                 mlog_errno(ret);
1452                 goto out;
1453         }
1454
1455         xs->here->xe_name_offset = cpu_to_le16(offs);
1456         xs->here->xe_value_size = cpu_to_le64(xi->xi_value_len);
1457         if (xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE)
1458                 ocfs2_xattr_set_local(xs->here, 1);
1459         else
1460                 ocfs2_xattr_set_local(xs->here, 0);
1461         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1462
1463         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1464         if (ret < 0)
1465                 mlog_errno(ret);
1466 out:
1467         return ret;
1468 }
1469
1470 /*
1471  * ocfs2_xattr_set_value_outside()
1472  *
1473  * Set large size value in B tree.
1474  */
1475 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1476                                          struct ocfs2_xattr_info *xi,
1477                                          struct ocfs2_xattr_search *xs,
1478                                          struct ocfs2_xattr_set_ctxt *ctxt,
1479                                          struct ocfs2_xattr_value_buf *vb,
1480                                          size_t offs)
1481 {
1482         void *val = xs->base + offs;
1483         struct ocfs2_xattr_value_root *xv = NULL;
1484         size_t size = namevalue_size_xi(xi);
1485         int ret = 0;
1486
1487         memset(val, 0, size);
1488         memcpy(val, xi->xi_name, xi->xi_name_len);
1489         xv = (struct ocfs2_xattr_value_root *)
1490                 (val + OCFS2_XATTR_SIZE(xi->xi_name_len));
1491         xv->xr_clusters = 0;
1492         xv->xr_last_eb_blk = 0;
1493         xv->xr_list.l_tree_depth = 0;
1494         xv->xr_list.l_count = cpu_to_le16(1);
1495         xv->xr_list.l_next_free_rec = 0;
1496         vb->vb_xv = xv;
1497
1498         ret = ocfs2_xattr_value_truncate(inode, vb, xi->xi_value_len, ctxt);
1499         if (ret < 0) {
1500                 mlog_errno(ret);
1501                 return ret;
1502         }
1503         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1504         if (ret < 0) {
1505                 mlog_errno(ret);
1506                 return ret;
1507         }
1508         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1509                                               xi->xi_value, xi->xi_value_len);
1510         if (ret < 0)
1511                 mlog_errno(ret);
1512
1513         return ret;
1514 }
1515
1516 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1517                                        int num_entries)
1518 {
1519         int free_space;
1520
1521         if (!needed_space)
1522                 return 0;
1523
1524         free_space = free_start -
1525                 sizeof(struct ocfs2_xattr_header) -
1526                 (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1527                 OCFS2_XATTR_HEADER_GAP;
1528         if (free_space < 0)
1529                 return -EIO;
1530         if (free_space < needed_space)
1531                 return -ENOSPC;
1532
1533         return 0;
1534 }
1535
1536 /* Give a pointer into the storage for the given offset */
1537 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1538 {
1539         BUG_ON(offset >= loc->xl_size);
1540         return loc->xl_ops->xlo_offset_pointer(loc, offset);
1541 }
1542
1543 /*
1544  * Wipe the name+value pair and allow the storage to reclaim it.  This
1545  * must be followed by either removal of the entry or a call to
1546  * ocfs2_xa_add_namevalue().
1547  */
1548 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1549 {
1550         loc->xl_ops->xlo_wipe_namevalue(loc);
1551 }
1552
1553 /*
1554  * Find lowest offset to a name+value pair.  This is the start of our
1555  * downward-growing free space.
1556  */
1557 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1558 {
1559         return loc->xl_ops->xlo_get_free_start(loc);
1560 }
1561
1562 /* Can we reuse loc->xl_entry for xi? */
1563 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1564                                     struct ocfs2_xattr_info *xi)
1565 {
1566         return loc->xl_ops->xlo_can_reuse(loc, xi);
1567 }
1568
1569 /* How much free space is needed to set the new value */
1570 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1571                                 struct ocfs2_xattr_info *xi)
1572 {
1573         return loc->xl_ops->xlo_check_space(loc, xi);
1574 }
1575
1576 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1577 {
1578         loc->xl_ops->xlo_add_entry(loc, name_hash);
1579         loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1580         /*
1581          * We can't leave the new entry's xe_name_offset at zero or
1582          * add_namevalue() will go nuts.  We set it to the size of our
1583          * storage so that it can never be less than any other entry.
1584          */
1585         loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1586 }
1587
1588 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1589                                    struct ocfs2_xattr_info *xi)
1590 {
1591         int size = namevalue_size_xi(xi);
1592         int nameval_offset;
1593         char *nameval_buf;
1594
1595         loc->xl_ops->xlo_add_namevalue(loc, size);
1596         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1597         loc->xl_entry->xe_name_len = xi->xi_name_len;
1598         ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1599         ocfs2_xattr_set_local(loc->xl_entry,
1600                               xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1601
1602         nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1603         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1604         memset(nameval_buf, 0, size);
1605         memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1606 }
1607
1608 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1609                                            int offset)
1610 {
1611         return (char *)loc->xl_header + offset;
1612 }
1613
1614 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1615                                     struct ocfs2_xattr_info *xi)
1616 {
1617         /*
1618          * Block storage is strict.  If the sizes aren't exact, we will
1619          * remove the old one and reinsert the new.
1620          */
1621         return namevalue_size_xe(loc->xl_entry) ==
1622                 namevalue_size_xi(xi);
1623 }
1624
1625 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1626 {
1627         struct ocfs2_xattr_header *xh = loc->xl_header;
1628         int i, count = le16_to_cpu(xh->xh_count);
1629         int offset, free_start = loc->xl_size;
1630
1631         for (i = 0; i < count; i++) {
1632                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1633                 if (offset < free_start)
1634                         free_start = offset;
1635         }
1636
1637         return free_start;
1638 }
1639
1640 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1641                                       struct ocfs2_xattr_info *xi)
1642 {
1643         int count = le16_to_cpu(loc->xl_header->xh_count);
1644         int free_start = ocfs2_xa_get_free_start(loc);
1645         int needed_space = ocfs2_xi_entry_usage(xi);
1646
1647         /*
1648          * Block storage will reclaim the original entry before inserting
1649          * the new value, so we only need the difference.  If the new
1650          * entry is smaller than the old one, we don't need anything.
1651          */
1652         if (loc->xl_entry) {
1653                 /* Don't need space if we're reusing! */
1654                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1655                         needed_space = 0;
1656                 else
1657                         needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1658         }
1659         if (needed_space < 0)
1660                 needed_space = 0;
1661         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1662 }
1663
1664 /*
1665  * Block storage for xattrs keeps the name+value pairs compacted.  When
1666  * we remove one, we have to shift any that preceded it towards the end.
1667  */
1668 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1669 {
1670         int i, offset;
1671         int namevalue_offset, first_namevalue_offset, namevalue_size;
1672         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1673         struct ocfs2_xattr_header *xh = loc->xl_header;
1674         int count = le16_to_cpu(xh->xh_count);
1675
1676         namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1677         namevalue_size = namevalue_size_xe(entry);
1678         first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1679
1680         /* Shift the name+value pairs */
1681         memmove((char *)xh + first_namevalue_offset + namevalue_size,
1682                 (char *)xh + first_namevalue_offset,
1683                 namevalue_offset - first_namevalue_offset);
1684         memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1685
1686         /* Now tell xh->xh_entries about it */
1687         for (i = 0; i < count; i++) {
1688                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1689                 if (offset < namevalue_offset)
1690                         le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1691                                      namevalue_size);
1692         }
1693
1694         /*
1695          * Note that we don't update xh_free_start or xh_name_value_len
1696          * because they're not used in block-stored xattrs.
1697          */
1698 }
1699
1700 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1701 {
1702         int count = le16_to_cpu(loc->xl_header->xh_count);
1703         loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1704         le16_add_cpu(&loc->xl_header->xh_count, 1);
1705         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1706 }
1707
1708 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1709 {
1710         int free_start = ocfs2_xa_get_free_start(loc);
1711
1712         loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1713 }
1714
1715 /*
1716  * Operations for xattrs stored in blocks.  This includes inline inode
1717  * storage and unindexed ocfs2_xattr_blocks.
1718  */
1719 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1720         .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1721         .xlo_check_space        = ocfs2_xa_block_check_space,
1722         .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1723         .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1724         .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1725         .xlo_add_entry          = ocfs2_xa_block_add_entry,
1726         .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1727 };
1728
1729 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1730                                             int offset)
1731 {
1732         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1733         int block, block_offset;
1734
1735         /* The header is at the front of the bucket */
1736         block = offset >> bucket->bu_inode->i_sb->s_blocksize_bits;
1737         block_offset = offset % bucket->bu_inode->i_sb->s_blocksize;
1738
1739         return bucket_block(bucket, block) + block_offset;
1740 }
1741
1742 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1743                                      struct ocfs2_xattr_info *xi)
1744 {
1745         return namevalue_size_xe(loc->xl_entry) >=
1746                 namevalue_size_xi(xi);
1747 }
1748
1749 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1750 {
1751         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1752         return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1753 }
1754
1755 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1756                                          int free_start, int size)
1757 {
1758         /*
1759          * We need to make sure that the name+value pair fits within
1760          * one block.
1761          */
1762         if (((free_start - size) >> sb->s_blocksize_bits) !=
1763             ((free_start - 1) >> sb->s_blocksize_bits))
1764                 free_start -= free_start % sb->s_blocksize;
1765
1766         return free_start;
1767 }
1768
1769 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1770                                        struct ocfs2_xattr_info *xi)
1771 {
1772         int rc;
1773         int count = le16_to_cpu(loc->xl_header->xh_count);
1774         int free_start = ocfs2_xa_get_free_start(loc);
1775         int needed_space = ocfs2_xi_entry_usage(xi);
1776         int size = namevalue_size_xi(xi);
1777         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1778         struct super_block *sb = bucket->bu_inode->i_sb;
1779
1780         /*
1781          * Bucket storage does not reclaim name+value pairs it cannot
1782          * reuse.  They live as holes until the bucket fills, and then
1783          * the bucket is defragmented.  However, the bucket can reclaim
1784          * the ocfs2_xattr_entry.
1785          */
1786         if (loc->xl_entry) {
1787                 /* Don't need space if we're reusing! */
1788                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1789                         needed_space = 0;
1790                 else
1791                         needed_space -= sizeof(struct ocfs2_xattr_entry);
1792         }
1793         BUG_ON(needed_space < 0);
1794
1795         if (free_start < size) {
1796                 if (needed_space)
1797                         return -ENOSPC;
1798         } else {
1799                 /*
1800                  * First we check if it would fit in the first place.
1801                  * Below, we align the free start to a block.  This may
1802                  * slide us below the minimum gap.  By checking unaligned
1803                  * first, we avoid that error.
1804                  */
1805                 rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1806                                                  count);
1807                 if (rc)
1808                         return rc;
1809                 free_start = ocfs2_bucket_align_free_start(sb, free_start,
1810                                                            size);
1811         }
1812         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1813 }
1814
1815 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1816 {
1817         le16_add_cpu(&loc->xl_header->xh_name_value_len,
1818                      -namevalue_size_xe(loc->xl_entry));
1819 }
1820
1821 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1822 {
1823         struct ocfs2_xattr_header *xh = loc->xl_header;
1824         int count = le16_to_cpu(xh->xh_count);
1825         int low = 0, high = count - 1, tmp;
1826         struct ocfs2_xattr_entry *tmp_xe;
1827
1828         /*
1829          * We keep buckets sorted by name_hash, so we need to find
1830          * our insert place.
1831          */
1832         while (low <= high && count) {
1833                 tmp = (low + high) / 2;
1834                 tmp_xe = &xh->xh_entries[tmp];
1835
1836                 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1837                         low = tmp + 1;
1838                 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1839                         high = tmp - 1;
1840                 else {
1841                         low = tmp;
1842                         break;
1843                 }
1844         }
1845
1846         if (low != count)
1847                 memmove(&xh->xh_entries[low + 1],
1848                         &xh->xh_entries[low],
1849                         ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1850
1851         le16_add_cpu(&xh->xh_count, 1);
1852         loc->xl_entry = &xh->xh_entries[low];
1853         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1854 }
1855
1856 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1857 {
1858         int free_start = ocfs2_xa_get_free_start(loc);
1859         struct ocfs2_xattr_header *xh = loc->xl_header;
1860         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1861         struct super_block *sb = bucket->bu_inode->i_sb;
1862         int nameval_offset;
1863
1864         free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1865         nameval_offset = free_start - size;
1866         loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1867         xh->xh_free_start = cpu_to_le16(nameval_offset);
1868         le16_add_cpu(&xh->xh_name_value_len, size);
1869
1870 }
1871
1872 /* Operations for xattrs stored in buckets. */
1873 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1874         .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1875         .xlo_check_space        = ocfs2_xa_bucket_check_space,
1876         .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1877         .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1878         .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1879         .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1880         .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1881 };
1882
1883 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1884 {
1885         int index, count;
1886         struct ocfs2_xattr_header *xh = loc->xl_header;
1887         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1888
1889         ocfs2_xa_wipe_namevalue(loc);
1890         loc->xl_entry = NULL;
1891
1892         le16_add_cpu(&xh->xh_count, -1);
1893         count = le16_to_cpu(xh->xh_count);
1894
1895         /*
1896          * Only zero out the entry if there are more remaining.  This is
1897          * important for an empty bucket, as it keeps track of the
1898          * bucket's hash value.  It doesn't hurt empty block storage.
1899          */
1900         if (count) {
1901                 index = ((char *)entry - (char *)&xh->xh_entries) /
1902                         sizeof(struct ocfs2_xattr_entry);
1903                 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1904                         (count - index) * sizeof(struct ocfs2_xattr_entry));
1905                 memset(&xh->xh_entries[count], 0,
1906                        sizeof(struct ocfs2_xattr_entry));
1907         }
1908 }
1909
1910 /*
1911  * Prepares loc->xl_entry to receive the new xattr.  This includes
1912  * properly setting up the name+value pair region.  If loc->xl_entry
1913  * already exists, it will take care of modifying it appropriately.
1914  * This also includes deleting entries, but don't call this to remove
1915  * a non-existant entry.  That's just a bug.
1916  *
1917  * Note that this modifies the data.  You did journal_access already,
1918  * right?
1919  */
1920 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
1921                                   struct ocfs2_xattr_info *xi,
1922                                   u32 name_hash)
1923 {
1924         int rc = 0;
1925         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
1926         char *nameval_buf;
1927
1928         if (!xi->xi_value) {
1929                 ocfs2_xa_remove_entry(loc);
1930                 goto out;
1931         }
1932
1933         rc = ocfs2_xa_check_space(loc, xi);
1934         if (rc)
1935                 goto out;
1936
1937         if (loc->xl_entry) {
1938                 if (ocfs2_xa_can_reuse_entry(loc, xi)) {
1939                         nameval_buf = ocfs2_xa_offset_pointer(loc,
1940                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
1941                         memset(nameval_buf + name_size, 0,
1942                                namevalue_size_xe(loc->xl_entry) - name_size);
1943                         loc->xl_entry->xe_value_size =
1944                                 cpu_to_le64(xi->xi_value_len);
1945                         goto out;
1946                 }
1947
1948                 ocfs2_xa_wipe_namevalue(loc);
1949         } else
1950                 ocfs2_xa_add_entry(loc, name_hash);
1951
1952         /*
1953          * If we get here, we have a blank entry.  Fill it.  We grow our
1954          * name+value pair back from the end.
1955          */
1956         ocfs2_xa_add_namevalue(loc, xi);
1957
1958 out:
1959         return rc;
1960 }
1961
1962 /*
1963  * Store the value portion of the name+value pair.  This is either an
1964  * inline value or the tree root of an external value.
1965  */
1966 static void ocfs2_xa_store_inline_value(struct ocfs2_xa_loc *loc,
1967                                         struct ocfs2_xattr_info *xi)
1968 {
1969         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1970         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
1971         int size = namevalue_size_xi(xi);
1972         char *nameval_buf;
1973
1974         if (!xi->xi_value)
1975                 return;
1976
1977         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1978         memcpy(nameval_buf + name_size, xi->xi_value, size - name_size);
1979 }
1980
1981 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
1982                                      struct inode *inode,
1983                                      struct buffer_head *bh,
1984                                      struct ocfs2_xattr_entry *entry)
1985 {
1986         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1987
1988         loc->xl_ops = &ocfs2_xa_block_loc_ops;
1989         loc->xl_storage = bh;
1990         loc->xl_entry = entry;
1991
1992         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1993                 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
1994         else {
1995                 BUG_ON(entry);
1996                 loc->xl_size = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1997         }
1998         loc->xl_header =
1999                 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2000                                               loc->xl_size);
2001 }
2002
2003 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2004                                           struct buffer_head *bh,
2005                                           struct ocfs2_xattr_entry *entry)
2006 {
2007         struct ocfs2_xattr_block *xb =
2008                 (struct ocfs2_xattr_block *)bh->b_data;
2009
2010         BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2011
2012         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2013         loc->xl_storage = bh;
2014         loc->xl_header = &(xb->xb_attrs.xb_header);
2015         loc->xl_entry = entry;
2016         loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2017                                              xb_attrs.xb_header);
2018 }
2019
2020 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2021                                            struct ocfs2_xattr_bucket *bucket,
2022                                            struct ocfs2_xattr_entry *entry)
2023 {
2024         loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2025         loc->xl_storage = bucket;
2026         loc->xl_header = bucket_xh(bucket);
2027         loc->xl_entry = entry;
2028         loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2029 }
2030
2031
2032 /*
2033  * ocfs2_xattr_set_entry()
2034  *
2035  * Set extended attribute entry into inode or block.
2036  *
2037  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
2038  * We first insert tree root(ocfs2_xattr_value_root) like a normal value,
2039  * then set value in B tree with set_value_outside().
2040  */
2041 static int ocfs2_xattr_set_entry(struct inode *inode,
2042                                  struct ocfs2_xattr_info *xi,
2043                                  struct ocfs2_xattr_search *xs,
2044                                  struct ocfs2_xattr_set_ctxt *ctxt,
2045                                  int flag)
2046 {
2047         struct ocfs2_xattr_entry *last;
2048         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2049         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2050         size_t min_offs = xs->end - xs->base;
2051         size_t size_l = 0;
2052         handle_t *handle = ctxt->handle;
2053         int free, i, ret;
2054         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->xi_name,
2055                                               xi->xi_name_len);
2056         struct ocfs2_xa_loc loc;
2057         struct ocfs2_xattr_info xi_l = {
2058                 .xi_name_index = xi->xi_name_index,
2059                 .xi_name = xi->xi_name,
2060                 .xi_name_len = xi->xi_name_len,
2061                 .xi_value = xi->xi_value,
2062                 .xi_value_len = xi->xi_value_len,
2063         };
2064         struct ocfs2_xattr_value_buf vb = {
2065                 .vb_bh = xs->xattr_bh,
2066                 .vb_access = ocfs2_journal_access_di,
2067         };
2068
2069         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
2070                 BUG_ON(xs->xattr_bh == xs->inode_bh);
2071                 vb.vb_access = ocfs2_journal_access_xb;
2072         } else
2073                 BUG_ON(xs->xattr_bh != xs->inode_bh);
2074
2075         /* Compute min_offs, last and free space. */
2076         last = xs->header->xh_entries;
2077
2078         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
2079                 size_t offs = le16_to_cpu(last->xe_name_offset);
2080                 if (offs < min_offs)
2081                         min_offs = offs;
2082                 last += 1;
2083         }
2084
2085         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2086         if (free < 0)
2087                 return -EIO;
2088
2089         if (!xs->not_found)
2090                 free += ocfs2_xe_entry_usage(xs->here);
2091
2092         /* Check free space in inode or block */
2093         if (xi->xi_value) {
2094                 if (free < ocfs2_xi_entry_usage(xi)) {
2095                         ret = -ENOSPC;
2096                         goto out;
2097                 }
2098                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2099                         size_l = namevalue_size_xi(xi);
2100                         xi_l.xi_value = (void *)&def_xv;
2101                         xi_l.xi_value_len = OCFS2_XATTR_ROOT_SIZE;
2102                 }
2103         }
2104
2105         if (!xs->not_found) {
2106                 /* For existing extended attribute */
2107                 size_t size = namevalue_size_xe(xs->here);
2108                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
2109                 void *val = xs->base + offs;
2110
2111                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
2112                         /* Replace existing local xattr with tree root */
2113                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
2114                                                             ctxt, &vb, offs);
2115                         if (ret < 0)
2116                                 mlog_errno(ret);
2117                         goto out;
2118                 } else if (!ocfs2_xattr_is_local(xs->here)) {
2119                         /* For existing xattr which has value outside */
2120                         vb.vb_xv = (struct ocfs2_xattr_value_root *)
2121                                 (val + OCFS2_XATTR_SIZE(xi->xi_name_len));
2122
2123                         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2124                                 /*
2125                                  * If new value need set outside also,
2126                                  * first truncate old value to new value,
2127                                  * then set new value with set_value_outside().
2128                                  */
2129                                 ret = ocfs2_xattr_value_truncate(inode,
2130                                                         &vb,
2131                                                         xi->xi_value_len,
2132                                                         ctxt);
2133                                 if (ret < 0) {
2134                                         mlog_errno(ret);
2135                                         goto out;
2136                                 }
2137
2138                                 ret = ocfs2_xattr_update_entry(inode,
2139                                                                handle,
2140                                                                xi,
2141                                                                xs,
2142                                                                &vb,
2143                                                                offs);
2144                                 if (ret < 0) {
2145                                         mlog_errno(ret);
2146                                         goto out;
2147                                 }
2148
2149                                 ret = __ocfs2_xattr_set_value_outside(inode,
2150                                                         handle,
2151                                                         &vb,
2152                                                         xi->xi_value,
2153                                                         xi->xi_value_len);
2154                                 if (ret < 0)
2155                                         mlog_errno(ret);
2156                                 goto out;
2157                         } else {
2158                                 /*
2159                                  * If new value need set in local,
2160                                  * just trucate old value to zero.
2161                                  */
2162                                  ret = ocfs2_xattr_value_truncate(inode,
2163                                                                   &vb,
2164                                                                   0,
2165                                                                   ctxt);
2166                                 if (ret < 0)
2167                                         mlog_errno(ret);
2168                         }
2169                 }
2170         }
2171
2172         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
2173                                       OCFS2_JOURNAL_ACCESS_WRITE);
2174         if (ret) {
2175                 mlog_errno(ret);
2176                 goto out;
2177         }
2178
2179         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
2180                 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
2181                                    OCFS2_JOURNAL_ACCESS_WRITE);
2182                 if (ret) {
2183                         mlog_errno(ret);
2184                         goto out;
2185                 }
2186         }
2187
2188         if (xs->xattr_bh == xs->inode_bh)
2189                 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2190                                          xs->not_found ? NULL : xs->here);
2191         else
2192                 ocfs2_init_xattr_block_xa_loc(&loc, xs->xattr_bh,
2193                                               xs->not_found ? NULL : xs->here);
2194
2195         /*
2196          * Prepare our entry and insert the inline value.  This will
2197          * be a value tree root for values that are larger than
2198          * OCFS2_XATTR_INLINE_SIZE.
2199          */
2200         ret = ocfs2_xa_prepare_entry(&loc, xi, name_hash);
2201         if (ret) {
2202                 if (ret != -ENOSPC)
2203                         mlog_errno(ret);
2204                 goto out;
2205         }
2206         /* XXX For now, until we make ocfs2_xa_prepare_entry() primary */
2207         BUG_ON(ret == -ENOSPC);
2208         ocfs2_xa_store_inline_value(&loc, xi);
2209         xs->here = loc.xl_entry;
2210
2211         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
2212                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
2213                 if (ret < 0) {
2214                         mlog_errno(ret);
2215                         goto out;
2216                 }
2217         }
2218
2219         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
2220             (flag & OCFS2_INLINE_XATTR_FL)) {
2221                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2222                 unsigned int xattrsize = osb->s_xattr_inline_size;
2223
2224                 /*
2225                  * Adjust extent record count or inline data size
2226                  * to reserve space for extended attribute.
2227                  */
2228                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2229                         struct ocfs2_inline_data *idata = &di->id2.i_data;
2230                         le16_add_cpu(&idata->id_count, -xattrsize);
2231                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2232                         struct ocfs2_extent_list *el = &di->id2.i_list;
2233                         le16_add_cpu(&el->l_count, -(xattrsize /
2234                                         sizeof(struct ocfs2_extent_rec)));
2235                 }
2236                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2237         }
2238         /* Update xattr flag */
2239         spin_lock(&oi->ip_lock);
2240         oi->ip_dyn_features |= flag;
2241         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2242         spin_unlock(&oi->ip_lock);
2243
2244         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
2245         if (ret < 0)
2246                 mlog_errno(ret);
2247
2248         if (!ret && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2249                 /*
2250                  * Set value outside in B tree.
2251                  * This is the second step for value size > INLINE_SIZE.
2252                  */
2253                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
2254                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
2255                                                     &vb, offs);
2256                 if (ret < 0) {
2257                         int ret2;
2258
2259                         mlog_errno(ret);
2260                         /*
2261                          * If set value outside failed, we have to clean
2262                          * the junk tree root we have already set in local.
2263                          */
2264                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
2265                                                    xi, xs, &vb, offs);
2266                         if (ret2 < 0)
2267                                 mlog_errno(ret2);
2268                 }
2269         }
2270 out:
2271         return ret;
2272 }
2273
2274 /*
2275  * In xattr remove, if it is stored outside and refcounted, we may have
2276  * the chance to split the refcount tree. So need the allocators.
2277  */
2278 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2279                                         struct ocfs2_xattr_value_root *xv,
2280                                         struct ocfs2_caching_info *ref_ci,
2281                                         struct buffer_head *ref_root_bh,
2282                                         struct ocfs2_alloc_context **meta_ac,
2283                                         int *ref_credits)
2284 {
2285         int ret, meta_add = 0;
2286         u32 p_cluster, num_clusters;
2287         unsigned int ext_flags;
2288
2289         *ref_credits = 0;
2290         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2291                                        &num_clusters,
2292                                        &xv->xr_list,
2293                                        &ext_flags);
2294         if (ret) {
2295                 mlog_errno(ret);
2296                 goto out;
2297         }
2298
2299         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2300                 goto out;
2301
2302         ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2303                                                  ref_root_bh, xv,
2304                                                  &meta_add, ref_credits);
2305         if (ret) {
2306                 mlog_errno(ret);
2307                 goto out;
2308         }
2309
2310         ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2311                                                 meta_add, meta_ac);
2312         if (ret)
2313                 mlog_errno(ret);
2314
2315 out:
2316         return ret;
2317 }
2318
2319 static int ocfs2_remove_value_outside(struct inode*inode,
2320                                       struct ocfs2_xattr_value_buf *vb,
2321                                       struct ocfs2_xattr_header *header,
2322                                       struct ocfs2_caching_info *ref_ci,
2323                                       struct buffer_head *ref_root_bh)
2324 {
2325         int ret = 0, i, ref_credits;
2326         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2327         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2328         void *val;
2329
2330         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2331
2332         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2333                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2334
2335                 if (ocfs2_xattr_is_local(entry))
2336                         continue;
2337
2338                 val = (void *)header +
2339                         le16_to_cpu(entry->xe_name_offset);
2340                 vb->vb_xv = (struct ocfs2_xattr_value_root *)
2341                         (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2342
2343                 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2344                                                          ref_ci, ref_root_bh,
2345                                                          &ctxt.meta_ac,
2346                                                          &ref_credits);
2347
2348                 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2349                                         ocfs2_remove_extent_credits(osb->sb));
2350                 if (IS_ERR(ctxt.handle)) {
2351                         ret = PTR_ERR(ctxt.handle);
2352                         mlog_errno(ret);
2353                         break;
2354                 }
2355
2356                 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2357                 if (ret < 0) {
2358                         mlog_errno(ret);
2359                         break;
2360                 }
2361
2362                 ocfs2_commit_trans(osb, ctxt.handle);
2363                 if (ctxt.meta_ac) {
2364                         ocfs2_free_alloc_context(ctxt.meta_ac);
2365                         ctxt.meta_ac = NULL;
2366                 }
2367         }
2368
2369         if (ctxt.meta_ac)
2370                 ocfs2_free_alloc_context(ctxt.meta_ac);
2371         ocfs2_schedule_truncate_log_flush(osb, 1);
2372         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2373         return ret;
2374 }
2375
2376 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2377                                     struct buffer_head *di_bh,
2378                                     struct ocfs2_caching_info *ref_ci,
2379                                     struct buffer_head *ref_root_bh)
2380 {
2381
2382         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2383         struct ocfs2_xattr_header *header;
2384         int ret;
2385         struct ocfs2_xattr_value_buf vb = {
2386                 .vb_bh = di_bh,
2387                 .vb_access = ocfs2_journal_access_di,
2388         };
2389
2390         header = (struct ocfs2_xattr_header *)
2391                  ((void *)di + inode->i_sb->s_blocksize -
2392                  le16_to_cpu(di->i_xattr_inline_size));
2393
2394         ret = ocfs2_remove_value_outside(inode, &vb, header,
2395                                          ref_ci, ref_root_bh);
2396
2397         return ret;
2398 }
2399
/*
 * Refcount tree context bundled into a single pointer so that it can be
 * passed as the opaque argument of ocfs2_iterate_xattr_index_block().
 */
struct ocfs2_rm_xattr_bucket_para {
	struct ocfs2_caching_info	*ref_ci;	/* refcount tree cache info */
	struct buffer_head		*ref_root_bh;	/* refcount tree root block */
};
2404
2405 static int ocfs2_xattr_block_remove(struct inode *inode,
2406                                     struct buffer_head *blk_bh,
2407                                     struct ocfs2_caching_info *ref_ci,
2408                                     struct buffer_head *ref_root_bh)
2409 {
2410         struct ocfs2_xattr_block *xb;
2411         int ret = 0;
2412         struct ocfs2_xattr_value_buf vb = {
2413                 .vb_bh = blk_bh,
2414                 .vb_access = ocfs2_journal_access_xb,
2415         };
2416         struct ocfs2_rm_xattr_bucket_para args = {
2417                 .ref_ci = ref_ci,
2418                 .ref_root_bh = ref_root_bh,
2419         };
2420
2421         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2422         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2423                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2424                 ret = ocfs2_remove_value_outside(inode, &vb, header,
2425                                                  ref_ci, ref_root_bh);
2426         } else
2427                 ret = ocfs2_iterate_xattr_index_block(inode,
2428                                                 blk_bh,
2429                                                 ocfs2_rm_xattr_cluster,
2430                                                 &args);
2431
2432         return ret;
2433 }
2434
2435 static int ocfs2_xattr_free_block(struct inode *inode,
2436                                   u64 block,
2437                                   struct ocfs2_caching_info *ref_ci,
2438                                   struct buffer_head *ref_root_bh)
2439 {
2440         struct inode *xb_alloc_inode;
2441         struct buffer_head *xb_alloc_bh = NULL;
2442         struct buffer_head *blk_bh = NULL;
2443         struct ocfs2_xattr_block *xb;
2444         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2445         handle_t *handle;
2446         int ret = 0;
2447         u64 blk, bg_blkno;
2448         u16 bit;
2449
2450         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2451         if (ret < 0) {
2452                 mlog_errno(ret);
2453                 goto out;
2454         }
2455
2456         ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2457         if (ret < 0) {
2458                 mlog_errno(ret);
2459                 goto out;
2460         }
2461
2462         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2463         blk = le64_to_cpu(xb->xb_blkno);
2464         bit = le16_to_cpu(xb->xb_suballoc_bit);
2465         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2466
2467         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2468                                 EXTENT_ALLOC_SYSTEM_INODE,
2469                                 le16_to_cpu(xb->xb_suballoc_slot));
2470         if (!xb_alloc_inode) {
2471                 ret = -ENOMEM;
2472                 mlog_errno(ret);
2473                 goto out;
2474         }
2475         mutex_lock(&xb_alloc_inode->i_mutex);
2476
2477         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2478         if (ret < 0) {
2479                 mlog_errno(ret);
2480                 goto out_mutex;
2481         }
2482
2483         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2484         if (IS_ERR(handle)) {
2485                 ret = PTR_ERR(handle);
2486                 mlog_errno(ret);
2487                 goto out_unlock;
2488         }
2489
2490         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2491                                        bit, bg_blkno, 1);
2492         if (ret < 0)
2493                 mlog_errno(ret);
2494
2495         ocfs2_commit_trans(osb, handle);
2496 out_unlock:
2497         ocfs2_inode_unlock(xb_alloc_inode, 1);
2498         brelse(xb_alloc_bh);
2499 out_mutex:
2500         mutex_unlock(&xb_alloc_inode->i_mutex);
2501         iput(xb_alloc_inode);
2502 out:
2503         brelse(blk_bh);
2504         return ret;
2505 }
2506
2507 /*
2508  * ocfs2_xattr_remove()
2509  *
2510  * Free extended attribute resources associated with this inode.
2511  */
2512 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2513 {
2514         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2515         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2516         struct ocfs2_refcount_tree *ref_tree = NULL;
2517         struct buffer_head *ref_root_bh = NULL;
2518         struct ocfs2_caching_info *ref_ci = NULL;
2519         handle_t *handle;
2520         int ret;
2521
2522         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2523                 return 0;
2524
2525         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2526                 return 0;
2527
2528         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2529                 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2530                                                le64_to_cpu(di->i_refcount_loc),
2531                                                1, &ref_tree, &ref_root_bh);
2532                 if (ret) {
2533                         mlog_errno(ret);
2534                         goto out;
2535                 }
2536                 ref_ci = &ref_tree->rf_ci;
2537
2538         }
2539
2540         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2541                 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2542                                                ref_ci, ref_root_bh);
2543                 if (ret < 0) {
2544                         mlog_errno(ret);
2545                         goto out;
2546                 }
2547         }
2548
2549         if (di->i_xattr_loc) {
2550                 ret = ocfs2_xattr_free_block(inode,
2551                                              le64_to_cpu(di->i_xattr_loc),
2552                                              ref_ci, ref_root_bh);
2553                 if (ret < 0) {
2554                         mlog_errno(ret);
2555                         goto out;
2556                 }
2557         }
2558
2559         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2560                                    OCFS2_INODE_UPDATE_CREDITS);
2561         if (IS_ERR(handle)) {
2562                 ret = PTR_ERR(handle);
2563                 mlog_errno(ret);
2564                 goto out;
2565         }
2566         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2567                                       OCFS2_JOURNAL_ACCESS_WRITE);
2568         if (ret) {
2569                 mlog_errno(ret);
2570                 goto out_commit;
2571         }
2572
2573         di->i_xattr_loc = 0;
2574
2575         spin_lock(&oi->ip_lock);
2576         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2577         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2578         spin_unlock(&oi->ip_lock);
2579
2580         ret = ocfs2_journal_dirty(handle, di_bh);
2581         if (ret < 0)
2582                 mlog_errno(ret);
2583 out_commit:
2584         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2585 out:
2586         if (ref_tree)
2587                 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2588         brelse(ref_root_bh);
2589         return ret;
2590 }
2591
2592 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2593                                         struct ocfs2_dinode *di)
2594 {
2595         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2596         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2597         int free;
2598
2599         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2600                 return 0;
2601
2602         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2603                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2604                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2605         } else if (ocfs2_inode_is_fast_symlink(inode)) {
2606                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2607                         le64_to_cpu(di->i_size);
2608         } else {
2609                 struct ocfs2_extent_list *el = &di->id2.i_list;
2610                 free = (le16_to_cpu(el->l_count) -
2611                         le16_to_cpu(el->l_next_free_rec)) *
2612                         sizeof(struct ocfs2_extent_rec);
2613         }
2614         if (free >= xattrsize)
2615                 return 1;
2616
2617         return 0;
2618 }
2619
2620 /*
2621  * ocfs2_xattr_ibody_find()
2622  *
2623  * Find extended attribute in inode block and
2624  * fill search info into struct ocfs2_xattr_search.
2625  */
2626 static int ocfs2_xattr_ibody_find(struct inode *inode,
2627                                   int name_index,
2628                                   const char *name,
2629                                   struct ocfs2_xattr_search *xs)
2630 {
2631         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2632         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2633         int ret;
2634         int has_space = 0;
2635
2636         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2637                 return 0;
2638
2639         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2640                 down_read(&oi->ip_alloc_sem);
2641                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2642                 up_read(&oi->ip_alloc_sem);
2643                 if (!has_space)
2644                         return 0;
2645         }
2646
2647         xs->xattr_bh = xs->inode_bh;
2648         xs->end = (void *)di + inode->i_sb->s_blocksize;
2649         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2650                 xs->header = (struct ocfs2_xattr_header *)
2651                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2652         else
2653                 xs->header = (struct ocfs2_xattr_header *)
2654                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2655         xs->base = (void *)xs->header;
2656         xs->here = xs->header->xh_entries;
2657
2658         /* Find the named attribute. */
2659         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2660                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2661                 if (ret && ret != -ENODATA)
2662                         return ret;
2663                 xs->not_found = ret;
2664         }
2665
2666         return 0;
2667 }
2668
2669 /*
2670  * ocfs2_xattr_ibody_set()
2671  *
2672  * Set, replace or remove an extended attribute into inode block.
2673  *
2674  */
2675 static int ocfs2_xattr_ibody_set(struct inode *inode,
2676                                  struct ocfs2_xattr_info *xi,
2677                                  struct ocfs2_xattr_search *xs,
2678                                  struct ocfs2_xattr_set_ctxt *ctxt)
2679 {
2680         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2681         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2682         int ret;
2683
2684         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2685                 return -ENOSPC;
2686
2687         down_write(&oi->ip_alloc_sem);
2688         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2689                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2690                         ret = -ENOSPC;
2691                         goto out;
2692                 }
2693         }
2694
2695         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2696                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2697 out:
2698         up_write(&oi->ip_alloc_sem);
2699
2700         return ret;
2701 }
2702
2703 /*
2704  * ocfs2_xattr_block_find()
2705  *
2706  * Find extended attribute in external block and
2707  * fill search info into struct ocfs2_xattr_search.
2708  */
2709 static int ocfs2_xattr_block_find(struct inode *inode,
2710                                   int name_index,
2711                                   const char *name,
2712                                   struct ocfs2_xattr_search *xs)
2713 {
2714         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2715         struct buffer_head *blk_bh = NULL;
2716         struct ocfs2_xattr_block *xb;
2717         int ret = 0;
2718
2719         if (!di->i_xattr_loc)
2720                 return ret;
2721
2722         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2723                                      &blk_bh);
2724         if (ret < 0) {
2725                 mlog_errno(ret);
2726                 return ret;
2727         }
2728
2729         xs->xattr_bh = blk_bh;
2730         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2731
2732         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2733                 xs->header = &xb->xb_attrs.xb_header;
2734                 xs->base = (void *)xs->header;
2735                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2736                 xs->here = xs->header->xh_entries;
2737
2738                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2739         } else
2740                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2741                                                    name_index,
2742                                                    name, xs);
2743
2744         if (ret && ret != -ENODATA) {
2745                 xs->xattr_bh = NULL;
2746                 goto cleanup;
2747         }
2748         xs->not_found = ret;
2749         return 0;
2750 cleanup:
2751         brelse(blk_bh);
2752
2753         return ret;
2754 }
2755
2756 static int ocfs2_create_xattr_block(handle_t *handle,
2757                                     struct inode *inode,
2758                                     struct buffer_head *inode_bh,
2759                                     struct ocfs2_alloc_context *meta_ac,
2760                                     struct buffer_head **ret_bh,
2761                                     int indexed)
2762 {
2763         int ret;
2764         u16 suballoc_bit_start;
2765         u32 num_got;
2766         u64 first_blkno;
2767         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2768         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2769         struct buffer_head *new_bh = NULL;
2770         struct ocfs2_xattr_block *xblk;
2771
2772         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2773                                       OCFS2_JOURNAL_ACCESS_CREATE);
2774         if (ret < 0) {
2775                 mlog_errno(ret);
2776                 goto end;
2777         }
2778
2779         ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2780                                    &suballoc_bit_start, &num_got,
2781                                    &first_blkno);
2782         if (ret < 0) {
2783                 mlog_errno(ret);
2784                 goto end;
2785         }
2786
2787         new_bh = sb_getblk(inode->i_sb, first_blkno);
2788         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2789
2790         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2791                                       new_bh,
2792                                       OCFS2_JOURNAL_ACCESS_CREATE);
2793         if (ret < 0) {
2794                 mlog_errno(ret);
2795                 goto end;
2796         }
2797
2798         /* Initialize ocfs2_xattr_block */
2799         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2800         memset(xblk, 0, inode->i_sb->s_blocksize);
2801         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2802         xblk->xb_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2803         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2804         xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2805         xblk->xb_blkno = cpu_to_le64(first_blkno);
2806
2807         if (indexed) {
2808                 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2809                 xr->xt_clusters = cpu_to_le32(1);
2810                 xr->xt_last_eb_blk = 0;
2811                 xr->xt_list.l_tree_depth = 0;
2812                 xr->xt_list.l_count = cpu_to_le16(
2813                                         ocfs2_xattr_recs_per_xb(inode->i_sb));
2814                 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2815                 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2816         }
2817
2818         ret = ocfs2_journal_dirty(handle, new_bh);
2819         if (ret < 0) {
2820                 mlog_errno(ret);
2821                 goto end;
2822         }
2823         di->i_xattr_loc = cpu_to_le64(first_blkno);
2824         ocfs2_journal_dirty(handle, inode_bh);
2825
2826         *ret_bh = new_bh;
2827         new_bh = NULL;
2828
2829 end:
2830         brelse(new_bh);
2831         return ret;
2832 }
2833
2834 /*
2835  * ocfs2_xattr_block_set()
2836  *
2837  * Set, replace or remove an extended attribute into external block.
2838  *
2839  */
2840 static int ocfs2_xattr_block_set(struct inode *inode,
2841                                  struct ocfs2_xattr_info *xi,
2842                                  struct ocfs2_xattr_search *xs,
2843                                  struct ocfs2_xattr_set_ctxt *ctxt)
2844 {
2845         struct buffer_head *new_bh = NULL;
2846         handle_t *handle = ctxt->handle;
2847         struct ocfs2_xattr_block *xblk = NULL;
2848         int ret;
2849
2850         if (!xs->xattr_bh) {
2851                 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2852                                                ctxt->meta_ac, &new_bh, 0);
2853                 if (ret) {
2854                         mlog_errno(ret);
2855                         goto end;
2856                 }
2857
2858                 xs->xattr_bh = new_bh;
2859                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2860                 xs->header = &xblk->xb_attrs.xb_header;
2861                 xs->base = (void *)xs->header;
2862                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2863                 xs->here = xs->header->xh_entries;
2864         } else
2865                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2866
2867         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2868                 /* Set extended attribute into external block */
2869                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2870                                             OCFS2_HAS_XATTR_FL);
2871                 if (!ret || ret != -ENOSPC)
2872                         goto end;
2873
2874                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2875                 if (ret)
2876                         goto end;
2877         }
2878
2879         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2880
2881 end:
2882
2883         return ret;
2884 }
2885
2886 /* Check whether the new xattr can be inserted into the inode. */
2887 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2888                                        struct ocfs2_xattr_info *xi,
2889                                        struct ocfs2_xattr_search *xs)
2890 {
2891         struct ocfs2_xattr_entry *last;
2892         int free, i;
2893         size_t min_offs = xs->end - xs->base;
2894
2895         if (!xs->header)
2896                 return 0;
2897
2898         last = xs->header->xh_entries;
2899
2900         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2901                 size_t offs = le16_to_cpu(last->xe_name_offset);
2902                 if (offs < min_offs)
2903                         min_offs = offs;
2904                 last += 1;
2905         }
2906
2907         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2908         if (free < 0)
2909                 return 0;
2910
2911         BUG_ON(!xs->not_found);
2912
2913         if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
2914                 return 1;
2915
2916         return 0;
2917 }
2918
2919 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2920                                      struct ocfs2_dinode *di,
2921                                      struct ocfs2_xattr_info *xi,
2922                                      struct ocfs2_xattr_search *xis,
2923                                      struct ocfs2_xattr_search *xbs,
2924                                      int *clusters_need,
2925                                      int *meta_need,
2926                                      int *credits_need)
2927 {
2928         int ret = 0, old_in_xb = 0;
2929         int clusters_add = 0, meta_add = 0, credits = 0;
2930         struct buffer_head *bh = NULL;
2931         struct ocfs2_xattr_block *xb = NULL;
2932         struct ocfs2_xattr_entry *xe = NULL;
2933         struct ocfs2_xattr_value_root *xv = NULL;
2934         char *base = NULL;
2935         int name_offset, name_len = 0;
2936         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2937                                                     xi->xi_value_len);
2938         u64 value_size;
2939
2940         /*
2941          * Calculate the clusters we need to write.
2942          * No matter whether we replace an old one or add a new one,
2943          * we need this for writing.
2944          */
2945         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2946                 credits += new_clusters *
2947                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2948
2949         if (xis->not_found && xbs->not_found) {
2950                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2951
2952                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2953                         clusters_add += new_clusters;
2954                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2955                                                         &def_xv.xv.xr_list,
2956                                                         new_clusters);
2957                 }
2958
2959                 goto meta_guess;
2960         }
2961
2962         if (!xis->not_found) {
2963                 xe = xis->here;
2964                 name_offset = le16_to_cpu(xe->xe_name_offset);
2965                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2966                 base = xis->base;
2967                 credits += OCFS2_INODE_UPDATE_CREDITS;
2968         } else {
2969                 int i, block_off = 0;
2970                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2971                 xe = xbs->here;
2972                 name_offset = le16_to_cpu(xe->xe_name_offset);
2973                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2974                 i = xbs->here - xbs->header->xh_entries;
2975                 old_in_xb = 1;
2976
2977                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2978                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2979                                                         bucket_xh(xbs->bucket),
2980                                                         i, &block_off,
2981                                                         &name_offset);
2982                         base = bucket_block(xbs->bucket, block_off);
2983                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2984                 } else {
2985                         base = xbs->base;
2986                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2987                 }
2988         }
2989
2990         /*
2991          * delete a xattr doesn't need metadata and cluster allocation.
2992          * so just calculate the credits and return.
2993          *
2994          * The credits for removing the value tree will be extended
2995          * by ocfs2_remove_extent itself.
2996          */
2997         if (!xi->xi_value) {
2998                 if (!ocfs2_xattr_is_local(xe))
2999                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3000
3001                 goto out;
3002         }
3003
3004         /* do cluster allocation guess first. */
3005         value_size = le64_to_cpu(xe->xe_value_size);
3006
3007         if (old_in_xb) {
3008                 /*
3009                  * In xattr set, we always try to set the xe in inode first,
3010                  * so if it can be inserted into inode successfully, the old
3011                  * one will be removed from the xattr block, and this xattr
3012                  * will be inserted into inode as a new xattr in inode.
3013                  */
3014                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3015                         clusters_add += new_clusters;
3016                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
3017                                     OCFS2_INODE_UPDATE_CREDITS;
3018                         if (!ocfs2_xattr_is_local(xe))
3019                                 credits += ocfs2_calc_extend_credits(
3020                                                         inode->i_sb,
3021                                                         &def_xv.xv.xr_list,
3022                                                         new_clusters);
3023                         goto out;
3024                 }
3025         }
3026
3027         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3028                 /* the new values will be stored outside. */
3029                 u32 old_clusters = 0;
3030
3031                 if (!ocfs2_xattr_is_local(xe)) {
3032                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3033                                                                  value_size);
3034                         xv = (struct ocfs2_xattr_value_root *)
3035                              (base + name_offset + name_len);
3036                         value_size = OCFS2_XATTR_ROOT_SIZE;
3037                 } else
3038                         xv = &def_xv.xv;
3039
3040                 if (old_clusters >= new_clusters) {
3041                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3042                         goto out;
3043                 } else {
3044                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3045                         clusters_add += new_clusters - old_clusters;
3046                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3047                                                              &xv->xr_list,
3048                                                              new_clusters -
3049                                                              old_clusters);
3050                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3051                                 goto out;
3052                 }
3053         } else {
3054                 /*
3055                  * Now the new value will be stored inside. So if the new
3056                  * value is smaller than the size of value root or the old
3057                  * value, we don't need any allocation, otherwise we have
3058                  * to guess metadata allocation.
3059                  */
3060                 if ((ocfs2_xattr_is_local(xe) &&
3061                      (value_size >= xi->xi_value_len)) ||
3062                     (!ocfs2_xattr_is_local(xe) &&
3063                      OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3064                         goto out;
3065         }
3066
3067 meta_guess:
3068         /* calculate metadata allocation. */
3069         if (di->i_xattr_loc) {
3070                 if (!xbs->xattr_bh) {
3071                         ret = ocfs2_read_xattr_block(inode,
3072                                                      le64_to_cpu(di->i_xattr_loc),
3073                                                      &bh);
3074                         if (ret) {
3075                                 mlog_errno(ret);
3076                                 goto out;
3077                         }
3078
3079                         xb = (struct ocfs2_xattr_block *)bh->b_data;
3080                 } else
3081                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3082
3083                 /*
3084                  * If there is already an xattr tree, good, we can calculate
3085                  * like other b-trees. Otherwise we may have the chance of
3086                  * create a tree, the credit calculation is borrowed from
3087                  * ocfs2_calc_extend_credits with root_el = NULL. And the
3088                  * new tree will be cluster based, so no meta is needed.
3089                  */
3090                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3091                         struct ocfs2_extent_list *el =
3092                                  &xb->xb_attrs.xb_root.xt_list;
3093                         meta_add += ocfs2_extend_meta_needed(el);
3094                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3095                                                              el, 1);
3096                 } else
3097                         credits += OCFS2_SUBALLOC_ALLOC + 1;
3098
3099                 /*
3100                  * This cluster will be used either for new bucket or for
3101                  * new xattr block.
3102                  * If the cluster size is the same as the bucket size, one
3103                  * more is needed since we may need to extend the bucket
3104                  * also.
3105                  */
3106                 clusters_add += 1;
3107                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3108                 if (OCFS2_XATTR_BUCKET_SIZE ==
3109                         OCFS2_SB(inode->i_sb)->s_clustersize) {
3110                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3111                         clusters_add += 1;
3112                 }
3113         } else {
3114                 meta_add += 1;
3115                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3116         }
3117 out:
3118         if (clusters_need)
3119                 *clusters_need = clusters_add;
3120         if (meta_need)
3121                 *meta_need = meta_add;
3122         if (credits_need)
3123                 *credits_need = credits;
3124         brelse(bh);
3125         return ret;
3126 }
3127
3128 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3129                                      struct ocfs2_dinode *di,
3130                                      struct ocfs2_xattr_info *xi,
3131                                      struct ocfs2_xattr_search *xis,
3132                                      struct ocfs2_xattr_search *xbs,
3133                                      struct ocfs2_xattr_set_ctxt *ctxt,
3134                                      int extra_meta,
3135                                      int *credits)
3136 {
3137         int clusters_add, meta_add, ret;
3138         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3139
3140         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3141
3142         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3143
3144         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3145                                         &clusters_add, &meta_add, credits);
3146         if (ret) {
3147                 mlog_errno(ret);
3148                 return ret;
3149         }
3150
3151         meta_add += extra_meta;
3152         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3153              "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3154
3155         if (meta_add) {
3156                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3157                                                         &ctxt->meta_ac);
3158                 if (ret) {
3159                         mlog_errno(ret);
3160                         goto out;
3161                 }
3162         }
3163
3164         if (clusters_add) {
3165                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3166                 if (ret)
3167                         mlog_errno(ret);
3168         }
3169 out:
3170         if (ret) {
3171                 if (ctxt->meta_ac) {
3172                         ocfs2_free_alloc_context(ctxt->meta_ac);
3173                         ctxt->meta_ac = NULL;
3174                 }
3175
3176                 /*
3177                  * We cannot have an error and a non null ctxt->data_ac.
3178                  */
3179         }
3180
3181         return ret;
3182 }
3183
3184 static int __ocfs2_xattr_set_handle(struct inode *inode,
3185                                     struct ocfs2_dinode *di,
3186                                     struct ocfs2_xattr_info *xi,
3187                                     struct ocfs2_xattr_search *xis,
3188                                     struct ocfs2_xattr_search *xbs,
3189                                     struct ocfs2_xattr_set_ctxt *ctxt)
3190 {
3191         int ret = 0, credits, old_found;
3192
3193         if (!xi->xi_value) {
3194                 /* Remove existing extended attribute */
3195                 if (!xis->not_found)
3196                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3197                 else if (!xbs->not_found)
3198                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3199         } else {
3200                 /* We always try to set extended attribute into inode first*/
3201                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3202                 if (!ret && !xbs->not_found) {
3203                         /*
3204                          * If succeed and that extended attribute existing in
3205                          * external block, then we will remove it.
3206                          */
3207                         xi->xi_value = NULL;
3208                         xi->xi_value_len = 0;
3209
3210                         old_found = xis->not_found;
3211                         xis->not_found = -ENODATA;
3212                         ret = ocfs2_calc_xattr_set_need(inode,
3213                                                         di,
3214                                                         xi,
3215                                                         xis,
3216                                                         xbs,
3217                                                         NULL,
3218                                                         NULL,
3219                                                         &credits);
3220                         xis->not_found = old_found;
3221                         if (ret) {
3222                                 mlog_errno(ret);
3223                                 goto out;
3224                         }
3225
3226                         ret = ocfs2_extend_trans(ctxt->handle, credits +
3227                                         ctxt->handle->h_buffer_credits);
3228                         if (ret) {
3229                                 mlog_errno(ret);
3230                                 goto out;
3231                         }
3232                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3233                 } else if (ret == -ENOSPC) {
3234                         if (di->i_xattr_loc && !xbs->xattr_bh) {
3235                                 ret = ocfs2_xattr_block_find(inode,
3236                                                              xi->xi_name_index,
3237                                                              xi->xi_name, xbs);
3238                                 if (ret)
3239                                         goto out;
3240
3241                                 old_found = xis->not_found;
3242                                 xis->not_found = -ENODATA;
3243                                 ret = ocfs2_calc_xattr_set_need(inode,
3244                                                                 di,
3245                                                                 xi,
3246                                                                 xis,
3247                                                                 xbs,
3248                                                                 NULL,
3249                                                                 NULL,
3250                                                                 &credits);
3251                                 xis->not_found = old_found;
3252                                 if (ret) {
3253                                         mlog_errno(ret);
3254                                         goto out;
3255                                 }
3256
3257                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3258                                         ctxt->handle->h_buffer_credits);
3259                                 if (ret) {
3260                                         mlog_errno(ret);
3261                                         goto out;
3262                                 }
3263                         }
3264                         /*
3265                          * If no space in inode, we will set extended attribute
3266                          * into external block.
3267                          */
3268                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3269                         if (ret)
3270                                 goto out;
3271                         if (!xis->not_found) {
3272                                 /*
3273                                  * If succeed and that extended attribute
3274                                  * existing in inode, we will remove it.
3275                                  */
3276                                 xi->xi_value = NULL;
3277                                 xi->xi_value_len = 0;
3278                                 xbs->not_found = -ENODATA;
3279                                 ret = ocfs2_calc_xattr_set_need(inode,
3280                                                                 di,
3281                                                                 xi,
3282                                                                 xis,
3283                                                                 xbs,
3284                                                                 NULL,
3285                                                                 NULL,
3286                                                                 &credits);
3287                                 if (ret) {
3288                                         mlog_errno(ret);
3289                                         goto out;
3290                                 }
3291
3292                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3293                                                 ctxt->handle->h_buffer_credits);
3294                                 if (ret) {
3295                                         mlog_errno(ret);
3296                                         goto out;
3297                                 }
3298                                 ret = ocfs2_xattr_ibody_set(inode, xi,
3299                                                             xis, ctxt);
3300                         }
3301                 }
3302         }
3303
3304         if (!ret) {
3305                 /* Update inode ctime. */
3306                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3307                                               xis->inode_bh,
3308                                               OCFS2_JOURNAL_ACCESS_WRITE);
3309                 if (ret) {
3310                         mlog_errno(ret);
3311                         goto out;
3312                 }
3313
3314                 inode->i_ctime = CURRENT_TIME;
3315                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3316                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3317                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3318         }
3319 out:
3320         return ret;
3321 }
3322
3323 /*
3324  * This function only called duing creating inode
3325  * for init security/acl xattrs of the new inode.
3326  * All transanction credits have been reserved in mknod.
3327  */
3328 int ocfs2_xattr_set_handle(handle_t *handle,
3329                            struct inode *inode,
3330                            struct buffer_head *di_bh,
3331                            int name_index,
3332                            const char *name,
3333                            const void *value,
3334                            size_t value_len,
3335                            int flags,
3336                            struct ocfs2_alloc_context *meta_ac,
3337                            struct ocfs2_alloc_context *data_ac)
3338 {
3339         struct ocfs2_dinode *di;
3340         int ret;
3341
3342         struct ocfs2_xattr_info xi = {
3343                 .xi_name_index = name_index,
3344                 .xi_name = name,
3345                 .xi_name_len = strlen(name),
3346                 .xi_value = value,
3347                 .xi_value_len = value_len,
3348         };
3349
3350         struct ocfs2_xattr_search xis = {
3351                 .not_found = -ENODATA,
3352         };
3353
3354         struct ocfs2_xattr_search xbs = {
3355                 .not_found = -ENODATA,
3356         };
3357
3358         struct ocfs2_xattr_set_ctxt ctxt = {
3359                 .handle = handle,
3360                 .meta_ac = meta_ac,
3361                 .data_ac = data_ac,
3362         };
3363
3364         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3365                 return -EOPNOTSUPP;
3366
3367         /*
3368          * In extreme situation, may need xattr bucket when
3369          * block size is too small. And we have already reserved
3370          * the credits for bucket in mknod.
3371          */
3372         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3373                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
3374                 if (!xbs.bucket) {
3375                         mlog_errno(-ENOMEM);
3376                         return -ENOMEM;
3377                 }
3378         }
3379
3380         xis.inode_bh = xbs.inode_bh = di_bh;
3381         di = (struct ocfs2_dinode *)di_bh->b_data;
3382
3383         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3384
3385         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3386         if (ret)
3387                 goto cleanup;
3388         if (xis.not_found) {
3389                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3390                 if (ret)
3391                         goto cleanup;
3392         }
3393
3394         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3395
3396 cleanup:
3397         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3398         brelse(xbs.xattr_bh);
3399         ocfs2_xattr_bucket_free(xbs.bucket);
3400
3401         return ret;
3402 }
3403
3404 /*
3405  * ocfs2_xattr_set()
3406  *
3407  * Set, replace or remove an extended attribute for this inode.
3408  * value is NULL to remove an existing extended attribute, else either
3409  * create or replace an extended attribute.
3410  */
3411 int ocfs2_xattr_set(struct inode *inode,
3412                     int name_index,
3413                     const char *name,
3414                     const void *value,
3415                     size_t value_len,
3416                     int flags)
3417 {
3418         struct buffer_head *di_bh = NULL;
3419         struct ocfs2_dinode *di;
3420         int ret, credits, ref_meta = 0, ref_credits = 0;
3421         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3422         struct inode *tl_inode = osb->osb_tl_inode;
3423         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3424         struct ocfs2_refcount_tree *ref_tree = NULL;
3425
3426         struct ocfs2_xattr_info xi = {
3427                 .xi_name_index = name_index,
3428                 .xi_name = name,
3429                 .xi_name_len = strlen(name),
3430                 .xi_value = value,
3431                 .xi_value_len = value_len,
3432         };
3433
3434         struct ocfs2_xattr_search xis = {
3435                 .not_found = -ENODATA,
3436         };
3437
3438         struct ocfs2_xattr_search xbs = {
3439                 .not_found = -ENODATA,
3440         };
3441
3442         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3443                 return -EOPNOTSUPP;
3444
3445         /*
3446          * Only xbs will be used on indexed trees.  xis doesn't need a
3447          * bucket.
3448          */
3449         xbs.bucket = ocfs2_xattr_bucket_new(inode);
3450         if (!xbs.bucket) {
3451                 mlog_errno(-ENOMEM);
3452                 return -ENOMEM;
3453         }
3454
3455         ret = ocfs2_inode_lock(inode, &di_bh, 1);
3456         if (ret < 0) {
3457                 mlog_errno(ret);
3458                 goto cleanup_nolock;
3459         }
3460         xis.inode_bh = xbs.inode_bh = di_bh;
3461         di = (struct ocfs2_dinode *)di_bh->b_data;
3462
3463         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3464         /*
3465          * Scan inode and external block to find the same name
3466          * extended attribute and collect search infomation.
3467          */
3468         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3469         if (ret)
3470                 goto cleanup;
3471         if (xis.not_found) {
3472                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3473                 if (ret)
3474                         goto cleanup;
3475         }
3476
3477         if (xis.not_found && xbs.not_found) {
3478                 ret = -ENODATA;
3479                 if (flags & XATTR_REPLACE)
3480                         goto cleanup;
3481                 ret = 0;
3482                 if (!value)
3483                         goto cleanup;
3484         } else {
3485                 ret = -EEXIST;
3486                 if (flags & XATTR_CREATE)
3487                         goto cleanup;
3488         }
3489
3490         /* Check whether the value is refcounted and do some prepartion. */
3491         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3492             (!xis.not_found || !xbs.not_found)) {
3493                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3494                                                    &xis, &xbs, &ref_tree,
3495                                                    &ref_meta, &ref_credits);
3496                 if (ret) {
3497                         mlog_errno(ret);
3498                         goto cleanup;
3499                 }
3500         }
3501
3502         mutex_lock(&tl_inode->i_mutex);
3503
3504         if (ocfs2_truncate_log_needs_flush(osb)) {
3505                 ret = __ocfs2_flush_truncate_log(osb);
3506                 if (ret < 0) {
3507                         mutex_unlock(&tl_inode->i_mutex);
3508                         mlog_errno(ret);
3509                         goto cleanup;
3510                 }
3511         }
3512         mutex_unlock(&tl_inode->i_mutex);
3513
3514         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3515                                         &xbs, &ctxt, ref_meta, &credits);
3516         if (ret) {
3517                 mlog_errno(ret);
3518                 goto cleanup;
3519         }
3520
3521         /* we need to update inode's ctime field, so add credit for it. */
3522         credits += OCFS2_INODE_UPDATE_CREDITS;
3523         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3524         if (IS_ERR(ctxt.handle)) {
3525                 ret = PTR_ERR(ctxt.handle);
3526                 mlog_errno(ret);
3527                 goto cleanup;
3528         }
3529
3530         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3531
3532         ocfs2_commit_trans(osb, ctxt.handle);
3533
3534         if (ctxt.data_ac)
3535                 ocfs2_free_alloc_context(ctxt.data_ac);
3536         if (ctxt.meta_ac)
3537                 ocfs2_free_alloc_context(ctxt.meta_ac);
3538         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3539                 ocfs2_schedule_truncate_log_flush(osb, 1);
3540         ocfs2_run_deallocs(osb, &ctxt.dealloc);
3541
3542 cleanup:
3543         if (ref_tree)
3544                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3545         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3546         if (!value && !ret) {
3547                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3548                 if (ret)
3549                         mlog_errno(ret);
3550         }
3551         ocfs2_inode_unlock(inode, 1);
3552 cleanup_nolock:
3553         brelse(di_bh);
3554         brelse(xbs.xattr_bh);
3555         ocfs2_xattr_bucket_free(xbs.bucket);
3556
3557         return ret;
3558 }
3559
3560 /*
3561  * Find the xattr extent rec which may contains name_hash.
3562  * e_cpos will be the first name hash of the xattr rec.
3563  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3564  */
3565 static int ocfs2_xattr_get_rec(struct inode *inode,
3566                                u32 name_hash,
3567                                u64 *p_blkno,
3568                                u32 *e_cpos,
3569                                u32 *num_clusters,
3570                                struct ocfs2_extent_list *el)
3571 {
3572         int ret = 0, i;
3573         struct buffer_head *eb_bh = NULL;
3574         struct ocfs2_extent_block *eb;
3575         struct ocfs2_extent_rec *rec = NULL;
3576         u64 e_blkno = 0;
3577
3578         if (el->l_tree_depth) {
3579                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3580                                       &eb_bh);
3581                 if (ret) {
3582                         mlog_errno(ret);
3583                         goto out;
3584                 }
3585
3586                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3587                 el = &eb->h_list;
3588
3589                 if (el->l_tree_depth) {
3590                         ocfs2_error(inode->i_sb,
3591                                     "Inode %lu has non zero tree depth in "
3592                                     "xattr tree block %llu\n", inode->i_ino,
3593                                     (unsigned long long)eb_bh->b_blocknr);
3594                         ret = -EROFS;
3595                         goto out;
3596                 }
3597         }
3598
3599         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3600                 rec = &el->l_recs[i];
3601
3602                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3603                         e_blkno = le64_to_cpu(rec->e_blkno);
3604                         break;
3605                 }
3606         }
3607
3608         if (!e_blkno) {
3609                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3610                             "record (%u, %u, 0) in xattr", inode->i_ino,
3611                             le32_to_cpu(rec->e_cpos),
3612                             ocfs2_rec_clusters(el, rec));
3613                 ret = -EROFS;
3614                 goto out;
3615         }
3616
3617         *p_blkno = le64_to_cpu(rec->e_blkno);
3618         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3619         if (e_cpos)
3620                 *e_cpos = le32_to_cpu(rec->e_cpos);
3621 out:
3622         brelse(eb_bh);
3623         return ret;
3624 }
3625
/*
 * Callback type used by ocfs2_iterate_xattr_buckets(): it is invoked with
 * the inode, the bucket that was just read and the caller's opaque @para,
 * and returns an int status.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
3629
3630 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3631                                    struct ocfs2_xattr_bucket *bucket,
3632                                    int name_index,
3633                                    const char *name,
3634                                    u32 name_hash,
3635                                    u16 *xe_index,
3636                                    int *found)
3637 {
3638         int i, ret = 0, cmp = 1, block_off, new_offset;
3639         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3640         size_t name_len = strlen(name);
3641         struct ocfs2_xattr_entry *xe = NULL;
3642         char *xe_name;
3643
3644         /*
3645          * We don't use binary search in the bucket because there
3646          * may be multiple entries with the same name hash.
3647          */
3648         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3649                 xe = &xh->xh_entries[i];
3650
3651                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3652                         continue;
3653                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3654                         break;
3655
3656                 cmp = name_index - ocfs2_xattr_get_type(xe);
3657                 if (!cmp)
3658                         cmp = name_len - xe->xe_name_len;
3659                 if (cmp)
3660                         continue;
3661
3662                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3663                                                         xh,
3664                                                         i,
3665                                                         &block_off,
3666                                                         &new_offset);
3667                 if (ret) {
3668                         mlog_errno(ret);
3669                         break;
3670                 }
3671
3672
3673                 xe_name = bucket_block(bucket, block_off) + new_offset;
3674                 if (!memcmp(name, xe_name, name_len)) {
3675                         *xe_index = i;
3676                         *found = 1;
3677                         ret = 0;
3678                         break;
3679                 }
3680         }
3681
3682         return ret;
3683 }
3684
3685 /*
3686  * Find the specified xattr entry in a series of buckets.
3687  * This series start from p_blkno and last for num_clusters.
3688  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3689  * the num of the valid buckets.
3690  *
3691  * Return the buffer_head this xattr should reside in. And if the xattr's
3692  * hash is in the gap of 2 buckets, return the lower bucket.
3693  */
3694 static int ocfs2_xattr_bucket_find(struct inode *inode,
3695                                    int name_index,
3696                                    const char *name,
3697                                    u32 name_hash,
3698                                    u64 p_blkno,
3699                                    u32 first_hash,
3700                                    u32 num_clusters,
3701                                    struct ocfs2_xattr_search *xs)
3702 {
3703         int ret, found = 0;
3704         struct ocfs2_xattr_header *xh = NULL;
3705         struct ocfs2_xattr_entry *xe = NULL;
3706         u16 index = 0;
3707         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3708         int low_bucket = 0, bucket, high_bucket;
3709         struct ocfs2_xattr_bucket *search;
3710         u32 last_hash;
3711         u64 blkno, lower_blkno = 0;
3712
3713         search = ocfs2_xattr_bucket_new(inode);
3714         if (!search) {
3715                 ret = -ENOMEM;
3716                 mlog_errno(ret);
3717                 goto out;
3718         }
3719
3720         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3721         if (ret) {
3722                 mlog_errno(ret);
3723                 goto out;
3724         }
3725
3726         xh = bucket_xh(search);
3727         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3728         while (low_bucket <= high_bucket) {
3729                 ocfs2_xattr_bucket_relse(search);
3730
3731                 bucket = (low_bucket + high_bucket) / 2;
3732                 blkno = p_blkno + bucket * blk_per_bucket;
3733                 ret = ocfs2_read_xattr_bucket(search, blkno);
3734                 if (ret) {
3735                         mlog_errno(ret);
3736                         goto out;
3737                 }
3738
3739                 xh = bucket_xh(search);
3740                 xe = &xh->xh_entries[0];
3741                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3742                         high_bucket = bucket - 1;
3743                         continue;
3744                 }
3745
3746                 /*
3747                  * Check whether the hash of the last entry in our
3748                  * bucket is larger than the search one. for an empty
3749                  * bucket, the last one is also the first one.
3750                  */
3751                 if (xh->xh_count)
3752                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3753
3754                 last_hash = le32_to_cpu(xe->xe_name_hash);
3755
3756                 /* record lower_blkno which may be the insert place. */
3757                 lower_blkno = blkno;
3758
3759                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3760                         low_bucket = bucket + 1;
3761                         continue;
3762                 }
3763
3764                 /* the searched xattr should reside in this bucket if exists. */
3765                 ret = ocfs2_find_xe_in_bucket(inode, search,
3766                                               name_index, name, name_hash,
3767                                               &index, &found);
3768                 if (ret) {
3769                         mlog_errno(ret);
3770                         goto out;
3771                 }
3772                 break;
3773         }
3774
3775         /*
3776          * Record the bucket we have found.
3777          * When the xattr's hash value is in the gap of 2 buckets, we will
3778          * always set it to the previous bucket.
3779          */
3780         if (!lower_blkno)
3781                 lower_blkno = p_blkno;
3782
3783         /* This should be in cache - we just read it during the search */
3784         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3785         if (ret) {
3786                 mlog_errno(ret);
3787                 goto out;
3788         }
3789
3790         xs->header = bucket_xh(xs->bucket);
3791         xs->base = bucket_block(xs->bucket, 0);
3792         xs->end = xs->base + inode->i_sb->s_blocksize;
3793
3794         if (found) {
3795                 xs->here = &xs->header->xh_entries[index];
3796                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3797                      (unsigned long long)bucket_blkno(xs->bucket), index);
3798         } else
3799                 ret = -ENODATA;
3800
3801 out:
3802         ocfs2_xattr_bucket_free(search);
3803         return ret;
3804 }
3805
3806 static int ocfs2_xattr_index_block_find(struct inode *inode,
3807                                         struct buffer_head *root_bh,
3808                                         int name_index,
3809                                         const char *name,
3810                                         struct ocfs2_xattr_search *xs)
3811 {
3812         int ret;
3813         struct ocfs2_xattr_block *xb =
3814                         (struct ocfs2_xattr_block *)root_bh->b_data;
3815         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3816         struct ocfs2_extent_list *el = &xb_root->xt_list;
3817         u64 p_blkno = 0;
3818         u32 first_hash, num_clusters = 0;
3819         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3820
3821         if (le16_to_cpu(el->l_next_free_rec) == 0)
3822                 return -ENODATA;
3823
3824         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3825              name, name_hash, name_index);
3826
3827         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3828                                   &num_clusters, el);
3829         if (ret) {
3830                 mlog_errno(ret);
3831                 goto out;
3832         }
3833
3834         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3835
3836         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3837              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3838              first_hash);
3839
3840         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3841                                       p_blkno, first_hash, num_clusters, xs);
3842
3843 out:
3844         return ret;
3845 }
3846
3847 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3848                                        u64 blkno,
3849                                        u32 clusters,
3850                                        xattr_bucket_func *func,
3851                                        void *para)
3852 {
3853         int i, ret = 0;
3854         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3855         u32 num_buckets = clusters * bpc;
3856         struct ocfs2_xattr_bucket *bucket;
3857
3858         bucket = ocfs2_xattr_bucket_new(inode);
3859         if (!bucket) {
3860                 mlog_errno(-ENOMEM);
3861                 return -ENOMEM;
3862         }
3863
3864         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3865              clusters, (unsigned long long)blkno);
3866
3867         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3868                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3869                 if (ret) {
3870                         mlog_errno(ret);
3871                         break;
3872                 }
3873
3874                 /*
3875                  * The real bucket num in this series of blocks is stored
3876                  * in the 1st bucket.
3877                  */
3878                 if (i == 0)
3879                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3880
3881                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3882                      (unsigned long long)blkno,
3883                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3884                 if (func) {
3885                         ret = func(inode, bucket, para);
3886                         if (ret && ret != -ERANGE)
3887                                 mlog_errno(ret);
3888                         /* Fall through to bucket_relse() */
3889                 }
3890
3891                 ocfs2_xattr_bucket_relse(bucket);
3892                 if (ret)
3893                         break;
3894         }
3895
3896         ocfs2_xattr_bucket_free(bucket);
3897         return ret;
3898 }
3899
/*
 * State handed (as the opaque "para" pointer) to the bucket callbacks
 * while listing xattr names from an indexed block.
 *
 *   buffer, buffer_size - forwarded unchanged to ocfs2_xattr_list_entry().
 *   result              - updated by ocfs2_xattr_list_entry() through a
 *                         pointer; its final value is what
 *                         ocfs2_xattr_tree_list_index_block() returns on
 *                         success.
 */
struct ocfs2_xattr_tree_list {
	char *buffer;
	size_t buffer_size, result;
};
3905
3906 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3907                                              struct ocfs2_xattr_header *xh,
3908                                              int index,
3909                                              int *block_off,
3910                                              int *new_offset)
3911 {
3912         u16 name_offset;
3913
3914         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3915                 return -EINVAL;
3916
3917         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3918
3919         *block_off = name_offset >> sb->s_blocksize_bits;
3920         *new_offset = name_offset % sb->s_blocksize;
3921
3922         return 0;
3923 }
3924
3925 static int ocfs2_list_xattr_bucket(struct inode *inode,
3926                                    struct ocfs2_xattr_bucket *bucket,
3927                                    void *para)
3928 {
3929         int ret = 0, type;
3930         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3931         int i, block_off, new_offset;
3932         const char *prefix, *name;
3933
3934         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3935                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3936                 type = ocfs2_xattr_get_type(entry);
3937                 prefix = ocfs2_xattr_prefix(type);
3938
3939                 if (prefix) {
3940                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3941                                                                 bucket_xh(bucket),
3942                                                                 i,
3943                                                                 &block_off,
3944                                                                 &new_offset);
3945                         if (ret)
3946                                 break;
3947
3948                         name = (const char *)bucket_block(bucket, block_off) +
3949                                 new_offset;
3950                         ret = ocfs2_xattr_list_entry(xl->buffer,
3951                                                      xl->buffer_size,
3952                                                      &xl->result,
3953                                                      prefix, name,
3954                                                      entry->xe_name_len);
3955                         if (ret)
3956                                 break;
3957                 }
3958         }
3959
3960         return ret;
3961 }
3962
3963 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3964                                            struct buffer_head *blk_bh,
3965                                            xattr_tree_rec_func *rec_func,
3966                                            void *para)
3967 {
3968         struct ocfs2_xattr_block *xb =
3969                         (struct ocfs2_xattr_block *)blk_bh->b_data;
3970         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3971         int ret = 0;
3972         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3973         u64 p_blkno = 0;
3974
3975         if (!el->l_next_free_rec || !rec_func)
3976                 return 0;
3977
3978         while (name_hash > 0) {
3979                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3980                                           &e_cpos, &num_clusters, el);
3981                 if (ret) {
3982                         mlog_errno(ret);
3983                         break;
3984                 }
3985
3986                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3987                                num_clusters, para);
3988                 if (ret) {
3989                         if (ret != -ERANGE)
3990                                 mlog_errno(ret);
3991                         break;
3992                 }
3993
3994                 if (e_cpos == 0)
3995                         break;
3996
3997                 name_hash = e_cpos - 1;
3998         }
3999
4000         return ret;
4001
4002 }
4003
4004 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4005                                      struct buffer_head *root_bh,
4006                                      u64 blkno, u32 cpos, u32 len, void *para)
4007 {
4008         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4009                                            ocfs2_list_xattr_bucket, para);
4010 }
4011
4012 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4013                                              struct buffer_head *blk_bh,
4014                                              char *buffer,
4015                                              size_t buffer_size)
4016 {
4017         int ret;
4018         struct ocfs2_xattr_tree_list xl = {
4019                 .buffer = buffer,
4020                 .buffer_size = buffer_size,
4021                 .result = 0,
4022         };
4023
4024         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4025                                               ocfs2_list_xattr_tree_rec, &xl);
4026         if (ret) {
4027                 mlog_errno(ret);
4028                 goto out;
4029         }
4030
4031         ret = xl.result;
4032 out:
4033         return ret;
4034 }
4035
4036 static int cmp_xe(const void *a, const void *b)
4037 {
4038         const struct ocfs2_xattr_entry *l = a, *r = b;
4039         u32 l_hash = le32_to_cpu(l->xe_name_hash);
4040         u32 r_hash = le32_to_cpu(r->xe_name_hash);
4041
4042         if (l_hash > r_hash)
4043                 return 1;
4044         if (l_hash < r_hash)
4045                 return -1;
4046         return 0;
4047 }
4048
4049 static void swap_xe(void *a, void *b, int size)
4050 {
4051         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4052
4053         tmp = *l;
4054         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4055         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4056 }
4057
4058 /*
4059  * When the ocfs2_xattr_block is filled up, new bucket will be created
4060  * and all the xattr entries will be moved to the new bucket.
4061  * The header goes at the start of the bucket, and the names+values are
4062  * filled from the end.  This is why *target starts as the last buffer.
4063  * Note: we need to sort the entries since they are not saved in order
4064  * in the ocfs2_xattr_block.
4065  */
4066 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4067                                            struct buffer_head *xb_bh,
4068                                            struct ocfs2_xattr_bucket *bucket)
4069 {
4070         int i, blocksize = inode->i_sb->s_blocksize;
4071         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4072         u16 offset, size, off_change;
4073         struct ocfs2_xattr_entry *xe;
4074         struct ocfs2_xattr_block *xb =
4075                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
4076         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4077         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4078         u16 count = le16_to_cpu(xb_xh->xh_count);
4079         char *src = xb_bh->b_data;
4080         char *target = bucket_block(bucket, blks - 1);
4081
4082         mlog(0, "cp xattr from block %llu to bucket %llu\n",
4083              (unsigned long long)xb_bh->b_blocknr,
4084              (unsigned long long)bucket_blkno(bucket));
4085
4086         for (i = 0; i < blks; i++)
4087                 memset(bucket_block(bucket, i), 0, blocksize);
4088
4089         /*
4090          * Since the xe_name_offset is based on ocfs2_xattr_header,
4091          * there is a offset change corresponding to the change of
4092          * ocfs2_xattr_header's position.
4093          */
4094         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4095         xe = &xb_xh->xh_entries[count - 1];
4096         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4097         size = blocksize - offset;
4098
4099         /* copy all the names and values. */
4100         memcpy(target + offset, src + offset, size);
4101
4102         /* Init new header now. */
4103         xh->xh_count = xb_xh->xh_count;
4104         xh->xh_num_buckets = cpu_to_le16(1);
4105         xh->xh_name_value_len = cpu_to_le16(size);
4106         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4107
4108         /* copy all the entries. */
4109         target = bucket_block(bucket, 0);
4110         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4111         size = count * sizeof(struct ocfs2_xattr_entry);
4112         memcpy(target + offset, (char *)xb_xh + offset, size);
4113
4114         /* Change the xe offset for all the xe because of the move. */
4115         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4116                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4117         for (i = 0; i < count; i++)
4118                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4119
4120         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4121              offset, size, off_change);
4122
4123         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4124              cmp_xe, swap_xe);
4125 }
4126
4127 /*
4128  * After we move xattr from block to index btree, we have to
4129  * update ocfs2_xattr_search to the new xe and base.
4130  *
4131  * When the entry is in xattr block, xattr_bh indicates the storage place.
4132  * While if the entry is in index b-tree, "bucket" indicates the
4133  * real place of the xattr.
4134  */
4135 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4136                                             struct ocfs2_xattr_search *xs,
4137                                             struct buffer_head *old_bh)
4138 {
4139         char *buf = old_bh->b_data;
4140         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4141         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4142         int i;
4143
4144         xs->header = bucket_xh(xs->bucket);
4145         xs->base = bucket_block(xs->bucket, 0);
4146         xs->end = xs->base + inode->i_sb->s_blocksize;
4147
4148         if (xs->not_found)
4149                 return;
4150
4151         i = xs->here - old_xh->xh_entries;
4152         xs->here = &xs->header->xh_entries[i];
4153 }
4154
4155 static int ocfs2_xattr_create_index_block(struct inode *inode,
4156                                           struct ocfs2_xattr_search *xs,
4157                                           struct ocfs2_xattr_set_ctxt *ctxt)
4158 {
4159         int ret;
4160         u32 bit_off, len;
4161         u64 blkno;
4162         handle_t *handle = ctxt->handle;
4163         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4164         struct ocfs2_inode_info *oi = OCFS2_I(inode);
4165         struct buffer_head *xb_bh = xs->xattr_bh;
4166         struct ocfs2_xattr_block *xb =
4167                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4168         struct ocfs2_xattr_tree_root *xr;
4169         u16 xb_flags = le16_to_cpu(xb->xb_flags);
4170
4171         mlog(0, "create xattr index block for %llu\n",
4172              (unsigned long long)xb_bh->b_blocknr);
4173
4174         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4175         BUG_ON(!xs->bucket);
4176
4177         /*
4178          * XXX:
4179          * We can use this lock for now, and maybe move to a dedicated mutex
4180          * if performance becomes a problem later.
4181          */
4182         down_write(&oi->ip_alloc_sem);
4183
4184         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4185                                       OCFS2_JOURNAL_ACCESS_WRITE);
4186         if (ret) {
4187                 mlog_errno(ret);
4188                 goto out;
4189         }
4190
4191         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
4192                                      1, 1, &bit_off, &len);
4193         if (ret) {
4194                 mlog_errno(ret);
4195                 goto out;
4196         }
4197
4198         /*
4199          * The bucket may spread in many blocks, and
4200          * we will only touch the 1st block and the last block
4201          * in the whole bucket(one for entry and one for data).
4202          */
4203         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4204
4205         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
4206              (unsigned long long)blkno);
4207
4208         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4209         if (ret) {
4210                 mlog_errno(ret);
4211                 goto out;
4212         }
4213
4214         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4215                                                 OCFS2_JOURNAL_ACCESS_CREATE);
4216         if (ret) {
4217                 mlog_errno(ret);
4218                 goto out;
4219         }
4220
4221         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4222         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4223
4224         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4225
4226         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4227         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4228                offsetof(struct ocfs2_xattr_block, xb_attrs));
4229
4230         xr = &xb->xb_attrs.xb_root;
4231         xr->xt_clusters = cpu_to_le32(1);
4232         xr->xt_last_eb_blk = 0;
4233         xr->xt_list.l_tree_depth = 0;
4234         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4235         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4236
4237         xr->xt_list.l_recs[0].e_cpos = 0;
4238         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4239         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4240
4241         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4242
4243         ocfs2_journal_dirty(handle, xb_bh);
4244
4245 out:
4246         up_write(&oi->ip_alloc_sem);
4247
4248         return ret;
4249 }
4250
4251 static int cmp_xe_offset(const void *a, const void *b)
4252 {
4253         const struct ocfs2_xattr_entry *l = a, *r = b;
4254         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4255         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4256
4257         if (l_name_offset < r_name_offset)
4258                 return 1;
4259         if (l_name_offset > r_name_offset)
4260                 return -1;
4261         return 0;
4262 }
4263
4264 /*
4265  * defrag a xattr bucket if we find that the bucket has some
4266  * holes beteen name/value pairs.
4267  * We will move all the name/value pairs to the end of the bucket
4268  * so that we can spare some space for insertion.
4269  */
4270 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4271                                      handle_t *handle,
4272                                      struct ocfs2_xattr_bucket *bucket)
4273 {
4274         int ret, i;
4275         size_t end, offset, len;
4276         struct ocfs2_xattr_header *xh;
4277         char *entries, *buf, *bucket_buf = NULL;
4278         u64 blkno = bucket_blkno(bucket);
4279         u16 xh_free_start;
4280         size_t blocksize = inode->i_sb->s_blocksize;
4281         struct ocfs2_xattr_entry *xe;
4282
4283         /*
4284          * In order to make the operation more efficient and generic,
4285          * we copy all the blocks into a contiguous memory and do the
4286          * defragment there, so if anything is error, we will not touch
4287          * the real block.
4288          */
4289         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4290         if (!bucket_buf) {
4291                 ret = -EIO;
4292                 goto out;
4293         }
4294
4295         buf = bucket_buf;
4296         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4297                 memcpy(buf, bucket_block(bucket, i), blocksize);
4298
4299         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4300                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4301         if (ret < 0) {
4302                 mlog_errno(ret);
4303                 goto out;
4304         }
4305
4306         xh = (struct ocfs2_xattr_header *)bucket_buf;
4307         entries = (char *)xh->xh_entries;
4308         xh_free_start = le16_to_cpu(xh->xh_free_start);
4309
4310         mlog(0, "adjust xattr bucket in %llu, count = %u, "
4311              "xh_free_start = %u, xh_name_value_len = %u.\n",
4312              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4313              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4314
4315         /*
4316          * sort all the entries by their offset.
4317          * the largest will be the first, so that we can
4318          * move them to the end one by one.
4319          */
4320         sort(entries, le16_to_cpu(xh->xh_count),
4321              sizeof(struct ocfs2_xattr_entry),
4322              cmp_xe_offset, swap_xe);
4323
4324         /* Move all name/values to the end of the bucket. */
4325         xe = xh->xh_entries;
4326         end = OCFS2_XATTR_BUCKET_SIZE;
4327         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4328                 offset = le16_to_cpu(xe->xe_name_offset);
4329                 len = namevalue_size_xe(xe);
4330
4331                 /*
4332                  * We must make sure that the name/value pair
4333                  * exist in the same block. So adjust end to
4334                  * the previous block end if needed.
4335                  */
4336                 if (((end - len) / blocksize !=
4337                         (end - 1) / blocksize))
4338                         end = end - end % blocksize;
4339
4340                 if (end > offset + len) {
4341                         memmove(bucket_buf + end - len,
4342                                 bucket_buf + offset, len);
4343                         xe->xe_name_offset = cpu_to_le16(end - len);
4344                 }
4345
4346                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4347                                 "bucket %llu\n", (unsigned long long)blkno);
4348
4349                 end -= len;
4350         }
4351
4352         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4353                         "bucket %llu\n", (unsigned long long)blkno);
4354
4355         if (xh_free_start == end)
4356                 goto out;
4357
4358         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4359         xh->xh_free_start = cpu_to_le16(end);
4360
4361         /* sort the entries by their name_hash. */
4362         sort(entries, le16_to_cpu(xh->xh_count),
4363              sizeof(struct ocfs2_xattr_entry),
4364              cmp_xe, swap_xe);
4365
4366         buf = bucket_buf;
4367         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4368                 memcpy(bucket_block(bucket, i), buf, blocksize);
4369         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4370
4371 out:
4372         kfree(bucket_buf);
4373         return ret;
4374 }
4375
4376 /*
4377  * prev_blkno points to the start of an existing extent.  new_blkno
4378  * points to a newly allocated extent.  Because we know each of our
4379  * clusters contains more than bucket, we can easily split one cluster
4380  * at a bucket boundary.  So we take the last cluster of the existing
4381  * extent and split it down the middle.  We move the last half of the
4382  * buckets in the last cluster of the existing extent over to the new
4383  * extent.
4384  *
4385  * first_bh is the buffer at prev_blkno so we can update the existing
4386  * extent's bucket count.  header_bh is the bucket were we were hoping
4387  * to insert our xattr.  If the bucket move places the target in the new
4388  * extent, we'll update first_bh and header_bh after modifying the old
4389  * extent.
4390  *
4391  * first_hash will be set as the 1st xe's name_hash in the new extent.
4392  */
4393 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4394                                                handle_t *handle,
4395                                                struct ocfs2_xattr_bucket *first,
4396                                                struct ocfs2_xattr_bucket *target,
4397                                                u64 new_blkno,
4398                                                u32 num_clusters,
4399                                                u32 *first_hash)
4400 {
4401         int ret;
4402         struct super_block *sb = inode->i_sb;
4403         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4404         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4405         int to_move = num_buckets / 2;
4406         u64 src_blkno;
4407         u64 last_cluster_blkno = bucket_blkno(first) +
4408                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4409
4410         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4411         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4412
4413         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4414              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4415
4416         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4417                                      last_cluster_blkno, new_blkno,
4418                                      to_move, first_hash);
4419         if (ret) {
4420                 mlog_errno(ret);
4421                 goto out;
4422         }
4423
4424         /* This is the first bucket that got moved */
4425         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4426
4427         /*
4428          * If the target bucket was part of the moved buckets, we need to
4429          * update first and target.
4430          */
4431         if (bucket_blkno(target) >= src_blkno) {
4432                 /* Find the block for the new target bucket */
4433                 src_blkno = new_blkno +
4434                         (bucket_blkno(target) - src_blkno);
4435
4436                 ocfs2_xattr_bucket_relse(first);
4437                 ocfs2_xattr_bucket_relse(target);
4438
4439                 /*
4440                  * These shouldn't fail - the buffers are in the
4441                  * journal from ocfs2_cp_xattr_bucket().
4442                  */
4443                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
4444                 if (ret) {
4445                         mlog_errno(ret);
4446                         goto out;
4447                 }
4448                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
4449                 if (ret)
4450                         mlog_errno(ret);
4451
4452         }
4453
4454 out:
4455         return ret;
4456 }
4457
4458 /*
4459  * Find the suitable pos when we divide a bucket into 2.
4460  * We have to make sure the xattrs with the same hash value exist
4461  * in the same bucket.
4462  *
4463  * If this ocfs2_xattr_header covers more than one hash value, find a
4464  * place where the hash value changes.  Try to find the most even split.
4465  * The most common case is that all entries have different hash values,
4466  * and the first check we make will find a place to split.
4467  */
4468 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4469 {
4470         struct ocfs2_xattr_entry *entries = xh->xh_entries;
4471         int count = le16_to_cpu(xh->xh_count);
4472         int delta, middle = count / 2;
4473
4474         /*
4475          * We start at the middle.  Each step gets farther away in both
4476          * directions.  We therefore hit the change in hash value
4477          * nearest to the middle.  Note that this loop does not execute for
4478          * count < 2.
4479          */
4480         for (delta = 0; delta < middle; delta++) {
4481                 /* Let's check delta earlier than middle */
4482                 if (cmp_xe(&entries[middle - delta - 1],
4483                            &entries[middle - delta]))
4484                         return middle - delta;
4485
4486                 /* For even counts, don't walk off the end */
4487                 if ((middle + delta + 1) == count)
4488                         continue;
4489
4490                 /* Now try delta past middle */
4491                 if (cmp_xe(&entries[middle + delta],
4492                            &entries[middle + delta + 1]))
4493                         return middle + delta + 1;
4494         }
4495
4496         /* Every entry had the same hash */
4497         return count;
4498 }
4499
4500 /*
4501  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4502  * first_hash will record the 1st hash of the new bucket.
4503  *
4504  * Normally half of the xattrs will be moved.  But we have to make
4505  * sure that the xattrs with the same hash value are stored in the
4506  * same bucket. If all the xattrs in this bucket have the same hash
4507  * value, the new bucket will be initialized as an empty one and the
4508  * first_hash will be initialized as (hash_value+1).
4509  */
4510 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4511                                     handle_t *handle,
4512                                     u64 blk,
4513                                     u64 new_blk,
4514                                     u32 *first_hash,
4515                                     int new_bucket_head)
4516 {
4517         int ret, i;
4518         int count, start, len, name_value_len = 0, name_offset = 0;
4519         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4520         struct ocfs2_xattr_header *xh;
4521         struct ocfs2_xattr_entry *xe;
4522         int blocksize = inode->i_sb->s_blocksize;
4523
4524         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4525              (unsigned long long)blk, (unsigned long long)new_blk);
4526
4527         s_bucket = ocfs2_xattr_bucket_new(inode);
4528         t_bucket = ocfs2_xattr_bucket_new(inode);
4529         if (!s_bucket || !t_bucket) {
4530                 ret = -ENOMEM;
4531                 mlog_errno(ret);
4532                 goto out;
4533         }
4534
4535         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4536         if (ret) {
4537                 mlog_errno(ret);
4538                 goto out;
4539         }
4540
4541         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4542                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4543         if (ret) {
4544                 mlog_errno(ret);
4545                 goto out;
4546         }
4547
4548         /*
4549          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4550          * there's no need to read it.
4551          */
4552         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4553         if (ret) {
4554                 mlog_errno(ret);
4555                 goto out;
4556         }
4557
4558         /*
4559          * Hey, if we're overwriting t_bucket, what difference does
4560          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4561          * same part of ocfs2_cp_xattr_bucket().
4562          */
4563         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4564                                                 new_bucket_head ?
4565                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4566                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4567         if (ret) {
4568                 mlog_errno(ret);
4569                 goto out;
4570         }
4571
4572         xh = bucket_xh(s_bucket);
4573         count = le16_to_cpu(xh->xh_count);
4574         start = ocfs2_xattr_find_divide_pos(xh);
4575
4576         if (start == count) {
4577                 xe = &xh->xh_entries[start-1];
4578
4579                 /*
4580                  * initialized a new empty bucket here.
4581                  * The hash value is set as one larger than
4582                  * that of the last entry in the previous bucket.
4583                  */
4584                 for (i = 0; i < t_bucket->bu_blocks; i++)
4585                         memset(bucket_block(t_bucket, i), 0, blocksize);
4586
4587                 xh = bucket_xh(t_bucket);
4588                 xh->xh_free_start = cpu_to_le16(blocksize);
4589                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4590                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4591
4592                 goto set_num_buckets;
4593         }
4594
4595         /* copy the whole bucket to the new first. */
4596         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4597
4598         /* update the new bucket. */
4599         xh = bucket_xh(t_bucket);
4600
4601         /*
4602          * Calculate the total name/value len and xh_free_start for
4603          * the old bucket first.
4604          */
4605         name_offset = OCFS2_XATTR_BUCKET_SIZE;
4606         name_value_len = 0;
4607         for (i = 0; i < start; i++) {
4608                 xe = &xh->xh_entries[i];
4609                 name_value_len += namevalue_size_xe(xe);
4610                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4611                         name_offset = le16_to_cpu(xe->xe_name_offset);
4612         }
4613
4614         /*
4615          * Now begin the modification to the new bucket.
4616          *
4617          * In the new bucket, We just move the xattr entry to the beginning
4618          * and don't touch the name/value. So there will be some holes in the
4619          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4620          * called.
4621          */
4622         xe = &xh->xh_entries[start];
4623         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4624         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4625              (int)((char *)xe - (char *)xh),
4626              (int)((char *)xh->xh_entries - (char *)xh));
4627         memmove((char *)xh->xh_entries, (char *)xe, len);
4628         xe = &xh->xh_entries[count - start];
4629         len = sizeof(struct ocfs2_xattr_entry) * start;
4630         memset((char *)xe, 0, len);
4631
4632         le16_add_cpu(&xh->xh_count, -start);
4633         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4634
4635         /* Calculate xh_free_start for the new bucket. */
4636         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4637         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4638                 xe = &xh->xh_entries[i];
4639                 if (le16_to_cpu(xe->xe_name_offset) <
4640                     le16_to_cpu(xh->xh_free_start))
4641                         xh->xh_free_start = xe->xe_name_offset;
4642         }
4643
4644 set_num_buckets:
4645         /* set xh->xh_num_buckets for the new xh. */
4646         if (new_bucket_head)
4647                 xh->xh_num_buckets = cpu_to_le16(1);
4648         else
4649                 xh->xh_num_buckets = 0;
4650
4651         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4652
4653         /* store the first_hash of the new bucket. */
4654         if (first_hash)
4655                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4656
4657         /*
4658          * Now only update the 1st block of the old bucket.  If we
4659          * just added a new empty bucket, there is no need to modify
4660          * it.
4661          */
4662         if (start == count)
4663                 goto out;
4664
4665         xh = bucket_xh(s_bucket);
4666         memset(&xh->xh_entries[start], 0,
4667                sizeof(struct ocfs2_xattr_entry) * (count - start));
4668         xh->xh_count = cpu_to_le16(start);
4669         xh->xh_free_start = cpu_to_le16(name_offset);
4670         xh->xh_name_value_len = cpu_to_le16(name_value_len);
4671
4672         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4673
4674 out:
4675         ocfs2_xattr_bucket_free(s_bucket);
4676         ocfs2_xattr_bucket_free(t_bucket);
4677
4678         return ret;
4679 }
4680
4681 /*
4682  * Copy xattr from one bucket to another bucket.
4683  *
4684  * The caller must make sure that the journal transaction
4685  * has enough space for journaling.
4686  */
4687 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4688                                  handle_t *handle,
4689                                  u64 s_blkno,
4690                                  u64 t_blkno,
4691                                  int t_is_new)
4692 {
4693         int ret;
4694         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4695
4696         BUG_ON(s_blkno == t_blkno);
4697
4698         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4699              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4700              t_is_new);
4701
4702         s_bucket = ocfs2_xattr_bucket_new(inode);
4703         t_bucket = ocfs2_xattr_bucket_new(inode);
4704         if (!s_bucket || !t_bucket) {
4705                 ret = -ENOMEM;
4706                 mlog_errno(ret);
4707                 goto out;
4708         }
4709
4710         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4711         if (ret)
4712                 goto out;
4713
4714         /*
4715          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4716          * there's no need to read it.
4717          */
4718         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4719         if (ret)
4720                 goto out;
4721
4722         /*
4723          * Hey, if we're overwriting t_bucket, what difference does
4724          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4725          * cluster to fill, we came here from
4726          * ocfs2_mv_xattr_buckets(), and it is really new -
4727          * ACCESS_CREATE is required.  But we also might have moved data
4728          * out of t_bucket before extending back into it.
4729          * ocfs2_add_new_xattr_bucket() can do this - its call to
4730          * ocfs2_add_new_xattr_cluster() may have created a new extent
4731          * and copied out the end of the old extent.  Then it re-extends
4732          * the old extent back to create space for new xattrs.  That's
4733          * how we get here, and the bucket isn't really new.
4734          */
4735         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4736                                                 t_is_new ?
4737                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4738                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4739         if (ret)
4740                 goto out;
4741
4742         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4743         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4744
4745 out:
4746         ocfs2_xattr_bucket_free(t_bucket);
4747         ocfs2_xattr_bucket_free(s_bucket);
4748
4749         return ret;
4750 }
4751
4752 /*
4753  * src_blk points to the start of an existing extent.  last_blk points to
4754  * last cluster in that extent.  to_blk points to a newly allocated
4755  * extent.  We copy the buckets from the cluster at last_blk to the new
4756  * extent.  If start_bucket is non-zero, we skip that many buckets before
4757  * we start copying.  The new extent's xh_num_buckets gets set to the
4758  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4759  * by the same amount.
4760  */
4761 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4762                                   u64 src_blk, u64 last_blk, u64 to_blk,
4763                                   unsigned int start_bucket,
4764                                   u32 *first_hash)
4765 {
4766         int i, ret, credits;
4767         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4768         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4769         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4770         struct ocfs2_xattr_bucket *old_first, *new_first;
4771
4772         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4773              (unsigned long long)last_blk, (unsigned long long)to_blk);
4774
4775         BUG_ON(start_bucket >= num_buckets);
4776         if (start_bucket) {
4777                 num_buckets -= start_bucket;
4778                 last_blk += (start_bucket * blks_per_bucket);
4779         }
4780
4781         /* The first bucket of the original extent */
4782         old_first = ocfs2_xattr_bucket_new(inode);
4783         /* The first bucket of the new extent */
4784         new_first = ocfs2_xattr_bucket_new(inode);
4785         if (!old_first || !new_first) {
4786                 ret = -ENOMEM;
4787                 mlog_errno(ret);
4788                 goto out;
4789         }
4790
4791         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4792         if (ret) {
4793                 mlog_errno(ret);
4794                 goto out;
4795         }
4796
4797         /*
4798          * We need to update the first bucket of the old extent and all
4799          * the buckets going to the new extent.
4800          */
4801         credits = ((num_buckets + 1) * blks_per_bucket) +
4802                 handle->h_buffer_credits;
4803         ret = ocfs2_extend_trans(handle, credits);
4804         if (ret) {
4805                 mlog_errno(ret);
4806                 goto out;
4807         }
4808
4809         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4810                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4811         if (ret) {
4812                 mlog_errno(ret);
4813                 goto out;
4814         }
4815
4816         for (i = 0; i < num_buckets; i++) {
4817                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4818                                             last_blk + (i * blks_per_bucket),
4819                                             to_blk + (i * blks_per_bucket),
4820                                             1);
4821                 if (ret) {
4822                         mlog_errno(ret);
4823                         goto out;
4824                 }
4825         }
4826
4827         /*
4828          * Get the new bucket ready before we dirty anything
4829          * (This actually shouldn't fail, because we already dirtied
4830          * it once in ocfs2_cp_xattr_bucket()).
4831          */
4832         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4833         if (ret) {
4834                 mlog_errno(ret);
4835                 goto out;
4836         }
4837         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4838                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4839         if (ret) {
4840                 mlog_errno(ret);
4841                 goto out;
4842         }
4843
4844         /* Now update the headers */
4845         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4846         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4847
4848         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4849         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4850
4851         if (first_hash)
4852                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4853
4854 out:
4855         ocfs2_xattr_bucket_free(new_first);
4856         ocfs2_xattr_bucket_free(old_first);
4857         return ret;
4858 }
4859
4860 /*
4861  * Move some xattrs in this cluster to the new cluster.
4862  * This function should only be called when bucket size == cluster size.
4863  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4864  */
4865 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4866                                       handle_t *handle,
4867                                       u64 prev_blk,
4868                                       u64 new_blk,
4869                                       u32 *first_hash)
4870 {
4871         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4872         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4873
4874         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4875
4876         ret = ocfs2_extend_trans(handle, credits);
4877         if (ret) {
4878                 mlog_errno(ret);
4879                 return ret;
4880         }
4881
4882         /* Move half of the xattr in start_blk to the next bucket. */
4883         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4884                                           new_blk, first_hash, 1);
4885 }
4886
4887 /*
4888  * Move some xattrs from the old cluster to the new one since they are not
4889  * contiguous in ocfs2 xattr tree.
4890  *
4891  * new_blk starts a new separate cluster, and we will move some xattrs from
4892  * prev_blk to it. v_start will be set as the first name hash value in this
4893  * new cluster so that it can be used as e_cpos during tree insertion and
4894  * don't collide with our original b-tree operations. first_bh and header_bh
4895  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4896  * to extend the insert bucket.
4897  *
4898  * The problem is how much xattr should we move to the new one and when should
4899  * we update first_bh and header_bh?
4900  * 1. If cluster size > bucket size, that means the previous cluster has more
4901  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4902  *    update the first_bh and header_bh if the insert bucket has been moved
4903  *    to the new cluster.
4904  * 2. If cluster_size == bucket_size:
4905  *    a) If the previous extent rec has more than one cluster and the insert
4906  *       place isn't in the last cluster, copy the entire last cluster to the
4907  *       new one. This time, we don't need to upate the first_bh and header_bh
4908  *       since they will not be moved into the new cluster.
4909  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4910  *       the new one. And we set the extend flag to zero if the insert place is
4911  *       moved into the new allocated cluster since no extend is needed.
4912  */
4913 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4914                                             handle_t *handle,
4915                                             struct ocfs2_xattr_bucket *first,
4916                                             struct ocfs2_xattr_bucket *target,
4917                                             u64 new_blk,
4918                                             u32 prev_clusters,
4919                                             u32 *v_start,
4920                                             int *extend)
4921 {
4922         int ret;
4923
4924         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4925              (unsigned long long)bucket_blkno(first), prev_clusters,
4926              (unsigned long long)new_blk);
4927
4928         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4929                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4930                                                           handle,
4931                                                           first, target,
4932                                                           new_blk,
4933                                                           prev_clusters,
4934                                                           v_start);
4935                 if (ret)
4936                         mlog_errno(ret);
4937         } else {
4938                 /* The start of the last cluster in the first extent */
4939                 u64 last_blk = bucket_blkno(first) +
4940                         ((prev_clusters - 1) *
4941                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4942
4943                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4944                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4945                                                      bucket_blkno(first),
4946                                                      last_blk, new_blk, 0,
4947                                                      v_start);
4948                         if (ret)
4949                                 mlog_errno(ret);
4950                 } else {
4951                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4952                                                          last_blk, new_blk,
4953                                                          v_start);
4954                         if (ret)
4955                                 mlog_errno(ret);
4956
4957                         if ((bucket_blkno(target) == last_blk) && extend)
4958                                 *extend = 0;
4959                 }
4960         }
4961
4962         return ret;
4963 }
4964
4965 /*
4966  * Add a new cluster for xattr storage.
4967  *
4968  * If the new cluster is contiguous with the previous one, it will be
4969  * appended to the same extent record, and num_clusters will be updated.
4970  * If not, we will insert a new extent for it and move some xattrs in
4971  * the last cluster into the new allocated one.
4972  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4973  * lose the benefits of hashing because we'll have to search large leaves.
4974  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4975  * if it's bigger).
4976  *
4977  * first_bh is the first block of the previous extent rec and header_bh
4978  * indicates the bucket we will insert the new xattrs. They will be updated
4979  * when the header_bh is moved into the new cluster.
4980  */
4981 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4982                                        struct buffer_head *root_bh,
4983                                        struct ocfs2_xattr_bucket *first,
4984                                        struct ocfs2_xattr_bucket *target,
4985                                        u32 *num_clusters,
4986                                        u32 prev_cpos,
4987                                        int *extend,
4988                                        struct ocfs2_xattr_set_ctxt *ctxt)
4989 {
4990         int ret;
4991         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4992         u32 prev_clusters = *num_clusters;
4993         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4994         u64 block;
4995         handle_t *handle = ctxt->handle;
4996         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4997         struct ocfs2_extent_tree et;
4998
4999         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
5000              "previous xattr blkno = %llu\n",
5001              (unsigned long long)OCFS2_I(inode)->ip_blkno,
5002              prev_cpos, (unsigned long long)bucket_blkno(first));
5003
5004         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5005
5006         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5007                                       OCFS2_JOURNAL_ACCESS_WRITE);
5008         if (ret < 0) {
5009                 mlog_errno(ret);
5010                 goto leave;
5011         }
5012
5013         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
5014                                      clusters_to_add, &bit_off, &num_bits);
5015         if (ret < 0) {
5016                 if (ret != -ENOSPC)
5017                         mlog_errno(ret);
5018                 goto leave;
5019         }
5020
5021         BUG_ON(num_bits > clusters_to_add);
5022
5023         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5024         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
5025              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5026
5027         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5028             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5029              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5030                 /*
5031                  * If this cluster is contiguous with the old one and
5032                  * adding this new cluster, we don't surpass the limit of
5033                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5034                  * initialized and used like other buckets in the previous
5035                  * cluster.
5036                  * So add it as a contiguous one. The caller will handle
5037                  * its init process.
5038                  */
5039                 v_start = prev_cpos + prev_clusters;
5040                 *num_clusters = prev_clusters + num_bits;
5041                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5042                      num_bits);
5043         } else {
5044                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5045                                                        handle,
5046                                                        first,
5047                                                        target,
5048                                                        block,
5049                                                        prev_clusters,
5050                                                        &v_start,
5051                                                        extend);
5052                 if (ret) {
5053                         mlog_errno(ret);
5054                         goto leave;
5055                 }
5056         }
5057
5058         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
5059              num_bits, (unsigned long long)block, v_start);
5060         ret = ocfs2_insert_extent(handle, &et, v_start, block,
5061                                   num_bits, 0, ctxt->meta_ac);
5062         if (ret < 0) {
5063                 mlog_errno(ret);
5064                 goto leave;
5065         }
5066
5067         ret = ocfs2_journal_dirty(handle, root_bh);
5068         if (ret < 0)
5069                 mlog_errno(ret);
5070
5071 leave:
5072         return ret;
5073 }
5074
5075 /*
5076  * We are given an extent.  'first' is the bucket at the very front of
5077  * the extent.  The extent has space for an additional bucket past
5078  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5079  * of the target bucket.  We wish to shift every bucket past the target
5080  * down one, filling in that additional space.  When we get back to the
5081  * target, we split the target between itself and the now-empty bucket
5082  * at target+1 (aka, target_blkno + blks_per_bucket).
5083  */
5084 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5085                                      handle_t *handle,
5086                                      struct ocfs2_xattr_bucket *first,
5087                                      u64 target_blk,
5088                                      u32 num_clusters)
5089 {
5090         int ret, credits;
5091         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5092         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5093         u64 end_blk;
5094         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5095
5096         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5097              "from %llu, len = %u\n", (unsigned long long)target_blk,
5098              (unsigned long long)bucket_blkno(first), num_clusters);
5099
5100         /* The extent must have room for an additional bucket */
5101         BUG_ON(new_bucket >=
5102                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5103
5104         /* end_blk points to the last existing bucket */
5105         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5106
5107         /*
5108          * end_blk is the start of the last existing bucket.
5109          * Thus, (end_blk - target_blk) covers the target bucket and
5110          * every bucket after it up to, but not including, the last
5111          * existing bucket.  Then we add the last existing bucket, the
5112          * new bucket, and the first bucket (3 * blk_per_bucket).
5113          */
5114         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
5115                   handle->h_buffer_credits;
5116         ret = ocfs2_extend_trans(handle, credits);
5117         if (ret) {
5118                 mlog_errno(ret);
5119                 goto out;
5120         }
5121
5122         ret = ocfs2_xattr_bucket_journal_access(handle, first,
5123                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5124         if (ret) {
5125                 mlog_errno(ret);
5126                 goto out;
5127         }
5128
5129         while (end_blk != target_blk) {
5130                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5131                                             end_blk + blk_per_bucket, 0);
5132                 if (ret)
5133                         goto out;
5134                 end_blk -= blk_per_bucket;
5135         }
5136
5137         /* Move half of the xattr in target_blkno to the next bucket. */
5138         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5139                                         target_blk + blk_per_bucket, NULL, 0);
5140
5141         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5142         ocfs2_xattr_bucket_journal_dirty(handle, first);
5143
5144 out:
5145         return ret;
5146 }
5147
5148 /*
5149  * Add new xattr bucket in an extent record and adjust the buckets
5150  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5151  * bucket we want to insert into.
5152  *
5153  * In the easy case, we will move all the buckets after target down by
5154  * one. Half of target's xattrs will be moved to the next bucket.
5155  *
5156  * If current cluster is full, we'll allocate a new one.  This may not
5157  * be contiguous.  The underlying calls will make sure that there is
5158  * space for the insert, shifting buckets around if necessary.
5159  * 'target' may be moved by those calls.
5160  */
5161 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5162                                       struct buffer_head *xb_bh,
5163                                       struct ocfs2_xattr_bucket *target,
5164                                       struct ocfs2_xattr_set_ctxt *ctxt)
5165 {
5166         struct ocfs2_xattr_block *xb =
5167                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5168         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5169         struct ocfs2_extent_list *el = &xb_root->xt_list;
5170         u32 name_hash =
5171                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5172         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5173         int ret, num_buckets, extend = 1;
5174         u64 p_blkno;
5175         u32 e_cpos, num_clusters;
5176         /* The bucket at the front of the extent */
5177         struct ocfs2_xattr_bucket *first;
5178
5179         mlog(0, "Add new xattr bucket starting from %llu\n",
5180              (unsigned long long)bucket_blkno(target));
5181
5182         /* The first bucket of the original extent */
5183         first = ocfs2_xattr_bucket_new(inode);
5184         if (!first) {
5185                 ret = -ENOMEM;
5186                 mlog_errno(ret);
5187                 goto out;
5188         }
5189
5190         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5191                                   &num_clusters, el);
5192         if (ret) {
5193                 mlog_errno(ret);
5194                 goto out;
5195         }
5196
5197         ret = ocfs2_read_xattr_bucket(first, p_blkno);
5198         if (ret) {
5199                 mlog_errno(ret);
5200                 goto out;
5201         }
5202
5203         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5204         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5205                 /*
5206                  * This can move first+target if the target bucket moves
5207                  * to the new extent.
5208                  */
5209                 ret = ocfs2_add_new_xattr_cluster(inode,
5210                                                   xb_bh,
5211                                                   first,
5212                                                   target,
5213                                                   &num_clusters,
5214                                                   e_cpos,
5215                                                   &extend,
5216                                                   ctxt);
5217                 if (ret) {
5218                         mlog_errno(ret);
5219                         goto out;
5220                 }
5221         }
5222
5223         if (extend) {
5224                 ret = ocfs2_extend_xattr_bucket(inode,
5225                                                 ctxt->handle,
5226                                                 first,
5227                                                 bucket_blkno(target),
5228                                                 num_clusters);
5229                 if (ret)
5230                         mlog_errno(ret);
5231         }
5232
5233 out:
5234         ocfs2_xattr_bucket_free(first);
5235
5236         return ret;
5237 }
5238
5239 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5240                                         struct ocfs2_xattr_bucket *bucket,
5241                                         int offs)
5242 {
5243         int block_off = offs >> inode->i_sb->s_blocksize_bits;
5244
5245         offs = offs % inode->i_sb->s_blocksize;
5246         return bucket_block(bucket, block_off) + offs;
5247 }
5248
5249 /*
5250  * Set the xattr entry in the specified bucket.
5251  * The bucket is indicated by xs->bucket and it should have the enough
5252  * space for the xattr insertion.
5253  */
5254 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
5255                                            handle_t *handle,
5256                                            struct ocfs2_xattr_info *xi,
5257                                            struct ocfs2_xattr_search *xs,
5258                                            u32 name_hash,
5259                                            int local)
5260 {
5261         int ret;
5262         u64 blkno;
5263         struct ocfs2_xa_loc loc;
5264
5265         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
5266              (unsigned long)xi->xi_value_len, xi->xi_name_index,
5267              (unsigned long long)bucket_blkno(xs->bucket));
5268
5269         if (!xs->bucket->bu_bhs[1]) {
5270                 blkno = bucket_blkno(xs->bucket);
5271                 ocfs2_xattr_bucket_relse(xs->bucket);
5272                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
5273                 if (ret) {
5274                         mlog_errno(ret);
5275                         goto out;
5276                 }
5277         }
5278
5279         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5280                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5281         if (ret < 0) {
5282                 mlog_errno(ret);
5283                 goto out;
5284         }
5285
5286         ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5287                                        xs->not_found ? NULL : xs->here);
5288         ret = ocfs2_xa_prepare_entry(&loc, xi, name_hash);
5289         if (ret) {
5290                 if (ret != -ENOSPC)
5291                         mlog_errno(ret);
5292                 goto out;
5293         }
5294         /* XXX For now, until we make ocfs2_xa_prepare_entry() primary */
5295         BUG_ON(ret == -ENOSPC);
5296         ocfs2_xa_store_inline_value(&loc, xi);
5297         xs->here = loc.xl_entry;
5298
5299         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5300
5301 out:
5302         return ret;
5303 }
5304
5305 /*
5306  * Truncate the specified xe_off entry in xattr bucket.
5307  * bucket is indicated by header_bh and len is the new length.
5308  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5309  *
5310  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5311  */
5312 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5313                                              struct ocfs2_xattr_bucket *bucket,
5314                                              int xe_off,
5315                                              int len,
5316                                              struct ocfs2_xattr_set_ctxt *ctxt)
5317 {
5318         int ret, offset;
5319         u64 value_blk;
5320         struct ocfs2_xattr_entry *xe;
5321         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5322         size_t blocksize = inode->i_sb->s_blocksize;
5323         struct ocfs2_xattr_value_buf vb = {
5324                 .vb_access = ocfs2_journal_access,
5325         };
5326
5327         xe = &xh->xh_entries[xe_off];
5328
5329         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5330
5331         offset = le16_to_cpu(xe->xe_name_offset) +
5332                  OCFS2_XATTR_SIZE(xe->xe_name_len);
5333
5334         value_blk = offset / blocksize;
5335
5336         /* We don't allow ocfs2_xattr_value to be stored in different block. */
5337         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5338
5339         vb.vb_bh = bucket->bu_bhs[value_blk];
5340         BUG_ON(!vb.vb_bh);
5341
5342         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5343                 (vb.vb_bh->b_data + offset % blocksize);
5344
5345         /*
5346          * From here on out we have to dirty the bucket.  The generic
5347          * value calls only modify one of the bucket's bhs, but we need
5348          * to send the bucket at once.  So if they error, they *could* have
5349          * modified something.  We have to assume they did, and dirty
5350          * the whole bucket.  This leaves us in a consistent state.
5351          */
5352         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5353              xe_off, (unsigned long long)bucket_blkno(bucket), len);
5354         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5355         if (ret) {
5356                 mlog_errno(ret);
5357                 goto out;
5358         }
5359
5360         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5361                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5362         if (ret) {
5363                 mlog_errno(ret);
5364                 goto out;
5365         }
5366
5367         xe->xe_value_size = cpu_to_le64(len);
5368
5369         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5370
5371 out:
5372         return ret;
5373 }
5374
5375 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
5376                                         struct ocfs2_xattr_search *xs,
5377                                         int len,
5378                                         struct ocfs2_xattr_set_ctxt *ctxt)
5379 {
5380         int ret, offset;
5381         struct ocfs2_xattr_entry *xe = xs->here;
5382         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
5383
5384         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
5385
5386         offset = xe - xh->xh_entries;
5387         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
5388                                                 offset, len, ctxt);
5389         if (ret)
5390                 mlog_errno(ret);
5391
5392         return ret;
5393 }
5394
5395 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
5396                                                 handle_t *handle,
5397                                                 struct ocfs2_xattr_search *xs,
5398                                                 char *val,
5399                                                 int value_len)
5400 {
5401         int ret, offset, block_off;
5402         struct ocfs2_xattr_value_root *xv;
5403         struct ocfs2_xattr_entry *xe = xs->here;
5404         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5405         void *base;
5406         struct ocfs2_xattr_value_buf vb = {
5407                 .vb_access = ocfs2_journal_access,
5408         };
5409
5410         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
5411
5412         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
5413                                                 xe - xh->xh_entries,
5414                                                 &block_off,
5415                                                 &offset);
5416         if (ret) {
5417                 mlog_errno(ret);
5418                 goto out;
5419         }
5420
5421         base = bucket_block(xs->bucket, block_off);
5422         xv = (struct ocfs2_xattr_value_root *)(base + offset +
5423                  OCFS2_XATTR_SIZE(xe->xe_name_len));
5424
5425         vb.vb_xv = xv;
5426         vb.vb_bh = xs->bucket->bu_bhs[block_off];
5427         ret = __ocfs2_xattr_set_value_outside(inode, handle,
5428                                               &vb, val, value_len);
5429         if (ret)
5430                 mlog_errno(ret);
5431 out:
5432         return ret;
5433 }
5434
5435 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5436                                   struct buffer_head *root_bh,
5437                                   u64 blkno,
5438                                   u32 cpos,
5439                                   u32 len,
5440                                   void *para)
5441 {
5442         int ret;
5443         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5444         struct inode *tl_inode = osb->osb_tl_inode;
5445         handle_t *handle;
5446         struct ocfs2_xattr_block *xb =
5447                         (struct ocfs2_xattr_block *)root_bh->b_data;
5448         struct ocfs2_alloc_context *meta_ac = NULL;
5449         struct ocfs2_cached_dealloc_ctxt dealloc;
5450         struct ocfs2_extent_tree et;
5451
5452         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5453                                           ocfs2_delete_xattr_in_bucket, para);
5454         if (ret) {
5455                 mlog_errno(ret);
5456                 return ret;
5457         }
5458
5459         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5460
5461         ocfs2_init_dealloc_ctxt(&dealloc);
5462
5463         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5464              cpos, len, (unsigned long long)blkno);
5465
5466         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5467                                                len);
5468
5469         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5470         if (ret) {
5471                 mlog_errno(ret);
5472                 return ret;
5473         }
5474
5475         mutex_lock(&tl_inode->i_mutex);
5476
5477         if (ocfs2_truncate_log_needs_flush(osb)) {
5478                 ret = __ocfs2_flush_truncate_log(osb);
5479                 if (ret < 0) {
5480                         mlog_errno(ret);
5481                         goto out;
5482                 }
5483         }
5484
5485         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5486         if (IS_ERR(handle)) {
5487                 ret = -ENOMEM;
5488                 mlog_errno(ret);
5489                 goto out;
5490         }
5491
5492         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5493                                       OCFS2_JOURNAL_ACCESS_WRITE);
5494         if (ret) {
5495                 mlog_errno(ret);
5496                 goto out_commit;
5497         }
5498
5499         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5500                                   &dealloc);
5501         if (ret) {
5502                 mlog_errno(ret);
5503                 goto out_commit;
5504         }
5505
5506         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5507
5508         ret = ocfs2_journal_dirty(handle, root_bh);
5509         if (ret) {
5510                 mlog_errno(ret);
5511                 goto out_commit;
5512         }
5513
5514         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5515         if (ret)
5516                 mlog_errno(ret);
5517
5518 out_commit:
5519         ocfs2_commit_trans(osb, handle);
5520 out:
5521         ocfs2_schedule_truncate_log_flush(osb, 1);
5522
5523         mutex_unlock(&tl_inode->i_mutex);
5524
5525         if (meta_ac)
5526                 ocfs2_free_alloc_context(meta_ac);
5527
5528         ocfs2_run_deallocs(osb, &dealloc);
5529
5530         return ret;
5531 }
5532
5533 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
5534                                          handle_t *handle,
5535                                          struct ocfs2_xattr_search *xs)
5536 {
5537         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5538         struct ocfs2_xattr_entry *last = &xh->xh_entries[
5539                                                 le16_to_cpu(xh->xh_count) - 1];
5540         int ret = 0;
5541
5542         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5543                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5544         if (ret) {
5545                 mlog_errno(ret);
5546                 return;
5547         }
5548
5549         /* Remove the old entry. */
5550         memmove(xs->here, xs->here + 1,
5551                 (void *)last - (void *)xs->here);
5552         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
5553         le16_add_cpu(&xh->xh_count, -1);
5554
5555         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5556 }
5557
5558 /*
5559  * Set the xattr name/value in the bucket specified in xs.
5560  *
5561  * As the new value in xi may be stored in the bucket or in an outside cluster,
5562  * we divide the whole process into 3 steps:
5563  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
5564  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
5565  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
5566  * 4. If the clusters for the new outside value can't be allocated, we need
5567  *    to free the xattr we allocated in set.
5568  */
5569 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5570                                      struct ocfs2_xattr_info *xi,
5571                                      struct ocfs2_xattr_search *xs,
5572                                      struct ocfs2_xattr_set_ctxt *ctxt)
5573 {
5574         int ret, local = 1;
5575         size_t value_len;
5576         char *val = (char *)xi->xi_value;
5577         struct ocfs2_xattr_entry *xe = xs->here;
5578         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->xi_name,
5579                                               xi->xi_name_len);
5580
5581         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5582                 /*
5583                  * We need to truncate the xattr storage first.
5584                  *
5585                  * If both the old and new value are stored to
5586                  * outside block, we only need to truncate
5587                  * the storage and then set the value outside.
5588                  *
5589                  * If the new value should be stored within block,
5590                  * we should free all the outside block first and
5591                  * the modification to the xattr block will be done
5592                  * by following steps.
5593                  */
5594                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
5595                         value_len = xi->xi_value_len;
5596                 else
5597                         value_len = 0;
5598
5599                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5600                                                            value_len,
5601                                                            ctxt);
5602                 if (ret)
5603                         goto out;
5604
5605                 if (value_len)
5606                         goto set_value_outside;
5607         }
5608
5609         value_len = xi->xi_value_len;
5610         /* So we have to handle the inside block change now. */
5611         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5612                 /*
5613                  * If the new value will be stored outside of block,
5614                  * initalize a new empty value root and insert it first.
5615                  */
5616                 local = 0;
5617                 xi->xi_value = &def_xv;
5618                 xi->xi_value_len = OCFS2_XATTR_ROOT_SIZE;
5619         }
5620
5621         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5622                                               name_hash, local);
5623         if (ret) {
5624                 mlog_errno(ret);
5625                 goto out;
5626         }
5627
5628         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5629                 goto out;
5630
5631         /* allocate the space now for the outside block storage. */
5632         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5633                                                    value_len, ctxt);
5634         if (ret) {
5635                 mlog_errno(ret);
5636
5637                 if (xs->not_found) {
5638                         /*
5639                          * We can't allocate enough clusters for outside
5640                          * storage and we have allocated xattr already,
5641                          * so need to remove it.
5642                          */
5643                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5644                 }
5645                 goto out;
5646         }
5647
5648 set_value_outside:
5649         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5650                                                    xs, val, value_len);
5651 out:
5652         return ret;
5653 }
5654
5655 /*
5656  * check whether the xattr bucket is filled up with the same hash value.
5657  * If we want to insert the xattr with the same hash, return -ENOSPC.
5658  * If we want to insert a xattr with different hash value, go ahead
5659  * and ocfs2_divide_xattr_bucket will handle this.
5660  */
5661 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5662                                               struct ocfs2_xattr_bucket *bucket,
5663                                               const char *name)
5664 {
5665         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5666         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5667
5668         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5669                 return 0;
5670
5671         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5672             xh->xh_entries[0].xe_name_hash) {
5673                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5674                      "hash = %u\n",
5675                      (unsigned long long)bucket_blkno(bucket),
5676                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5677                 return -ENOSPC;
5678         }
5679
5680         return 0;
5681 }
5682
5683 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5684                                              struct ocfs2_xattr_info *xi,
5685                                              struct ocfs2_xattr_search *xs,
5686                                              struct ocfs2_xattr_set_ctxt *ctxt)
5687 {
5688         struct ocfs2_xattr_header *xh;
5689         struct ocfs2_xattr_entry *xe;
5690         u16 count, header_size, xh_free_start;
5691         int free, max_free, need, old;
5692         size_t value_size = 0;
5693         size_t blocksize = inode->i_sb->s_blocksize;
5694         int ret, allocation = 0;
5695
5696         mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5697
5698 try_again:
5699         xh = xs->header;
5700         count = le16_to_cpu(xh->xh_count);
5701         xh_free_start = le16_to_cpu(xh->xh_free_start);
5702         header_size = sizeof(struct ocfs2_xattr_header) +
5703                         count * sizeof(struct ocfs2_xattr_entry);
5704         max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5705                 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5706
5707         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5708                         "of %u which exceed block size\n",
5709                         (unsigned long long)bucket_blkno(xs->bucket),
5710                         header_size);
5711
5712         if (xi->xi_value && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
5713                 value_size = OCFS2_XATTR_ROOT_SIZE;
5714         else if (xi->xi_value)
5715                 value_size = OCFS2_XATTR_SIZE(xi->xi_value_len);
5716
5717         if (xs->not_found)
5718                 need = sizeof(struct ocfs2_xattr_entry) +
5719                         OCFS2_XATTR_SIZE(xi->xi_name_len) + value_size;
5720         else {
5721                 need = value_size + OCFS2_XATTR_SIZE(xi->xi_name_len);
5722
5723                 /*
5724                  * We only replace the old value if the new length is smaller
5725                  * than the old one. Otherwise we will allocate new space in the
5726                  * bucket to store it.
5727                  */
5728                 xe = xs->here;
5729                 if (ocfs2_xattr_is_local(xe))
5730                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5731                 else
5732                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5733
5734                 if (old >= value_size)
5735                         need = 0;
5736         }
5737
5738         free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5739         /*
5740          * We need to make sure the new name/value pair
5741          * can exist in the same block.
5742          */
5743         if (xh_free_start % blocksize < need)
5744                 free -= xh_free_start % blocksize;
5745
5746         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5747              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5748              " %u\n", xs->not_found,
5749              (unsigned long long)bucket_blkno(xs->bucket),
5750              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5751              le16_to_cpu(xh->xh_name_value_len));
5752
5753         if (free < need ||
5754             (xs->not_found &&
5755              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5756                 if (need <= max_free &&
5757                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5758                         /*
5759                          * We can create the space by defragment. Since only the
5760                          * name/value will be moved, the xe shouldn't be changed
5761                          * in xs.
5762                          */
5763                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5764                                                         xs->bucket);
5765                         if (ret) {
5766                                 mlog_errno(ret);
5767                                 goto out;
5768                         }
5769
5770                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5771                         free = xh_free_start - header_size
5772                                 - OCFS2_XATTR_HEADER_GAP;
5773                         if (xh_free_start % blocksize < need)
5774                                 free -= xh_free_start % blocksize;
5775
5776                         if (free >= need)
5777                                 goto xattr_set;
5778
5779                         mlog(0, "Can't get enough space for xattr insert by "
5780                              "defragment. Need %u bytes, but we have %d, so "
5781                              "allocate new bucket for it.\n", need, free);
5782                 }
5783
5784                 /*
5785                  * We have to add new buckets or clusters and one
5786                  * allocation should leave us enough space for insert.
5787                  */
5788                 BUG_ON(allocation);
5789
5790                 /*
5791                  * We do not allow for overlapping ranges between buckets. And
5792                  * the maximum number of collisions we will allow for then is
5793                  * one bucket's worth, so check it here whether we need to
5794                  * add a new bucket for the insert.
5795                  */
5796                 ret = ocfs2_check_xattr_bucket_collision(inode,
5797                                                          xs->bucket,
5798                                                          xi->xi_name);
5799                 if (ret) {
5800                         mlog_errno(ret);
5801                         goto out;
5802                 }
5803
5804                 ret = ocfs2_add_new_xattr_bucket(inode,
5805                                                  xs->xattr_bh,
5806                                                  xs->bucket,
5807                                                  ctxt);
5808                 if (ret) {
5809                         mlog_errno(ret);
5810                         goto out;
5811                 }
5812
5813                 /*
5814                  * ocfs2_add_new_xattr_bucket() will have updated
5815                  * xs->bucket if it moved, but it will not have updated
5816                  * any of the other search fields.  Thus, we drop it and
5817                  * re-search.  Everything should be cached, so it'll be
5818                  * quick.
5819                  */
5820                 ocfs2_xattr_bucket_relse(xs->bucket);
5821                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5822                                                    xi->xi_name_index,
5823                                                    xi->xi_name, xs);
5824                 if (ret && ret != -ENODATA)
5825                         goto out;
5826                 xs->not_found = ret;
5827                 allocation = 1;
5828                 goto try_again;
5829         }
5830
5831 xattr_set:
5832         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5833 out:
5834         mlog_exit(ret);
5835         return ret;
5836 }
5837
5838 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5839                                         struct ocfs2_xattr_bucket *bucket,
5840                                         void *para)
5841 {
5842         int ret = 0, ref_credits;
5843         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5844         u16 i;
5845         struct ocfs2_xattr_entry *xe;
5846         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5847         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5848         int credits = ocfs2_remove_extent_credits(osb->sb) +
5849                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5850         struct ocfs2_xattr_value_root *xv;
5851         struct ocfs2_rm_xattr_bucket_para *args =
5852                         (struct ocfs2_rm_xattr_bucket_para *)para;
5853
5854         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5855
5856         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5857                 xe = &xh->xh_entries[i];
5858                 if (ocfs2_xattr_is_local(xe))
5859                         continue;
5860
5861                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5862                                                       i, &xv, NULL);
5863
5864                 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5865                                                          args->ref_ci,
5866                                                          args->ref_root_bh,
5867                                                          &ctxt.meta_ac,
5868                                                          &ref_credits);
5869
5870                 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5871                 if (IS_ERR(ctxt.handle)) {
5872                         ret = PTR_ERR(ctxt.handle);
5873                         mlog_errno(ret);
5874                         break;
5875                 }
5876
5877                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5878                                                         i, 0, &ctxt);
5879
5880                 ocfs2_commit_trans(osb, ctxt.handle);
5881                 if (ctxt.meta_ac) {
5882                         ocfs2_free_alloc_context(ctxt.meta_ac);
5883                         ctxt.meta_ac = NULL;
5884                 }
5885                 if (ret) {
5886                         mlog_errno(ret);
5887                         break;
5888                 }
5889         }
5890
5891         if (ctxt.meta_ac)
5892                 ocfs2_free_alloc_context(ctxt.meta_ac);
5893         ocfs2_schedule_truncate_log_flush(osb, 1);
5894         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5895         return ret;
5896 }
5897
5898 /*
5899  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5900  * or change the extent record flag), we need to recalculate
5901  * the metaecc for the whole bucket. So it is done here.
5902  *
5903  * Note:
5904  * We have to give the extra credits for the caller.
5905  */
5906 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5907                                             handle_t *handle,
5908                                             void *para)
5909 {
5910         int ret;
5911         struct ocfs2_xattr_bucket *bucket =
5912                         (struct ocfs2_xattr_bucket *)para;
5913
5914         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5915                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5916         if (ret) {
5917                 mlog_errno(ret);
5918                 return ret;
5919         }
5920
5921         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5922
5923         return 0;
5924 }
5925
5926 /*
5927  * Special action we need if the xattr value is refcounted.
5928  *
5929  * 1. If the xattr is refcounted, lock the tree.
5930  * 2. CoW the xattr if we are setting the new value and the value
5931  *    will be stored outside.
5932  * 3. In other case, decrease_refcount will work for us, so just
5933  *    lock the refcount tree, calculate the meta and credits is OK.
5934  *
5935  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5936  * currently CoW is a completed transaction, while this function
5937  * will also lock the allocators and let us deadlock. So we will
5938  * CoW the whole xattr value.
5939  */
5940 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5941                                         struct ocfs2_dinode *di,
5942                                         struct ocfs2_xattr_info *xi,
5943                                         struct ocfs2_xattr_search *xis,
5944                                         struct ocfs2_xattr_search *xbs,
5945                                         struct ocfs2_refcount_tree **ref_tree,
5946                                         int *meta_add,
5947                                         int *credits)
5948 {
5949         int ret = 0;
5950         struct ocfs2_xattr_block *xb;
5951         struct ocfs2_xattr_entry *xe;
5952         char *base;
5953         u32 p_cluster, num_clusters;
5954         unsigned int ext_flags;
5955         int name_offset, name_len;
5956         struct ocfs2_xattr_value_buf vb;
5957         struct ocfs2_xattr_bucket *bucket = NULL;
5958         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5959         struct ocfs2_post_refcount refcount;
5960         struct ocfs2_post_refcount *p = NULL;
5961         struct buffer_head *ref_root_bh = NULL;
5962
5963         if (!xis->not_found) {
5964                 xe = xis->here;
5965                 name_offset = le16_to_cpu(xe->xe_name_offset);
5966                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5967                 base = xis->base;
5968                 vb.vb_bh = xis->inode_bh;
5969                 vb.vb_access = ocfs2_journal_access_di;
5970         } else {
5971                 int i, block_off = 0;
5972                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5973                 xe = xbs->here;
5974                 name_offset = le16_to_cpu(xe->xe_name_offset);
5975                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5976                 i = xbs->here - xbs->header->xh_entries;
5977
5978                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5979                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5980                                                         bucket_xh(xbs->bucket),
5981                                                         i, &block_off,
5982                                                         &name_offset);
5983                         if (ret) {
5984                                 mlog_errno(ret);
5985                                 goto out;
5986                         }
5987                         base = bucket_block(xbs->bucket, block_off);
5988                         vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5989                         vb.vb_access = ocfs2_journal_access;
5990
5991                         if (ocfs2_meta_ecc(osb)) {
5992                                 /*create parameters for ocfs2_post_refcount. */
5993                                 bucket = xbs->bucket;
5994                                 refcount.credits = bucket->bu_blocks;
5995                                 refcount.para = bucket;
5996                                 refcount.func =
5997                                         ocfs2_xattr_bucket_post_refcount;
5998                                 p = &refcount;
5999                         }
6000                 } else {
6001                         base = xbs->base;
6002                         vb.vb_bh = xbs->xattr_bh;
6003                         vb.vb_access = ocfs2_journal_access_xb;
6004                 }
6005         }
6006
6007         if (ocfs2_xattr_is_local(xe))
6008                 goto out;
6009
6010         vb.vb_xv = (struct ocfs2_xattr_value_root *)
6011                                 (base + name_offset + name_len);
6012
6013         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
6014                                        &num_clusters, &vb.vb_xv->xr_list,
6015                                        &ext_flags);
6016         if (ret) {
6017                 mlog_errno(ret);
6018                 goto out;
6019         }
6020
6021         /*
6022          * We just need to check the 1st extent record, since we always
6023          * CoW the whole xattr. So there shouldn't be a xattr with
6024          * some REFCOUNT extent recs after the 1st one.
6025          */
6026         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
6027                 goto out;
6028
6029         ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
6030                                        1, ref_tree, &ref_root_bh);
6031         if (ret) {
6032                 mlog_errno(ret);
6033                 goto out;
6034         }
6035
6036         /*
6037          * If we are deleting the xattr or the new size will be stored inside,
6038          * cool, leave it there, the xattr truncate process will remove them
6039          * for us(it still needs the refcount tree lock and the meta, credits).
6040          * And the worse case is that every cluster truncate will split the
6041          * refcount tree, and make the original extent become 3. So we will need
6042          * 2 * cluster more extent recs at most.
6043          */
6044         if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
6045
6046                 ret = ocfs2_refcounted_xattr_delete_need(inode,
6047                                                          &(*ref_tree)->rf_ci,
6048                                                          ref_root_bh, vb.vb_xv,
6049                                                          meta_add, credits);
6050                 if (ret)
6051                         mlog_errno(ret);
6052                 goto out;
6053         }
6054
6055         ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
6056                                        *ref_tree, ref_root_bh, 0,
6057                                        le32_to_cpu(vb.vb_xv->xr_clusters), p);
6058         if (ret)
6059                 mlog_errno(ret);
6060
6061 out:
6062         brelse(ref_root_bh);
6063         return ret;
6064 }
6065
6066 /*
6067  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
6068  * The physical clusters will be added to refcount tree.
6069  */
6070 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
6071                                 struct ocfs2_xattr_value_root *xv,
6072                                 struct ocfs2_extent_tree *value_et,
6073                                 struct ocfs2_caching_info *ref_ci,
6074                                 struct buffer_head *ref_root_bh,
6075                                 struct ocfs2_cached_dealloc_ctxt *dealloc,
6076                                 struct ocfs2_post_refcount *refcount)
6077 {
6078         int ret = 0;
6079         u32 clusters = le32_to_cpu(xv->xr_clusters);
6080         u32 cpos, p_cluster, num_clusters;
6081         struct ocfs2_extent_list *el = &xv->xr_list;
6082         unsigned int ext_flags;
6083
6084         cpos = 0;
6085         while (cpos < clusters) {
6086                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
6087                                                &num_clusters, el, &ext_flags);
6088
6089                 cpos += num_clusters;
6090                 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
6091                         continue;
6092
6093                 BUG_ON(!p_cluster);
6094
6095                 ret = ocfs2_add_refcount_flag(inode, value_et,
6096                                               ref_ci, ref_root_bh,
6097                                               cpos - num_clusters,
6098                                               p_cluster, num_clusters,
6099                                               dealloc, refcount);
6100                 if (ret) {
6101                         mlog_errno(ret);
6102                         break;
6103                 }
6104         }
6105
6106         return ret;
6107 }
6108
6109 /*
6110  * Given a normal ocfs2_xattr_header, refcount all the entries which
6111  * have value stored outside.
6112  * Used for xattrs stored in inode and ocfs2_xattr_block.
6113  */
6114 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
6115                                 struct ocfs2_xattr_value_buf *vb,
6116                                 struct ocfs2_xattr_header *header,
6117                                 struct ocfs2_caching_info *ref_ci,
6118                                 struct buffer_head *ref_root_bh,
6119                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6120 {
6121
6122         struct ocfs2_xattr_entry *xe;
6123         struct ocfs2_xattr_value_root *xv;
6124         struct ocfs2_extent_tree et;
6125         int i, ret = 0;
6126
6127         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
6128                 xe = &header->xh_entries[i];
6129
6130                 if (ocfs2_xattr_is_local(xe))
6131                         continue;
6132
6133                 xv = (struct ocfs2_xattr_value_root *)((void *)header +
6134                         le16_to_cpu(xe->xe_name_offset) +
6135                         OCFS2_XATTR_SIZE(xe->xe_name_len));
6136
6137                 vb->vb_xv = xv;
6138                 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
6139
6140                 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
6141                                                         ref_ci, ref_root_bh,
6142                                                         dealloc, NULL);
6143                 if (ret) {
6144                         mlog_errno(ret);
6145                         break;
6146                 }
6147         }
6148
6149         return ret;
6150 }
6151
6152 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
6153                                 struct buffer_head *fe_bh,
6154                                 struct ocfs2_caching_info *ref_ci,
6155                                 struct buffer_head *ref_root_bh,
6156                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6157 {
6158         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6159         struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
6160                                 (fe_bh->b_data + inode->i_sb->s_blocksize -
6161                                 le16_to_cpu(di->i_xattr_inline_size));
6162         struct ocfs2_xattr_value_buf vb = {
6163                 .vb_bh = fe_bh,
6164                 .vb_access = ocfs2_journal_access_di,
6165         };
6166
6167         return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6168                                                   ref_ci, ref_root_bh, dealloc);
6169 }
6170
/*
 * Arguments handed through the opaque "para" pointer to the per-bucket
 * refcount callback; each member is forwarded unchanged to
 * ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
        /* caching info passed on as ref_ci */
        struct ocfs2_caching_info *ref_ci;
        /* buffer head passed on as ref_root_bh */
        struct buffer_head *ref_root_bh;
        /* dealloc context passed on as dealloc */
        struct ocfs2_cached_dealloc_ctxt *dealloc;
};
6176
6177 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
6178                                            struct ocfs2_xattr_bucket *bucket,
6179                                            int offset,
6180                                            struct ocfs2_xattr_value_root **xv,
6181                                            struct buffer_head **bh)
6182 {
6183         int ret, block_off, name_offset;
6184         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
6185         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6186         void *base;
6187
6188         ret = ocfs2_xattr_bucket_get_name_value(sb,
6189                                                 bucket_xh(bucket),
6190                                                 offset,
6191                                                 &block_off,
6192                                                 &name_offset);
6193         if (ret) {
6194                 mlog_errno(ret);
6195                 goto out;
6196         }
6197
6198         base = bucket_block(bucket, block_off);
6199
6200         *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6201                          OCFS2_XATTR_SIZE(xe->xe_name_len));
6202
6203         if (bh)
6204                 *bh = bucket->bu_bhs[block_off];
6205 out:
6206         return ret;
6207 }
6208
6209 /*
6210  * For a given xattr bucket, refcount all the entries which
6211  * have value stored outside.
6212  */
6213 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6214                                              struct ocfs2_xattr_bucket *bucket,
6215                                              void *para)
6216 {
6217         int i, ret = 0;
6218         struct ocfs2_extent_tree et;
6219         struct ocfs2_xattr_tree_value_refcount_para *ref =
6220                         (struct ocfs2_xattr_tree_value_refcount_para *)para;
6221         struct ocfs2_xattr_header *xh =
6222                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6223         struct ocfs2_xattr_entry *xe;
6224         struct ocfs2_xattr_value_buf vb = {
6225                 .vb_access = ocfs2_journal_access,
6226         };
6227         struct ocfs2_post_refcount refcount = {
6228                 .credits = bucket->bu_blocks,
6229                 .para = bucket,
6230                 .func = ocfs2_xattr_bucket_post_refcount,
6231         };
6232         struct ocfs2_post_refcount *p = NULL;
6233
6234         /* We only need post_refcount if we support metaecc. */
6235         if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6236                 p = &refcount;
6237
6238         mlog(0, "refcount bucket %llu, count = %u\n",
6239              (unsigned long long)bucket_blkno(bucket),
6240              le16_to_cpu(xh->xh_count));
6241         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6242                 xe = &xh->xh_entries[i];
6243
6244                 if (ocfs2_xattr_is_local(xe))
6245                         continue;
6246
6247                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6248                                                       &vb.vb_xv, &vb.vb_bh);
6249                 if (ret) {
6250                         mlog_errno(ret);
6251                         break;
6252                 }
6253
6254                 ocfs2_init_xattr_value_extent_tree(&et,
6255                                                    INODE_CACHE(inode), &vb);
6256
6257                 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6258                                                         &et, ref->ref_ci,
6259                                                         ref->ref_root_bh,
6260                                                         ref->dealloc, p);
6261                 if (ret) {
6262                         mlog_errno(ret);
6263                         break;
6264                 }
6265         }
6266
6267         return ret;
6268
6269 }
6270
6271 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6272                                      struct buffer_head *root_bh,
6273                                      u64 blkno, u32 cpos, u32 len, void *para)
6274 {
6275         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6276                                            ocfs2_xattr_bucket_value_refcount,
6277                                            para);
6278 }
6279
6280 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6281                                 struct buffer_head *blk_bh,
6282                                 struct ocfs2_caching_info *ref_ci,
6283                                 struct buffer_head *ref_root_bh,
6284                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6285 {
6286         int ret = 0;
6287         struct ocfs2_xattr_block *xb =
6288                                 (struct ocfs2_xattr_block *)blk_bh->b_data;
6289
6290         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6291                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6292                 struct ocfs2_xattr_value_buf vb = {
6293                         .vb_bh = blk_bh,
6294                         .vb_access = ocfs2_journal_access_xb,
6295                 };
6296
6297                 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6298                                                          ref_ci, ref_root_bh,
6299                                                          dealloc);
6300         } else {
6301                 struct ocfs2_xattr_tree_value_refcount_para para = {
6302                         .ref_ci = ref_ci,
6303                         .ref_root_bh = ref_root_bh,
6304                         .dealloc = dealloc,
6305                 };
6306
6307                 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6308                                                 ocfs2_refcount_xattr_tree_rec,
6309                                                 &para);
6310         }
6311
6312         return ret;
6313 }
6314
6315 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6316                                      struct buffer_head *fe_bh,
6317                                      struct ocfs2_caching_info *ref_ci,
6318                                      struct buffer_head *ref_root_bh,
6319                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
6320 {
6321         int ret = 0;
6322         struct ocfs2_inode_info *oi = OCFS2_I(inode);
6323         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6324         struct buffer_head *blk_bh = NULL;
6325
6326         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6327                 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6328                                                          ref_ci, ref_root_bh,
6329                                                          dealloc);
6330                 if (ret) {
6331                         mlog_errno(ret);
6332                         goto out;
6333                 }
6334         }
6335
6336         if (!di->i_xattr_loc)
6337                 goto out;
6338
6339         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6340                                      &blk_bh);
6341         if (ret < 0) {
6342                 mlog_errno(ret);
6343                 goto out;
6344         }
6345
6346         ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6347                                                 ref_root_bh, dealloc);
6348         if (ret)
6349                 mlog_errno(ret);
6350
6351         brelse(blk_bh);
6352 out:
6353
6354         return ret;
6355 }
6356
/*
 * Filter callback for xattr reflink: a zero return means the entry is
 * not reflinked and is removed from the new xattr header.
 */
typedef int should_xattr_reflinked(struct ocfs2_xattr_entry *xe);

/*
 * Everything the xattr reflink code needs to carry around.
 * old_bh and new_bh are the inode buffer heads of the old and the new
 * inode respectively.
 */
struct ocfs2_xattr_reflink {
        /* inode the xattrs are taken from */
        struct inode *old_inode;
        /* inode the xattrs are reflinked into */
        struct inode *new_inode;
        /* inode block of old_inode */
        struct buffer_head *old_bh;
        /* inode block of new_inode */
        struct buffer_head *new_bh;
        /* caching info handed to ocfs2_increase_refcount() */
        struct ocfs2_caching_info *ref_ci;
        /* refcount root block handed to ocfs2_increase_refcount() */
        struct buffer_head *ref_root_bh;
        /* dealloc context handed to ocfs2_increase_refcount() */
        struct ocfs2_cached_dealloc_ctxt *dealloc;
        /* optional per-entry filter; may be NULL */
        should_xattr_reflinked *xattr_reflinked;
};
6372
/*
 * Given an xattr header and the index of an entry in it, return the
 * matching value root in *xv and, if ret_bh is non-NULL, the buffer head
 * that contains it.
 * Xattrs stored in the inode, in an xattr block and in an xattr tree
 * each need their own implementation.
 */
typedef int get_xattr_value_root(struct super_block *sb,
                                 struct buffer_head *bh,
                                 struct ocfs2_xattr_header *xh,
                                 int offset,
                                 struct ocfs2_xattr_value_root **xv,
                                 struct buffer_head **ret_bh,
                                 void *para);
6385
6386 /*
6387  * Calculate all the xattr value root metadata stored in this xattr header and
6388  * credits we need if we create them from the scratch.
6389  * We use get_xattr_value_root so that all types of xattr container can use it.
6390  */
6391 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6392                                              struct buffer_head *bh,
6393                                              struct ocfs2_xattr_header *xh,
6394                                              int *metas, int *credits,
6395                                              int *num_recs,
6396                                              get_xattr_value_root *func,
6397                                              void *para)
6398 {
6399         int i, ret = 0;
6400         struct ocfs2_xattr_value_root *xv;
6401         struct ocfs2_xattr_entry *xe;
6402
6403         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6404                 xe = &xh->xh_entries[i];
6405                 if (ocfs2_xattr_is_local(xe))
6406                         continue;
6407
6408                 ret = func(sb, bh, xh, i, &xv, NULL, para);
6409                 if (ret) {
6410                         mlog_errno(ret);
6411                         break;
6412                 }
6413
6414                 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6415                           le16_to_cpu(xv->xr_list.l_next_free_rec);
6416
6417                 *credits += ocfs2_calc_extend_credits(sb,
6418                                                 &def_xv.xv.xr_list,
6419                                                 le32_to_cpu(xv->xr_clusters));
6420
6421                 /*
6422                  * If the value is a tree with depth > 1, We don't go deep
6423                  * to the extent block, so just calculate a maximum record num.
6424                  */
6425                 if (!xv->xr_list.l_tree_depth)
6426                         *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6427                 else
6428                         *num_recs += ocfs2_clusters_for_bytes(sb,
6429                                                               XATTR_SIZE_MAX);
6430         }
6431
6432         return ret;
6433 }
6434
6435 /* Used by xattr inode and block to return the right xv and buffer_head. */
6436 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6437                                       struct buffer_head *bh,
6438                                       struct ocfs2_xattr_header *xh,
6439                                       int offset,
6440                                       struct ocfs2_xattr_value_root **xv,
6441                                       struct buffer_head **ret_bh,
6442                                       void *para)
6443 {
6444         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6445
6446         *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6447                 le16_to_cpu(xe->xe_name_offset) +
6448                 OCFS2_XATTR_SIZE(xe->xe_name_len));
6449
6450         if (ret_bh)
6451                 *ret_bh = bh;
6452
6453         return 0;
6454 }
6455
6456 /*
6457  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6458  * It is only used for inline xattr and xattr block.
6459  */
6460 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6461                                         struct ocfs2_xattr_header *xh,
6462                                         struct buffer_head *ref_root_bh,
6463                                         int *credits,
6464                                         struct ocfs2_alloc_context **meta_ac)
6465 {
6466         int ret, meta_add = 0, num_recs = 0;
6467         struct ocfs2_refcount_block *rb =
6468                         (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6469
6470         *credits = 0;
6471
6472         ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6473                                                 &meta_add, credits, &num_recs,
6474                                                 ocfs2_get_xattr_value_root,
6475                                                 NULL);
6476         if (ret) {
6477                 mlog_errno(ret);
6478                 goto out;
6479         }
6480
6481         /*
6482          * We need to add/modify num_recs in refcount tree, so just calculate
6483          * an approximate number we need for refcount tree change.
6484          * Sometimes we need to split the tree, and after split,  half recs
6485          * will be moved to the new block, and a new block can only provide
6486          * half number of recs. So we multiple new blocks by 2.
6487          */
6488         num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6489         meta_add += num_recs;
6490         *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6491         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6492                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6493                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6494         else
6495                 *credits += 1;
6496
6497         ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6498         if (ret)
6499                 mlog_errno(ret);
6500
6501 out:
6502         return ret;
6503 }
6504
6505 /*
6506  * Given a xattr header, reflink all the xattrs in this container.
6507  * It can be used for inode, block and bucket.
6508  *
6509  * NOTE:
6510  * Before we call this function, the caller has memcpy the xattr in
6511  * old_xh to the new_xh.
6512  *
6513  * If args.xattr_reflinked is set, call it to decide whether the xe should
6514  * be reflinked or not. If not, remove it from the new xattr header.
6515  */
6516 static int ocfs2_reflink_xattr_header(handle_t *handle,
6517                                       struct ocfs2_xattr_reflink *args,
6518                                       struct buffer_head *old_bh,
6519                                       struct ocfs2_xattr_header *xh,
6520                                       struct buffer_head *new_bh,
6521                                       struct ocfs2_xattr_header *new_xh,
6522                                       struct ocfs2_xattr_value_buf *vb,
6523                                       struct ocfs2_alloc_context *meta_ac,
6524                                       get_xattr_value_root *func,
6525                                       void *para)
6526 {
6527         int ret = 0, i, j;
6528         struct super_block *sb = args->old_inode->i_sb;
6529         struct buffer_head *value_bh;
6530         struct ocfs2_xattr_entry *xe, *last;
6531         struct ocfs2_xattr_value_root *xv, *new_xv;
6532         struct ocfs2_extent_tree data_et;
6533         u32 clusters, cpos, p_cluster, num_clusters;
6534         unsigned int ext_flags = 0;
6535
6536         mlog(0, "reflink xattr in container %llu, count = %u\n",
6537              (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6538
6539         last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6540         for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6541                 xe = &xh->xh_entries[i];
6542
6543                 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6544                         xe = &new_xh->xh_entries[j];
6545
6546                         le16_add_cpu(&new_xh->xh_count, -1);
6547                         if (new_xh->xh_count) {
6548                                 memmove(xe, xe + 1,
6549                                         (void *)last - (void *)xe);
6550                                 memset(last, 0,
6551                                        sizeof(struct ocfs2_xattr_entry));
6552                         }
6553
6554                         /*
6555                          * We don't want j to increase in the next round since
6556                          * it is already moved ahead.
6557                          */
6558                         j--;
6559                         continue;
6560                 }
6561
6562                 if (ocfs2_xattr_is_local(xe))
6563                         continue;
6564
6565                 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6566                 if (ret) {
6567                         mlog_errno(ret);
6568                         break;
6569                 }
6570
6571                 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6572                 if (ret) {
6573                         mlog_errno(ret);
6574                         break;
6575                 }
6576
6577                 /*
6578                  * For the xattr which has l_tree_depth = 0, all the extent
6579                  * recs have already be copied to the new xh with the
6580                  * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6581                  * increase the refount count int the refcount tree.
6582                  *
6583                  * For the xattr which has l_tree_depth > 0, we need
6584                  * to initialize it to the empty default value root,
6585                  * and then insert the extents one by one.
6586                  */
6587                 if (xv->xr_list.l_tree_depth) {
6588                         memcpy(new_xv, &def_xv, sizeof(def_xv));
6589                         vb->vb_xv = new_xv;
6590                         vb->vb_bh = value_bh;
6591                         ocfs2_init_xattr_value_extent_tree(&data_et,
6592                                         INODE_CACHE(args->new_inode), vb);
6593                 }
6594
6595                 clusters = le32_to_cpu(xv->xr_clusters);
6596                 cpos = 0;
6597                 while (cpos < clusters) {
6598                         ret = ocfs2_xattr_get_clusters(args->old_inode,
6599                                                        cpos,
6600                                                        &p_cluster,
6601                                                        &num_clusters,
6602                                                        &xv->xr_list,
6603                                                        &ext_flags);
6604                         if (ret) {
6605                                 mlog_errno(ret);
6606                                 goto out;
6607                         }
6608
6609                         BUG_ON(!p_cluster);
6610
6611                         if (xv->xr_list.l_tree_depth) {
6612                                 ret = ocfs2_insert_extent(handle,
6613                                                 &data_et, cpos,
6614                                                 ocfs2_clusters_to_blocks(
6615                                                         args->old_inode->i_sb,
6616                                                         p_cluster),
6617                                                 num_clusters, ext_flags,
6618                                                 meta_ac);
6619                                 if (ret) {
6620                                         mlog_errno(ret);
6621                                         goto out;
6622                                 }
6623                         }
6624
6625                         ret = ocfs2_increase_refcount(handle, args->ref_ci,
6626                                                       args->ref_root_bh,
6627                                                       p_cluster, num_clusters,
6628                                                       meta_ac, args->dealloc);
6629                         if (ret) {
6630                                 mlog_errno(ret);
6631                                 goto out;
6632                         }
6633
6634                         cpos += num_clusters;
6635                 }
6636         }
6637
6638 out:
6639         return ret;
6640 }
6641
6642 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6643 {
6644         int ret = 0, credits = 0;
6645         handle_t *handle;
6646         struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6647         struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6648         int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6649         int header_off = osb->sb->s_blocksize - inline_size;
6650         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6651                                         (args->old_bh->b_data + header_off);
6652         struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6653                                         (args->new_bh->b_data + header_off);
6654         struct ocfs2_alloc_context *meta_ac = NULL;
6655         struct ocfs2_inode_info *new_oi;
6656         struct ocfs2_dinode *new_di;
6657         struct ocfs2_xattr_value_buf vb = {
6658                 .vb_bh = args->new_bh,
6659                 .vb_access = ocfs2_journal_access_di,
6660         };
6661
6662         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6663                                                   &credits, &meta_ac);
6664         if (ret) {
6665                 mlog_errno(ret);
6666                 goto out;
6667         }
6668
6669         handle = ocfs2_start_trans(osb, credits);
6670         if (IS_ERR(handle)) {
6671                 ret = PTR_ERR(handle);
6672                 mlog_errno(ret);
6673                 goto out;
6674         }
6675
6676         ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6677                                       args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6678         if (ret) {
6679                 mlog_errno(ret);
6680                 goto out_commit;
6681         }
6682
6683         memcpy(args->new_bh->b_data + header_off,
6684                args->old_bh->b_data + header_off, inline_size);
6685
6686         new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6687         new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6688
6689         ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6690                                          args->new_bh, new_xh, &vb, meta_ac,
6691                                          ocfs2_get_xattr_value_root, NULL);
6692         if (ret) {
6693                 mlog_errno(ret);
6694                 goto out_commit;
6695         }
6696
6697         new_oi = OCFS2_I(args->new_inode);
6698         spin_lock(&new_oi->ip_lock);
6699         new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6700         new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6701         spin_unlock(&new_oi->ip_lock);
6702
6703         ocfs2_journal_dirty(handle, args->new_bh);
6704
6705 out_commit:
6706         ocfs2_commit_trans(osb, handle);
6707
6708 out:
6709         if (meta_ac)
6710                 ocfs2_free_alloc_context(meta_ac);
6711         return ret;
6712 }
6713
6714 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6715                                           struct buffer_head *fe_bh,
6716                                           struct buffer_head **ret_bh,
6717                                           int indexed)
6718 {
6719         int ret;
6720         handle_t *handle;
6721         struct ocfs2_alloc_context *meta_ac;
6722         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6723
6724         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6725         if (ret < 0) {
6726                 mlog_errno(ret);
6727                 return ret;
6728         }
6729
6730         handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6731         if (IS_ERR(handle)) {
6732                 ret = PTR_ERR(handle);
6733                 mlog_errno(ret);
6734                 goto out;
6735         }
6736
6737         mlog(0, "create new xattr block for inode %llu, index = %d\n",
6738              (unsigned long long)fe_bh->b_blocknr, indexed);
6739         ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6740                                        meta_ac, ret_bh, indexed);
6741         if (ret)
6742                 mlog_errno(ret);
6743
6744         ocfs2_commit_trans(osb, handle);
6745 out:
6746         ocfs2_free_alloc_context(meta_ac);
6747         return ret;
6748 }
6749
/*
 * Reflink the contents of a non-indexed xattr block.
 *
 * Everything from the xattr header to the end of the block is copied from
 * blk_bh into new_blk_bh, then ocfs2_reflink_xattr_header() is run over the
 * copied header, all inside one transaction.  If the new inode does not yet
 * have OCFS2_HAS_XATTR_FL set, the flag is added to both the in-memory inode
 * info and the on-disk dinode held in args->new_bh.
 *
 * Returns 0 on success, otherwise the error of the first step that failed.
 */
static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
                                     struct buffer_head *blk_bh,
                                     struct buffer_head *new_blk_bh)
{
        int ret = 0, credits = 0;
        handle_t *handle;
        struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
        struct ocfs2_dinode *new_di;
        struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
        /* Byte offset of the xattr header inside an xattr block. */
        int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
        struct ocfs2_xattr_block *xb =
                        (struct ocfs2_xattr_block *)blk_bh->b_data;
        struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
        struct ocfs2_xattr_block *new_xb =
                        (struct ocfs2_xattr_block *)new_blk_bh->b_data;
        struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
        /*
         * NOTE(review): not initialized here; presumably always set by
         * ocfs2_reflink_lock_xattr_allocators() on success - confirm.
         */
        struct ocfs2_alloc_context *meta_ac;
        struct ocfs2_xattr_value_buf vb = {
                .vb_bh = new_blk_bh,
                .vb_access = ocfs2_journal_access_xb,
        };

        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
                                                  &credits, &meta_ac);
        if (ret) {
                mlog_errno(ret);
                /* Nothing has been started yet, so there is nothing to undo. */
                return ret;
        }

        /* One more credits in case we need to add xattr flags in new inode. */
        handle = ocfs2_start_trans(osb, credits + 1);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                mlog_errno(ret);
                goto out;
        }

        /* The dinode is only journaled when its feature flags will change. */
        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
                ret = ocfs2_journal_access_di(handle,
                                              INODE_CACHE(args->new_inode),
                                              args->new_bh,
                                              OCFS2_JOURNAL_ACCESS_WRITE);
                if (ret) {
                        mlog_errno(ret);
                        goto out_commit;
                }
        }

        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
                                      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        /* Copy the header and everything after it; the bytes before it stay. */
        memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
               osb->sb->s_blocksize - header_off);

        ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
                                         new_blk_bh, new_xh, &vb, meta_ac,
                                         ocfs2_get_xattr_value_root, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        ocfs2_journal_dirty(handle, new_blk_bh);

        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
                new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
                /* ip_lock covers the in-memory flag update and its disk copy. */
                spin_lock(&new_oi->ip_lock);
                new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
                new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
                spin_unlock(&new_oi->ip_lock);

                ocfs2_journal_dirty(handle, args->new_bh);
        }

out_commit:
        ocfs2_commit_trans(osb, handle);

out:
        ocfs2_free_alloc_context(meta_ac);
        return ret;
}
6835
/*
 * State shared by the callbacks that reflink an indexed (bucket based)
 * xattr tree; filled in by ocfs2_reflink_xattr_tree().
 */
struct ocfs2_reflink_xattr_tree_args {
        /* The overall reflink request (old/new inode, refcount tree, ...). */
        struct ocfs2_xattr_reflink *reflink;
        /* Xattr block of the inode being reflinked from. */
        struct buffer_head *old_blk_bh;
        /* Xattr block of the new inode; root of the new xattr extent tree. */
        struct buffer_head *new_blk_bh;
        /* Scratch bucket used to read each source bucket. */
        struct ocfs2_xattr_bucket *old_bucket;
        /* Scratch bucket used to build each destination bucket. */
        struct ocfs2_xattr_bucket *new_bucket;
};
6843
6844 /*
6845  * NOTE:
6846  * We have to handle the case that both old bucket and new bucket
6847  * will call this function to get the right ret_bh.
6848  * So The caller must give us the right bh.
6849  */
6850 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6851                                         struct buffer_head *bh,
6852                                         struct ocfs2_xattr_header *xh,
6853                                         int offset,
6854                                         struct ocfs2_xattr_value_root **xv,
6855                                         struct buffer_head **ret_bh,
6856                                         void *para)
6857 {
6858         struct ocfs2_reflink_xattr_tree_args *args =
6859                         (struct ocfs2_reflink_xattr_tree_args *)para;
6860         struct ocfs2_xattr_bucket *bucket;
6861
6862         if (bh == args->old_bucket->bu_bhs[0])
6863                 bucket = args->old_bucket;
6864         else
6865                 bucket = args->new_bucket;
6866
6867         return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6868                                                xv, ret_bh);
6869 }
6870
/*
 * Running totals gathered while walking the buckets of one xattr extent
 * record (see ocfs2_calc_value_tree_metas()).
 */
struct ocfs2_value_tree_metas {
        /* Number of metadata blocks that will have to be reserved. */
        int num_metas;
        /* Journal credits accumulated so far. */
        int credits;
        /* Record count later used to size the refcount tree change. */
        int num_recs;
};
6876
/*
 * Value-root lookup callback for ocfs2_calc_value_tree_metas(): para is the
 * bucket being examined, and the lookup is forwarded to
 * ocfs2_get_xattr_tree_value_root() for that bucket.  bh and xh are unused.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        struct ocfs2_xattr_bucket *target = para;

        return ocfs2_get_xattr_tree_value_root(sb, target, offset,
                                               xv, ret_bh);
}
6891
6892 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6893                                       struct ocfs2_xattr_bucket *bucket,
6894                                       void *para)
6895 {
6896         struct ocfs2_value_tree_metas *metas =
6897                         (struct ocfs2_value_tree_metas *)para;
6898         struct ocfs2_xattr_header *xh =
6899                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6900
6901         /* Add the credits for this bucket first. */
6902         metas->credits += bucket->bu_blocks;
6903         return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6904                                         xh, &metas->num_metas,
6905                                         &metas->credits, &metas->num_recs,
6906                                         ocfs2_value_tree_metas_in_bucket,
6907                                         bucket);
6908 }
6909
6910 /*
6911  * Given a xattr extent rec starting from blkno and having len clusters,
6912  * iterate all the buckets calculate how much metadata we need for reflinking
6913  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6914  */
6915 static int ocfs2_lock_reflink_xattr_rec_allocators(
6916                                 struct ocfs2_reflink_xattr_tree_args *args,
6917                                 struct ocfs2_extent_tree *xt_et,
6918                                 u64 blkno, u32 len, int *credits,
6919                                 struct ocfs2_alloc_context **meta_ac,
6920                                 struct ocfs2_alloc_context **data_ac)
6921 {
6922         int ret, num_free_extents;
6923         struct ocfs2_value_tree_metas metas;
6924         struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6925         struct ocfs2_refcount_block *rb;
6926
6927         memset(&metas, 0, sizeof(metas));
6928
6929         ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6930                                           ocfs2_calc_value_tree_metas, &metas);
6931         if (ret) {
6932                 mlog_errno(ret);
6933                 goto out;
6934         }
6935
6936         *credits = metas.credits;
6937
6938         /*
6939          * Calculate we need for refcount tree change.
6940          *
6941          * We need to add/modify num_recs in refcount tree, so just calculate
6942          * an approximate number we need for refcount tree change.
6943          * Sometimes we need to split the tree, and after split,  half recs
6944          * will be moved to the new block, and a new block can only provide
6945          * half number of recs. So we multiple new blocks by 2.
6946          * In the end, we have to add credits for modifying the already
6947          * existed refcount block.
6948          */
6949         rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6950         metas.num_recs =
6951                 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6952                  ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6953         metas.num_metas += metas.num_recs;
6954         *credits += metas.num_recs +
6955                     metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6956         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6957                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6958                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6959         else
6960                 *credits += 1;
6961
6962         /* count in the xattr tree change. */
6963         num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6964         if (num_free_extents < 0) {
6965                 ret = num_free_extents;
6966                 mlog_errno(ret);
6967                 goto out;
6968         }
6969
6970         if (num_free_extents < len)
6971                 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6972
6973         *credits += ocfs2_calc_extend_credits(osb->sb,
6974                                               xt_et->et_root_el, len);
6975
6976         if (metas.num_metas) {
6977                 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6978                                                         meta_ac);
6979                 if (ret) {
6980                         mlog_errno(ret);
6981                         goto out;
6982                 }
6983         }
6984
6985         if (len) {
6986                 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6987                 if (ret)
6988                         mlog_errno(ret);
6989         }
6990 out:
6991         if (ret) {
6992                 if (*meta_ac) {
6993                         ocfs2_free_alloc_context(*meta_ac);
6994                         meta_ac = NULL;
6995                 }
6996         }
6997
6998         return ret;
6999 }
7000
/*
 * Copy a run of xattr buckets starting at blkno into the blocks starting at
 * new_blkno and run ocfs2_reflink_xattr_header() over each copied bucket.
 *
 * The loop is bounded by clusters * buckets-per-cluster at first; once the
 * first bucket has been read the bound is replaced by that bucket's
 * xh_num_buckets.  args->old_bucket and args->new_bucket serve as scratch
 * buckets and are released before returning.  data_ac is not referenced by
 * this function.
 *
 * Returns 0 on success or the error of the first step that failed.
 */
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                                u64 blkno, u64 new_blkno, u32 clusters,
                                struct ocfs2_alloc_context *meta_ac,
                                struct ocfs2_alloc_context *data_ac,
                                struct ocfs2_reflink_xattr_tree_args *args)
{
        int i, j, ret = 0;
        struct super_block *sb = args->reflink->old_inode->i_sb;
        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
        u32 num_buckets = clusters * bpc;
        /* Blocks per bucket; also the stride of blkno/new_blkno per pass. */
        int bpb = args->old_bucket->bu_blocks;
        struct ocfs2_xattr_value_buf vb = {
                .vb_access = ocfs2_journal_access,
        };

        for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
                ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /*
                 * The real bucket num in this series of blocks is stored
                 * in the 1st bucket.
                 */
                if (i == 0)
                        num_buckets = le16_to_cpu(
                                bucket_xh(args->old_bucket)->xh_num_buckets);

                ret = ocfs2_xattr_bucket_journal_access(handle,
                                                args->new_bucket,
                                                OCFS2_JOURNAL_ACCESS_CREATE);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /* Duplicate the old bucket block by block. */
                for (j = 0; j < bpb; j++)
                        memcpy(bucket_block(args->new_bucket, j),
                               bucket_block(args->old_bucket, j),
                               sb->s_blocksize);

                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
                                        args->old_bucket->bu_bhs[0],
                                        bucket_xh(args->old_bucket),
                                        args->new_bucket->bu_bhs[0],
                                        bucket_xh(args->new_bucket),
                                        &vb, meta_ac,
                                        ocfs2_get_reflink_xattr_value_root,
                                        args);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /*
                 * Re-access and dirty the bucket to calculate metaecc.
                 * Because we may extend the transaction in reflink_xattr_header
                 * which will let the already accessed block gone.
                 */
                ret = ocfs2_xattr_bucket_journal_access(handle,
                                                args->new_bucket,
                                                OCFS2_JOURNAL_ACCESS_WRITE);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
                ocfs2_xattr_bucket_relse(args->old_bucket);
                ocfs2_xattr_bucket_relse(args->new_bucket);
        }

        /* Also covers the early-exit paths that broke out of the loop. */
        ocfs2_xattr_bucket_relse(args->old_bucket);
        ocfs2_xattr_bucket_relse(args->new_bucket);
        return ret;
}
7087 /*
7088  * Create the same xattr extent record in the new inode's xattr tree.
7089  */
7090 static int ocfs2_reflink_xattr_rec(struct inode *inode,
7091                                    struct buffer_head *root_bh,
7092                                    u64 blkno,
7093                                    u32 cpos,
7094                                    u32 len,
7095                                    void *para)
7096 {
7097         int ret, credits = 0;
7098         u32 p_cluster, num_clusters;
7099         u64 new_blkno;
7100         handle_t *handle;
7101         struct ocfs2_reflink_xattr_tree_args *args =
7102                         (struct ocfs2_reflink_xattr_tree_args *)para;
7103         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7104         struct ocfs2_alloc_context *meta_ac = NULL;
7105         struct ocfs2_alloc_context *data_ac = NULL;
7106         struct ocfs2_extent_tree et;
7107
7108         ocfs2_init_xattr_tree_extent_tree(&et,
7109                                           INODE_CACHE(args->reflink->new_inode),
7110                                           args->new_blk_bh);
7111
7112         ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7113                                                       len, &credits,
7114                                                       &meta_ac, &data_ac);
7115         if (ret) {
7116                 mlog_errno(ret);
7117                 goto out;
7118         }
7119
7120         handle = ocfs2_start_trans(osb, credits);
7121         if (IS_ERR(handle)) {
7122                 ret = PTR_ERR(handle);
7123                 mlog_errno(ret);
7124                 goto out;
7125         }
7126
7127         ret = ocfs2_claim_clusters(osb, handle, data_ac,
7128                                    len, &p_cluster, &num_clusters);
7129         if (ret) {
7130                 mlog_errno(ret);
7131                 goto out_commit;
7132         }
7133
7134         new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
7135
7136         mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
7137              (unsigned long long)blkno, (unsigned long long)new_blkno, len);
7138         ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
7139                                           meta_ac, data_ac, args);
7140         if (ret) {
7141                 mlog_errno(ret);
7142                 goto out_commit;
7143         }
7144
7145         mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
7146              (unsigned long long)new_blkno, len, cpos);
7147         ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
7148                                   len, 0, meta_ac);
7149         if (ret)
7150                 mlog_errno(ret);
7151
7152 out_commit:
7153         ocfs2_commit_trans(osb, handle);
7154
7155 out:
7156         if (meta_ac)
7157                 ocfs2_free_alloc_context(meta_ac);
7158         if (data_ac)
7159                 ocfs2_free_alloc_context(data_ac);
7160         return ret;
7161 }
7162
7163 /*
7164  * Create reflinked xattr buckets.
7165  * We will add bucket one by one, and refcount all the xattrs in the bucket
7166  * if they are stored outside.
7167  */
7168 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7169                                     struct buffer_head *blk_bh,
7170                                     struct buffer_head *new_blk_bh)
7171 {
7172         int ret;
7173         struct ocfs2_reflink_xattr_tree_args para;
7174
7175         memset(&para, 0, sizeof(para));
7176         para.reflink = args;
7177         para.old_blk_bh = blk_bh;
7178         para.new_blk_bh = new_blk_bh;
7179
7180         para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7181         if (!para.old_bucket) {
7182                 mlog_errno(-ENOMEM);
7183                 return -ENOMEM;
7184         }
7185
7186         para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7187         if (!para.new_bucket) {
7188                 ret = -ENOMEM;
7189                 mlog_errno(ret);
7190                 goto out;
7191         }
7192
7193         ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7194                                               ocfs2_reflink_xattr_rec,
7195                                               &para);
7196         if (ret)
7197                 mlog_errno(ret);
7198
7199 out:
7200         ocfs2_xattr_bucket_free(para.old_bucket);
7201         ocfs2_xattr_bucket_free(para.new_bucket);
7202         return ret;
7203 }
7204
7205 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7206                                         struct buffer_head *blk_bh)
7207 {
7208         int ret, indexed = 0;
7209         struct buffer_head *new_blk_bh = NULL;
7210         struct ocfs2_xattr_block *xb =
7211                         (struct ocfs2_xattr_block *)blk_bh->b_data;
7212
7213
7214         if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7215                 indexed = 1;
7216
7217         ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7218                                              &new_blk_bh, indexed);
7219         if (ret) {
7220                 mlog_errno(ret);
7221                 goto out;
7222         }
7223
7224         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
7225                 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7226         else
7227                 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7228         if (ret)
7229                 mlog_errno(ret);
7230
7231 out:
7232         brelse(new_blk_bh);
7233         return ret;
7234 }
7235
7236 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7237 {
7238         int type = ocfs2_xattr_get_type(xe);
7239
7240         return type != OCFS2_XATTR_INDEX_SECURITY &&
7241                type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7242                type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7243 }
7244
7245 int ocfs2_reflink_xattrs(struct inode *old_inode,
7246                          struct buffer_head *old_bh,
7247                          struct inode *new_inode,
7248                          struct buffer_head *new_bh,
7249                          bool preserve_security)
7250 {
7251         int ret;
7252         struct ocfs2_xattr_reflink args;
7253         struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7254         struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7255         struct buffer_head *blk_bh = NULL;
7256         struct ocfs2_cached_dealloc_ctxt dealloc;
7257         struct ocfs2_refcount_tree *ref_tree;
7258         struct buffer_head *ref_root_bh = NULL;
7259
7260         ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7261                                        le64_to_cpu(di->i_refcount_loc),
7262                                        1, &ref_tree, &ref_root_bh);
7263         if (ret) {
7264                 mlog_errno(ret);
7265                 goto out;
7266         }
7267
7268         ocfs2_init_dealloc_ctxt(&dealloc);
7269
7270         args.old_inode = old_inode;
7271         args.new_inode = new_inode;
7272         args.old_bh = old_bh;
7273         args.new_bh = new_bh;
7274         args.ref_ci = &ref_tree->rf_ci;
7275         args.ref_root_bh = ref_root_bh;
7276         args.dealloc = &dealloc;
7277         if (preserve_security)
7278                 args.xattr_reflinked = NULL;
7279         else
7280                 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7281
7282         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7283                 ret = ocfs2_reflink_xattr_inline(&args);
7284                 if (ret) {
7285                         mlog_errno(ret);
7286                         goto out_unlock;
7287                 }
7288         }
7289
7290         if (!di->i_xattr_loc)
7291                 goto out_unlock;
7292
7293         ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7294                                      &blk_bh);
7295         if (ret < 0) {
7296                 mlog_errno(ret);
7297                 goto out_unlock;
7298         }
7299
7300         ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7301         if (ret)
7302                 mlog_errno(ret);
7303
7304         brelse(blk_bh);
7305
7306 out_unlock:
7307         ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7308                                    ref_tree, 1);
7309         brelse(ref_root_bh);
7310
7311         if (ocfs2_dealloc_has_cluster(&dealloc)) {
7312                 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7313                 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7314         }
7315
7316 out:
7317         return ret;
7318 }
7319
7320 /*
7321  * Initialize security and acl for a already created inode.
7322  * Used for reflink a non-preserve-security file.
7323  *
7324  * It uses common api like ocfs2_xattr_set, so the caller
7325  * must not hold any lock expect i_mutex.
7326  */
7327 int ocfs2_init_security_and_acl(struct inode *dir,
7328                                 struct inode *inode)
7329 {
7330         int ret = 0;
7331         struct buffer_head *dir_bh = NULL;
7332         struct ocfs2_security_xattr_info si = {
7333                 .enable = 1,
7334         };
7335
7336         ret = ocfs2_init_security_get(inode, dir, &si);
7337         if (!ret) {
7338                 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7339                                       si.name, si.value, si.value_len,
7340                                       XATTR_CREATE);
7341                 if (ret) {
7342                         mlog_errno(ret);
7343                         goto leave;
7344                 }
7345         } else if (ret != -EOPNOTSUPP) {
7346                 mlog_errno(ret);
7347                 goto leave;
7348         }
7349
7350         ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7351         if (ret) {
7352                 mlog_errno(ret);
7353                 goto leave;
7354         }
7355
7356         ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7357         if (ret)
7358                 mlog_errno(ret);
7359
7360         ocfs2_inode_unlock(dir, 0);
7361         brelse(dir_bh);
7362 leave:
7363         return ret;
7364 }
7365 /*
7366  * 'security' attributes support
7367  */
7368 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7369                                         size_t list_size, const char *name,
7370                                         size_t name_len, int type)
7371 {
7372         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7373         const size_t total_len = prefix_len + name_len + 1;
7374
7375         if (list && total_len <= list_size) {
7376                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7377                 memcpy(list + prefix_len, name, name_len);
7378                 list[prefix_len + name_len] = '\0';
7379         }
7380         return total_len;
7381 }
7382
7383 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7384                                     void *buffer, size_t size, int type)
7385 {
7386         if (strcmp(name, "") == 0)
7387                 return -EINVAL;
7388         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7389                                name, buffer, size);
7390 }
7391
7392 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7393                 const void *value, size_t size, int flags, int type)
7394 {
7395         if (strcmp(name, "") == 0)
7396                 return -EINVAL;
7397
7398         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7399                                name, value, size, flags);
7400 }
7401
7402 int ocfs2_init_security_get(struct inode *inode,
7403                             struct inode *dir,
7404                             struct ocfs2_security_xattr_info *si)
7405 {
7406         /* check whether ocfs2 support feature xattr */
7407         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7408                 return -EOPNOTSUPP;
7409         return security_inode_init_security(inode, dir, &si->name, &si->value,
7410                                             &si->value_len);
7411 }
7412
/*
 * Store the security xattr described by si on inode within the already
 * running transaction handle.  This simply forwards to
 * ocfs2_xattr_set_handle() with index OCFS2_XATTR_INDEX_SECURITY and
 * flags 0, passing the supplied allocators through unchanged.
 *
 * Returns the result of ocfs2_xattr_set_handle().
 */
int ocfs2_init_security_set(handle_t *handle,
                            struct inode *inode,
                            struct buffer_head *di_bh,
                            struct ocfs2_security_xattr_info *si,
                            struct ocfs2_alloc_context *xattr_ac,
                            struct ocfs2_alloc_context *data_ac)
{
        return ocfs2_xattr_set_handle(handle, inode, di_bh,
                                     OCFS2_XATTR_INDEX_SECURITY,
                                     si->name, si->value, si->value_len, 0,
                                     xattr_ac, data_ac);
}
7425
/* Handler table for attributes under XATTR_SECURITY_PREFIX. */
struct xattr_handler ocfs2_xattr_security_handler = {
        .prefix = XATTR_SECURITY_PREFIX,
        .list   = ocfs2_xattr_security_list,
        .get    = ocfs2_xattr_security_get,
        .set    = ocfs2_xattr_security_set,
};
7432
7433 /*
7434  * 'trusted' attributes support
7435  */
7436 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7437                                        size_t list_size, const char *name,
7438                                        size_t name_len, int type)
7439 {
7440         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7441         const size_t total_len = prefix_len + name_len + 1;
7442
7443         if (list && total_len <= list_size) {
7444                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7445                 memcpy(list + prefix_len, name, name_len);
7446                 list[prefix_len + name_len] = '\0';
7447         }
7448         return total_len;
7449 }
7450
7451 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7452                 void *buffer, size_t size, int type)
7453 {
7454         if (strcmp(name, "") == 0)
7455                 return -EINVAL;
7456         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7457                                name, buffer, size);
7458 }
7459
7460 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7461                 const void *value, size_t size, int flags, int type)
7462 {
7463         if (strcmp(name, "") == 0)
7464                 return -EINVAL;
7465
7466         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7467                                name, value, size, flags);
7468 }
7469
/* Handler table for attributes under XATTR_TRUSTED_PREFIX. */
struct xattr_handler ocfs2_xattr_trusted_handler = {
        .prefix = XATTR_TRUSTED_PREFIX,
        .list   = ocfs2_xattr_trusted_list,
        .get    = ocfs2_xattr_trusted_get,
        .set    = ocfs2_xattr_trusted_set,
};
7476
7477 /*
7478  * 'user' attributes support
7479  */
7480 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7481                                     size_t list_size, const char *name,
7482                                     size_t name_len, int type)
7483 {
7484         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7485         const size_t total_len = prefix_len + name_len + 1;
7486         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7487
7488         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7489                 return 0;
7490
7491         if (list && total_len <= list_size) {
7492                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7493                 memcpy(list + prefix_len, name, name_len);
7494                 list[prefix_len + name_len] = '\0';
7495         }
7496         return total_len;
7497 }
7498
7499 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7500                 void *buffer, size_t size, int type)
7501 {
7502         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7503
7504         if (strcmp(name, "") == 0)
7505                 return -EINVAL;
7506         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7507                 return -EOPNOTSUPP;
7508         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7509                                buffer, size);
7510 }
7511
7512 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7513                 const void *value, size_t size, int flags, int type)
7514 {
7515         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7516
7517         if (strcmp(name, "") == 0)
7518                 return -EINVAL;
7519         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7520                 return -EOPNOTSUPP;
7521
7522         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7523                                name, value, size, flags);
7524 }
7525
/* Handler table for attributes under XATTR_USER_PREFIX. */
struct xattr_handler ocfs2_xattr_user_handler = {
        .prefix = XATTR_USER_PREFIX,
        .list   = ocfs2_xattr_user_list,
        .get    = ocfs2_xattr_user_get,
        .set    = ocfs2_xattr_user_set,
};