ocfs2: Provide ocfs2_xa_fill_value_buf() for external value processing
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60
61 struct ocfs2_xattr_def_value_root {
62         struct ocfs2_xattr_value_root   xv;
63         struct ocfs2_extent_rec         er;
64 };
65
66 struct ocfs2_xattr_bucket {
67         /* The inode these xattrs are associated with */
68         struct inode *bu_inode;
69
70         /* The actual buffers that make up the bucket */
71         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
72
73         /* How many blocks make up one bucket for this filesystem */
74         int bu_blocks;
75 };
76
77 struct ocfs2_xattr_set_ctxt {
78         handle_t *handle;
79         struct ocfs2_alloc_context *meta_ac;
80         struct ocfs2_alloc_context *data_ac;
81         struct ocfs2_cached_dealloc_ctxt dealloc;
82 };
83
/* Bytes taken by a value root that carries one extent record */
#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
/* Values larger than this many bytes are sized as a value root instead */
#define OCFS2_XATTR_INLINE_SIZE 80
#define OCFS2_XATTR_HEADER_GAP  4
/* Minimum inline xattr area minus the header and the header gap */
#define OCFS2_XATTR_FREE_IN_IBODY                                       \
        (OCFS2_MIN_XATTR_INLINE_SIZE                                    \
         - sizeof(struct ocfs2_xattr_header)                            \
         - OCFS2_XATTR_HEADER_GAP)
/* One block (of ptr's superblock) minus block header, xattr header and gap */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)                                  \
        ((ptr)->i_sb->s_blocksize                                       \
         - sizeof(struct ocfs2_xattr_block)                             \
         - sizeof(struct ocfs2_xattr_header)                            \
         - OCFS2_XATTR_HEADER_GAP)
94
95 static struct ocfs2_xattr_def_value_root def_xv = {
96         .xv.xr_list.l_count = cpu_to_le16(1),
97 };
98
99 struct xattr_handler *ocfs2_xattr_handlers[] = {
100         &ocfs2_xattr_user_handler,
101         &ocfs2_xattr_acl_access_handler,
102         &ocfs2_xattr_acl_default_handler,
103         &ocfs2_xattr_trusted_handler,
104         &ocfs2_xattr_security_handler,
105         NULL
106 };
107
108 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
109         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
110         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
111                                         = &ocfs2_xattr_acl_access_handler,
112         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
113                                         = &ocfs2_xattr_acl_default_handler,
114         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
115         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
116 };
117
118 struct ocfs2_xattr_info {
119         int             xi_name_index;
120         const char      *xi_name;
121         int             xi_name_len;
122         const void      *xi_value;
123         size_t          xi_value_len;
124 };
125
/* State of a lookup for one extended attribute. */
struct ocfs2_xattr_search {
        struct buffer_head *inode_bh;

        /*
         * Buffer head of the block that holds the extended attribute.
         * When the attribute is stored in the inode, xattr_bh is equal
         * to inode_bh.
         */
        struct buffer_head *xattr_bh;

        struct ocfs2_xattr_header *header;
        struct ocfs2_xattr_bucket *bucket;
        void *base;
        void *end;
        struct ocfs2_xattr_entry *here;
        int not_found;
};
140
141 /* Operations on struct ocfs2_xa_entry */
142 struct ocfs2_xa_loc;
143 struct ocfs2_xa_loc_operations {
144         /*
145          * Return a pointer to the appropriate buffer in loc->xl_storage
146          * at the given offset from loc->xl_header.
147          */
148         void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
149
150         /* Can we reuse the existing entry for the new value? */
151         int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
152                              struct ocfs2_xattr_info *xi);
153
154         /* How much space is needed for the new value? */
155         int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
156                                struct ocfs2_xattr_info *xi);
157
158         /*
159          * Return the offset of the first name+value pair.  This is
160          * the start of our downward-filling free space.
161          */
162         int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
163
164         /*
165          * Remove the name+value at this location.  Do whatever is
166          * appropriate with the remaining name+value pairs.
167          */
168         void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
169
170         /* Fill xl_entry with a new entry */
171         void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
172
173         /* Add name+value storage to an entry */
174         void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
175
176         /*
177          * Initialize the value buf's access and bh fields for this entry.
178          * ocfs2_xa_fill_value_buf() will handle the xv pointer.
179          */
180         void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
181                                    struct ocfs2_xattr_value_buf *vb);
182 };
183
/*
 * In-memory description of where an xattr entry lives; it tracks the
 * corresponding on-disk structure.
 */
struct ocfs2_xa_loc {
        /* ocfs2_xattr_header within the on-disk storage; never NULL */
        struct ocfs2_xattr_header *xl_header;

        /* Number of bytes between xl_header and the end of the storage */
        int xl_size;

        /*
         * The ocfs2_xattr_entry described by this location.  When NULL,
         * the location describes the on-disk structure where the entry
         * would have been.
         */
        struct ocfs2_xattr_entry *xl_entry;

        /*
         * Internal housekeeping follows.
         */

        /* Buffer(s) that contain this entry */
        void *xl_storage;

        /* Operations for the storage behind this location */
        const struct ocfs2_xa_loc_operations *xl_ops;
};
212
213 /*
214  * Convenience functions to calculate how much space is needed for a
215  * given name+value pair
216  */
217 static int namevalue_size(int name_len, uint64_t value_len)
218 {
219         if (value_len > OCFS2_XATTR_INLINE_SIZE)
220                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
221         else
222                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
223 }
224
225 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
226 {
227         return namevalue_size(xi->xi_name_len, xi->xi_value_len);
228 }
229
230 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
231 {
232         u64 value_len = le64_to_cpu(xe->xe_value_size);
233
234         BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
235                ocfs2_xattr_is_local(xe));
236         return namevalue_size(xe->xe_name_len, value_len);
237 }
238
239
240 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
241                                              struct ocfs2_xattr_header *xh,
242                                              int index,
243                                              int *block_off,
244                                              int *new_offset);
245
246 static int ocfs2_xattr_block_find(struct inode *inode,
247                                   int name_index,
248                                   const char *name,
249                                   struct ocfs2_xattr_search *xs);
250 static int ocfs2_xattr_index_block_find(struct inode *inode,
251                                         struct buffer_head *root_bh,
252                                         int name_index,
253                                         const char *name,
254                                         struct ocfs2_xattr_search *xs);
255
256 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
257                                         struct buffer_head *blk_bh,
258                                         char *buffer,
259                                         size_t buffer_size);
260
261 static int ocfs2_xattr_create_index_block(struct inode *inode,
262                                           struct ocfs2_xattr_search *xs,
263                                           struct ocfs2_xattr_set_ctxt *ctxt);
264
265 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
266                                              struct ocfs2_xattr_info *xi,
267                                              struct ocfs2_xattr_search *xs,
268                                              struct ocfs2_xattr_set_ctxt *ctxt);
269
270 typedef int (xattr_tree_rec_func)(struct inode *inode,
271                                   struct buffer_head *root_bh,
272                                   u64 blkno, u32 cpos, u32 len, void *para);
273 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
274                                            struct buffer_head *root_bh,
275                                            xattr_tree_rec_func *rec_func,
276                                            void *para);
277 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
278                                         struct ocfs2_xattr_bucket *bucket,
279                                         void *para);
280 static int ocfs2_rm_xattr_cluster(struct inode *inode,
281                                   struct buffer_head *root_bh,
282                                   u64 blkno,
283                                   u32 cpos,
284                                   u32 len,
285                                   void *para);
286
287 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
288                                   u64 src_blk, u64 last_blk, u64 to_blk,
289                                   unsigned int start_bucket,
290                                   u32 *first_hash);
291 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
292                                         struct ocfs2_dinode *di,
293                                         struct ocfs2_xattr_info *xi,
294                                         struct ocfs2_xattr_search *xis,
295                                         struct ocfs2_xattr_search *xbs,
296                                         struct ocfs2_refcount_tree **ref_tree,
297                                         int *meta_need,
298                                         int *credits);
299 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
300                                            struct ocfs2_xattr_bucket *bucket,
301                                            int offset,
302                                            struct ocfs2_xattr_value_root **xv,
303                                            struct buffer_head **bh);
304
305 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
306 {
307         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
308 }
309
310 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
311 {
312         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
313 }
314
315 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
316 {
317         u16 len = sb->s_blocksize -
318                  offsetof(struct ocfs2_xattr_header, xh_entries);
319
320         return len / sizeof(struct ocfs2_xattr_entry);
321 }
322
/* Block number of the first buffer in bucket _b */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data of the _n'th buffer in bucket _b */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The first block of bucket _b viewed as an ocfs2_xattr_header */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
326
327 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
328 {
329         struct ocfs2_xattr_bucket *bucket;
330         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
331
332         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
333
334         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
335         if (bucket) {
336                 bucket->bu_inode = inode;
337                 bucket->bu_blocks = blks;
338         }
339
340         return bucket;
341 }
342
343 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
344 {
345         int i;
346
347         for (i = 0; i < bucket->bu_blocks; i++) {
348                 brelse(bucket->bu_bhs[i]);
349                 bucket->bu_bhs[i] = NULL;
350         }
351 }
352
353 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
354 {
355         if (bucket) {
356                 ocfs2_xattr_bucket_relse(bucket);
357                 bucket->bu_inode = NULL;
358                 kfree(bucket);
359         }
360 }
361
362 /*
363  * A bucket that has never been written to disk doesn't need to be
364  * read.  We just need the buffer_heads.  Don't call this for
365  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
366  * them fully.
367  */
368 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
369                                    u64 xb_blkno)
370 {
371         int i, rc = 0;
372
373         for (i = 0; i < bucket->bu_blocks; i++) {
374                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
375                                               xb_blkno + i);
376                 if (!bucket->bu_bhs[i]) {
377                         rc = -EIO;
378                         mlog_errno(rc);
379                         break;
380                 }
381
382                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
383                                            bucket->bu_bhs[i]))
384                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
385                                                       bucket->bu_bhs[i]);
386         }
387
388         if (rc)
389                 ocfs2_xattr_bucket_relse(bucket);
390         return rc;
391 }
392
393 /* Read the xattr bucket at xb_blkno */
394 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
395                                    u64 xb_blkno)
396 {
397         int rc;
398
399         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
400                                bucket->bu_blocks, bucket->bu_bhs, 0,
401                                NULL);
402         if (!rc) {
403                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
404                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
405                                                  bucket->bu_bhs,
406                                                  bucket->bu_blocks,
407                                                  &bucket_xh(bucket)->xh_check);
408                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
409                 if (rc)
410                         mlog_errno(rc);
411         }
412
413         if (rc)
414                 ocfs2_xattr_bucket_relse(bucket);
415         return rc;
416 }
417
418 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
419                                              struct ocfs2_xattr_bucket *bucket,
420                                              int type)
421 {
422         int i, rc = 0;
423
424         for (i = 0; i < bucket->bu_blocks; i++) {
425                 rc = ocfs2_journal_access(handle,
426                                           INODE_CACHE(bucket->bu_inode),
427                                           bucket->bu_bhs[i], type);
428                 if (rc) {
429                         mlog_errno(rc);
430                         break;
431                 }
432         }
433
434         return rc;
435 }
436
437 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
438                                              struct ocfs2_xattr_bucket *bucket)
439 {
440         int i;
441
442         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
443         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
444                                    bucket->bu_bhs, bucket->bu_blocks,
445                                    &bucket_xh(bucket)->xh_check);
446         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
447
448         for (i = 0; i < bucket->bu_blocks; i++)
449                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
450 }
451
452 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
453                                          struct ocfs2_xattr_bucket *src)
454 {
455         int i;
456         int blocksize = src->bu_inode->i_sb->s_blocksize;
457
458         BUG_ON(dest->bu_blocks != src->bu_blocks);
459         BUG_ON(dest->bu_inode != src->bu_inode);
460
461         for (i = 0; i < src->bu_blocks; i++) {
462                 memcpy(bucket_block(dest, i), bucket_block(src, i),
463                        blocksize);
464         }
465 }
466
467 static int ocfs2_validate_xattr_block(struct super_block *sb,
468                                       struct buffer_head *bh)
469 {
470         int rc;
471         struct ocfs2_xattr_block *xb =
472                 (struct ocfs2_xattr_block *)bh->b_data;
473
474         mlog(0, "Validating xattr block %llu\n",
475              (unsigned long long)bh->b_blocknr);
476
477         BUG_ON(!buffer_uptodate(bh));
478
479         /*
480          * If the ecc fails, we return the error but otherwise
481          * leave the filesystem running.  We know any error is
482          * local to this block.
483          */
484         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
485         if (rc)
486                 return rc;
487
488         /*
489          * Errors after here are fatal
490          */
491
492         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
493                 ocfs2_error(sb,
494                             "Extended attribute block #%llu has bad "
495                             "signature %.*s",
496                             (unsigned long long)bh->b_blocknr, 7,
497                             xb->xb_signature);
498                 return -EINVAL;
499         }
500
501         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
502                 ocfs2_error(sb,
503                             "Extended attribute block #%llu has an "
504                             "invalid xb_blkno of %llu",
505                             (unsigned long long)bh->b_blocknr,
506                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
507                 return -EINVAL;
508         }
509
510         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
511                 ocfs2_error(sb,
512                             "Extended attribute block #%llu has an invalid "
513                             "xb_fs_generation of #%u",
514                             (unsigned long long)bh->b_blocknr,
515                             le32_to_cpu(xb->xb_fs_generation));
516                 return -EINVAL;
517         }
518
519         return 0;
520 }
521
522 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
523                                   struct buffer_head **bh)
524 {
525         int rc;
526         struct buffer_head *tmp = *bh;
527
528         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
529                               ocfs2_validate_xattr_block);
530
531         /* If ocfs2_read_block() got us a new bh, pass it up. */
532         if (!rc && !*bh)
533                 *bh = tmp;
534
535         return rc;
536 }
537
538 static inline const char *ocfs2_xattr_prefix(int name_index)
539 {
540         struct xattr_handler *handler = NULL;
541
542         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
543                 handler = ocfs2_xattr_handler_map[name_index];
544
545         return handler ? handler->prefix : NULL;
546 }
547
548 static u32 ocfs2_xattr_name_hash(struct inode *inode,
549                                  const char *name,
550                                  int name_len)
551 {
552         /* Get hash value of uuid from super block */
553         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
554         int i;
555
556         /* hash extended attribute name */
557         for (i = 0; i < name_len; i++) {
558                 hash = (hash << OCFS2_HASH_SHIFT) ^
559                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
560                        *name++;
561         }
562
563         return hash;
564 }
565
566 /*
567  * ocfs2_xattr_hash_entry()
568  *
569  * Compute the hash of an extended attribute.
570  */
571 static void ocfs2_xattr_hash_entry(struct inode *inode,
572                                    struct ocfs2_xattr_header *header,
573                                    struct ocfs2_xattr_entry *entry)
574 {
575         u32 hash = 0;
576         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
577
578         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
579         entry->xe_name_hash = cpu_to_le32(hash);
580
581         return;
582 }
583
584 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
585 {
586         return namevalue_size(name_len, value_len) +
587                 sizeof(struct ocfs2_xattr_entry);
588 }
589
590 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
591 {
592         return namevalue_size_xi(xi) +
593                 sizeof(struct ocfs2_xattr_entry);
594 }
595
596 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
597 {
598         return namevalue_size_xe(xe) +
599                 sizeof(struct ocfs2_xattr_entry);
600 }
601
602 int ocfs2_calc_security_init(struct inode *dir,
603                              struct ocfs2_security_xattr_info *si,
604                              int *want_clusters,
605                              int *xattr_credits,
606                              struct ocfs2_alloc_context **xattr_ac)
607 {
608         int ret = 0;
609         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
610         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
611                                                  si->value_len);
612
613         /*
614          * The max space of security xattr taken inline is
615          * 256(name) + 80(value) + 16(entry) = 352 bytes,
616          * So reserve one metadata block for it is ok.
617          */
618         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
619             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
620                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
621                 if (ret) {
622                         mlog_errno(ret);
623                         return ret;
624                 }
625                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
626         }
627
628         /* reserve clusters for xattr value which will be set in B tree*/
629         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
630                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
631                                                             si->value_len);
632
633                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
634                                                            new_clusters);
635                 *want_clusters += new_clusters;
636         }
637         return ret;
638 }
639
640 int ocfs2_calc_xattr_init(struct inode *dir,
641                           struct buffer_head *dir_bh,
642                           int mode,
643                           struct ocfs2_security_xattr_info *si,
644                           int *want_clusters,
645                           int *xattr_credits,
646                           int *want_meta)
647 {
648         int ret = 0;
649         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
650         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
651
652         if (si->enable)
653                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
654                                                      si->value_len);
655
656         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
657                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
658                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
659                                         "", NULL, 0);
660                 if (acl_len > 0) {
661                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
662                         if (S_ISDIR(mode))
663                                 a_size <<= 1;
664                 } else if (acl_len != 0 && acl_len != -ENODATA) {
665                         mlog_errno(ret);
666                         return ret;
667                 }
668         }
669
670         if (!(s_size + a_size))
671                 return ret;
672
673         /*
674          * The max space of security xattr taken inline is
675          * 256(name) + 80(value) + 16(entry) = 352 bytes,
676          * The max space of acl xattr taken inline is
677          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
678          * when blocksize = 512, may reserve one more cluser for
679          * xattr bucket, otherwise reserve one metadata block
680          * for them is ok.
681          * If this is a new directory with inline data,
682          * we choose to reserve the entire inline area for
683          * directory contents and force an external xattr block.
684          */
685         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
686             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
687             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
688                 *want_meta = *want_meta + 1;
689                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
690         }
691
692         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
693             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
694                 *want_clusters += 1;
695                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
696         }
697
698         /*
699          * reserve credits and clusters for xattrs which has large value
700          * and have to be set outside
701          */
702         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
703                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
704                                                         si->value_len);
705                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
706                                                            new_clusters);
707                 *want_clusters += new_clusters;
708         }
709         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
710             acl_len > OCFS2_XATTR_INLINE_SIZE) {
711                 /* for directory, it has DEFAULT and ACCESS two types of acls */
712                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
713                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
714                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
715                                                            new_clusters);
716                 *want_clusters += new_clusters;
717         }
718
719         return ret;
720 }
721
722 static int ocfs2_xattr_extend_allocation(struct inode *inode,
723                                          u32 clusters_to_add,
724                                          struct ocfs2_xattr_value_buf *vb,
725                                          struct ocfs2_xattr_set_ctxt *ctxt)
726 {
727         int status = 0;
728         handle_t *handle = ctxt->handle;
729         enum ocfs2_alloc_restarted why;
730         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
731         struct ocfs2_extent_tree et;
732
733         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
734
735         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
736
737         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
738                               OCFS2_JOURNAL_ACCESS_WRITE);
739         if (status < 0) {
740                 mlog_errno(status);
741                 goto leave;
742         }
743
744         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
745         status = ocfs2_add_clusters_in_btree(handle,
746                                              &et,
747                                              &logical_start,
748                                              clusters_to_add,
749                                              0,
750                                              ctxt->data_ac,
751                                              ctxt->meta_ac,
752                                              &why);
753         if (status < 0) {
754                 mlog_errno(status);
755                 goto leave;
756         }
757
758         status = ocfs2_journal_dirty(handle, vb->vb_bh);
759         if (status < 0) {
760                 mlog_errno(status);
761                 goto leave;
762         }
763
764         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
765
766         /*
767          * We should have already allocated enough space before the transaction,
768          * so no need to restart.
769          */
770         BUG_ON(why != RESTART_NONE || clusters_to_add);
771
772 leave:
773
774         return status;
775 }
776
777 static int __ocfs2_remove_xattr_range(struct inode *inode,
778                                       struct ocfs2_xattr_value_buf *vb,
779                                       u32 cpos, u32 phys_cpos, u32 len,
780                                       unsigned int ext_flags,
781                                       struct ocfs2_xattr_set_ctxt *ctxt)
782 {
783         int ret;
784         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
785         handle_t *handle = ctxt->handle;
786         struct ocfs2_extent_tree et;
787
788         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
789
790         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
791                             OCFS2_JOURNAL_ACCESS_WRITE);
792         if (ret) {
793                 mlog_errno(ret);
794                 goto out;
795         }
796
797         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
798                                   &ctxt->dealloc);
799         if (ret) {
800                 mlog_errno(ret);
801                 goto out;
802         }
803
804         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
805
806         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
807         if (ret) {
808                 mlog_errno(ret);
809                 goto out;
810         }
811
812         if (ext_flags & OCFS2_EXT_REFCOUNTED)
813                 ret = ocfs2_decrease_refcount(inode, handle,
814                                         ocfs2_blocks_to_clusters(inode->i_sb,
815                                                                  phys_blkno),
816                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
817         else
818                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
819                                                   phys_blkno, len);
820         if (ret)
821                 mlog_errno(ret);
822
823 out:
824         return ret;
825 }
826
827 static int ocfs2_xattr_shrink_size(struct inode *inode,
828                                    u32 old_clusters,
829                                    u32 new_clusters,
830                                    struct ocfs2_xattr_value_buf *vb,
831                                    struct ocfs2_xattr_set_ctxt *ctxt)
832 {
833         int ret = 0;
834         unsigned int ext_flags;
835         u32 trunc_len, cpos, phys_cpos, alloc_size;
836         u64 block;
837
838         if (old_clusters <= new_clusters)
839                 return 0;
840
841         cpos = new_clusters;
842         trunc_len = old_clusters - new_clusters;
843         while (trunc_len) {
844                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
845                                                &alloc_size,
846                                                &vb->vb_xv->xr_list, &ext_flags);
847                 if (ret) {
848                         mlog_errno(ret);
849                         goto out;
850                 }
851
852                 if (alloc_size > trunc_len)
853                         alloc_size = trunc_len;
854
855                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
856                                                  phys_cpos, alloc_size,
857                                                  ext_flags, ctxt);
858                 if (ret) {
859                         mlog_errno(ret);
860                         goto out;
861                 }
862
863                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
864                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
865                                                        block, alloc_size);
866                 cpos += alloc_size;
867                 trunc_len -= alloc_size;
868         }
869
870 out:
871         return ret;
872 }
873
874 static int ocfs2_xattr_value_truncate(struct inode *inode,
875                                       struct ocfs2_xattr_value_buf *vb,
876                                       int len,
877                                       struct ocfs2_xattr_set_ctxt *ctxt)
878 {
879         int ret;
880         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
881         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
882
883         if (new_clusters == old_clusters)
884                 return 0;
885
886         if (new_clusters > old_clusters)
887                 ret = ocfs2_xattr_extend_allocation(inode,
888                                                     new_clusters - old_clusters,
889                                                     vb, ctxt);
890         else
891                 ret = ocfs2_xattr_shrink_size(inode,
892                                               old_clusters, new_clusters,
893                                               vb, ctxt);
894
895         return ret;
896 }
897
/*
 * Append one "<prefix><name>\0" string to a listxattr-style buffer.
 *
 * @buffer:   destination, written at offset *@result
 * @size:     capacity of @buffer; 0 means "only measure, write nothing"
 * @result:   running byte count; always advanced by the entry length
 *            (prefix + name + terminating NUL), even when -ERANGE is
 *            returned
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name, not necessarily NUL-terminated
 * @name_len: number of bytes of @name to copy
 *
 * Returns 0 on success, or -ERANGE when @size is non-zero and the
 * advanced *@result no longer fits in it.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;
	char *dst;

	*result = start + total_len;

	/* Size probe: the caller only wants the required length. */
	if (size == 0)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	dst += prefix_len;
	memcpy(dst, name, name_len);
	dst[name_len] = '\0';

	return 0;
}
921
922 static int ocfs2_xattr_list_entries(struct inode *inode,
923                                     struct ocfs2_xattr_header *header,
924                                     char *buffer, size_t buffer_size)
925 {
926         size_t result = 0;
927         int i, type, ret;
928         const char *prefix, *name;
929
930         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
931                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
932                 type = ocfs2_xattr_get_type(entry);
933                 prefix = ocfs2_xattr_prefix(type);
934
935                 if (prefix) {
936                         name = (const char *)header +
937                                 le16_to_cpu(entry->xe_name_offset);
938
939                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
940                                                      &result, prefix, name,
941                                                      entry->xe_name_len);
942                         if (ret)
943                                 return ret;
944                 }
945         }
946
947         return result;
948 }
949
950 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
951                                          struct ocfs2_dinode *di)
952 {
953         struct ocfs2_xattr_header *xh;
954         int i;
955
956         xh = (struct ocfs2_xattr_header *)
957                  ((void *)di + inode->i_sb->s_blocksize -
958                  le16_to_cpu(di->i_xattr_inline_size));
959
960         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
961                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
962                         return 1;
963
964         return 0;
965 }
966
967 static int ocfs2_xattr_ibody_list(struct inode *inode,
968                                   struct ocfs2_dinode *di,
969                                   char *buffer,
970                                   size_t buffer_size)
971 {
972         struct ocfs2_xattr_header *header = NULL;
973         struct ocfs2_inode_info *oi = OCFS2_I(inode);
974         int ret = 0;
975
976         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
977                 return ret;
978
979         header = (struct ocfs2_xattr_header *)
980                  ((void *)di + inode->i_sb->s_blocksize -
981                  le16_to_cpu(di->i_xattr_inline_size));
982
983         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
984
985         return ret;
986 }
987
988 static int ocfs2_xattr_block_list(struct inode *inode,
989                                   struct ocfs2_dinode *di,
990                                   char *buffer,
991                                   size_t buffer_size)
992 {
993         struct buffer_head *blk_bh = NULL;
994         struct ocfs2_xattr_block *xb;
995         int ret = 0;
996
997         if (!di->i_xattr_loc)
998                 return ret;
999
1000         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
1001                                      &blk_bh);
1002         if (ret < 0) {
1003                 mlog_errno(ret);
1004                 return ret;
1005         }
1006
1007         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1008         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1009                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
1010                 ret = ocfs2_xattr_list_entries(inode, header,
1011                                                buffer, buffer_size);
1012         } else
1013                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1014                                                    buffer, buffer_size);
1015
1016         brelse(blk_bh);
1017
1018         return ret;
1019 }
1020
1021 ssize_t ocfs2_listxattr(struct dentry *dentry,
1022                         char *buffer,
1023                         size_t size)
1024 {
1025         int ret = 0, i_ret = 0, b_ret = 0;
1026         struct buffer_head *di_bh = NULL;
1027         struct ocfs2_dinode *di = NULL;
1028         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
1029
1030         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1031                 return -EOPNOTSUPP;
1032
1033         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1034                 return ret;
1035
1036         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
1037         if (ret < 0) {
1038                 mlog_errno(ret);
1039                 return ret;
1040         }
1041
1042         di = (struct ocfs2_dinode *)di_bh->b_data;
1043
1044         down_read(&oi->ip_xattr_sem);
1045         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
1046         if (i_ret < 0)
1047                 b_ret = 0;
1048         else {
1049                 if (buffer) {
1050                         buffer += i_ret;
1051                         size -= i_ret;
1052                 }
1053                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
1054                                                buffer, size);
1055                 if (b_ret < 0)
1056                         i_ret = 0;
1057         }
1058         up_read(&oi->ip_xattr_sem);
1059         ocfs2_inode_unlock(dentry->d_inode, 0);
1060
1061         brelse(di_bh);
1062
1063         return i_ret + b_ret;
1064 }
1065
1066 static int ocfs2_xattr_find_entry(int name_index,
1067                                   const char *name,
1068                                   struct ocfs2_xattr_search *xs)
1069 {
1070         struct ocfs2_xattr_entry *entry;
1071         size_t name_len;
1072         int i, cmp = 1;
1073
1074         if (name == NULL)
1075                 return -EINVAL;
1076
1077         name_len = strlen(name);
1078         entry = xs->here;
1079         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1080                 cmp = name_index - ocfs2_xattr_get_type(entry);
1081                 if (!cmp)
1082                         cmp = name_len - entry->xe_name_len;
1083                 if (!cmp)
1084                         cmp = memcmp(name, (xs->base +
1085                                      le16_to_cpu(entry->xe_name_offset)),
1086                                      name_len);
1087                 if (cmp == 0)
1088                         break;
1089                 entry += 1;
1090         }
1091         xs->here = entry;
1092
1093         return cmp ? -ENODATA : 0;
1094 }
1095
1096 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1097                                          struct ocfs2_xattr_value_root *xv,
1098                                          void *buffer,
1099                                          size_t len)
1100 {
1101         u32 cpos, p_cluster, num_clusters, bpc, clusters;
1102         u64 blkno;
1103         int i, ret = 0;
1104         size_t cplen, blocksize;
1105         struct buffer_head *bh = NULL;
1106         struct ocfs2_extent_list *el;
1107
1108         el = &xv->xr_list;
1109         clusters = le32_to_cpu(xv->xr_clusters);
1110         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1111         blocksize = inode->i_sb->s_blocksize;
1112
1113         cpos = 0;
1114         while (cpos < clusters) {
1115                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1116                                                &num_clusters, el, NULL);
1117                 if (ret) {
1118                         mlog_errno(ret);
1119                         goto out;
1120                 }
1121
1122                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1123                 /* Copy ocfs2_xattr_value */
1124                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1125                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1126                                                &bh, NULL);
1127                         if (ret) {
1128                                 mlog_errno(ret);
1129                                 goto out;
1130                         }
1131
1132                         cplen = len >= blocksize ? blocksize : len;
1133                         memcpy(buffer, bh->b_data, cplen);
1134                         len -= cplen;
1135                         buffer += cplen;
1136
1137                         brelse(bh);
1138                         bh = NULL;
1139                         if (len == 0)
1140                                 break;
1141                 }
1142                 cpos += num_clusters;
1143         }
1144 out:
1145         return ret;
1146 }
1147
1148 static int ocfs2_xattr_ibody_get(struct inode *inode,
1149                                  int name_index,
1150                                  const char *name,
1151                                  void *buffer,
1152                                  size_t buffer_size,
1153                                  struct ocfs2_xattr_search *xs)
1154 {
1155         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1156         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1157         struct ocfs2_xattr_value_root *xv;
1158         size_t size;
1159         int ret = 0;
1160
1161         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1162                 return -ENODATA;
1163
1164         xs->end = (void *)di + inode->i_sb->s_blocksize;
1165         xs->header = (struct ocfs2_xattr_header *)
1166                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1167         xs->base = (void *)xs->header;
1168         xs->here = xs->header->xh_entries;
1169
1170         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1171         if (ret)
1172                 return ret;
1173         size = le64_to_cpu(xs->here->xe_value_size);
1174         if (buffer) {
1175                 if (size > buffer_size)
1176                         return -ERANGE;
1177                 if (ocfs2_xattr_is_local(xs->here)) {
1178                         memcpy(buffer, (void *)xs->base +
1179                                le16_to_cpu(xs->here->xe_name_offset) +
1180                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1181                 } else {
1182                         xv = (struct ocfs2_xattr_value_root *)
1183                                 (xs->base + le16_to_cpu(
1184                                  xs->here->xe_name_offset) +
1185                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1186                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1187                                                             buffer, size);
1188                         if (ret < 0) {
1189                                 mlog_errno(ret);
1190                                 return ret;
1191                         }
1192                 }
1193         }
1194
1195         return size;
1196 }
1197
1198 static int ocfs2_xattr_block_get(struct inode *inode,
1199                                  int name_index,
1200                                  const char *name,
1201                                  void *buffer,
1202                                  size_t buffer_size,
1203                                  struct ocfs2_xattr_search *xs)
1204 {
1205         struct ocfs2_xattr_block *xb;
1206         struct ocfs2_xattr_value_root *xv;
1207         size_t size;
1208         int ret = -ENODATA, name_offset, name_len, i;
1209         int uninitialized_var(block_off);
1210
1211         xs->bucket = ocfs2_xattr_bucket_new(inode);
1212         if (!xs->bucket) {
1213                 ret = -ENOMEM;
1214                 mlog_errno(ret);
1215                 goto cleanup;
1216         }
1217
1218         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1219         if (ret) {
1220                 mlog_errno(ret);
1221                 goto cleanup;
1222         }
1223
1224         if (xs->not_found) {
1225                 ret = -ENODATA;
1226                 goto cleanup;
1227         }
1228
1229         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1230         size = le64_to_cpu(xs->here->xe_value_size);
1231         if (buffer) {
1232                 ret = -ERANGE;
1233                 if (size > buffer_size)
1234                         goto cleanup;
1235
1236                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1237                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1238                 i = xs->here - xs->header->xh_entries;
1239
1240                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1241                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1242                                                                 bucket_xh(xs->bucket),
1243                                                                 i,
1244                                                                 &block_off,
1245                                                                 &name_offset);
1246                         xs->base = bucket_block(xs->bucket, block_off);
1247                 }
1248                 if (ocfs2_xattr_is_local(xs->here)) {
1249                         memcpy(buffer, (void *)xs->base +
1250                                name_offset + name_len, size);
1251                 } else {
1252                         xv = (struct ocfs2_xattr_value_root *)
1253                                 (xs->base + name_offset + name_len);
1254                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1255                                                             buffer, size);
1256                         if (ret < 0) {
1257                                 mlog_errno(ret);
1258                                 goto cleanup;
1259                         }
1260                 }
1261         }
1262         ret = size;
1263 cleanup:
1264         ocfs2_xattr_bucket_free(xs->bucket);
1265
1266         brelse(xs->xattr_bh);
1267         xs->xattr_bh = NULL;
1268         return ret;
1269 }
1270
1271 int ocfs2_xattr_get_nolock(struct inode *inode,
1272                            struct buffer_head *di_bh,
1273                            int name_index,
1274                            const char *name,
1275                            void *buffer,
1276                            size_t buffer_size)
1277 {
1278         int ret;
1279         struct ocfs2_dinode *di = NULL;
1280         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1281         struct ocfs2_xattr_search xis = {
1282                 .not_found = -ENODATA,
1283         };
1284         struct ocfs2_xattr_search xbs = {
1285                 .not_found = -ENODATA,
1286         };
1287
1288         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1289                 return -EOPNOTSUPP;
1290
1291         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1292                 ret = -ENODATA;
1293
1294         xis.inode_bh = xbs.inode_bh = di_bh;
1295         di = (struct ocfs2_dinode *)di_bh->b_data;
1296
1297         down_read(&oi->ip_xattr_sem);
1298         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1299                                     buffer_size, &xis);
1300         if (ret == -ENODATA && di->i_xattr_loc)
1301                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1302                                             buffer_size, &xbs);
1303         up_read(&oi->ip_xattr_sem);
1304
1305         return ret;
1306 }
1307
1308 /* ocfs2_xattr_get()
1309  *
1310  * Copy an extended attribute into the buffer provided.
1311  * Buffer is NULL to compute the size of buffer required.
1312  */
1313 static int ocfs2_xattr_get(struct inode *inode,
1314                            int name_index,
1315                            const char *name,
1316                            void *buffer,
1317                            size_t buffer_size)
1318 {
1319         int ret;
1320         struct buffer_head *di_bh = NULL;
1321
1322         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1323         if (ret < 0) {
1324                 mlog_errno(ret);
1325                 return ret;
1326         }
1327         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1328                                      name, buffer, buffer_size);
1329
1330         ocfs2_inode_unlock(inode, 0);
1331
1332         brelse(di_bh);
1333
1334         return ret;
1335 }
1336
1337 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1338                                            handle_t *handle,
1339                                            struct ocfs2_xattr_value_buf *vb,
1340                                            const void *value,
1341                                            int value_len)
1342 {
1343         int ret = 0, i, cp_len;
1344         u16 blocksize = inode->i_sb->s_blocksize;
1345         u32 p_cluster, num_clusters;
1346         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1347         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1348         u64 blkno;
1349         struct buffer_head *bh = NULL;
1350         unsigned int ext_flags;
1351         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1352
1353         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1354
1355         while (cpos < clusters) {
1356                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1357                                                &num_clusters, &xv->xr_list,
1358                                                &ext_flags);
1359                 if (ret) {
1360                         mlog_errno(ret);
1361                         goto out;
1362                 }
1363
1364                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1365
1366                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1367
1368                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1369                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1370                                                &bh, NULL);
1371                         if (ret) {
1372                                 mlog_errno(ret);
1373                                 goto out;
1374                         }
1375
1376                         ret = ocfs2_journal_access(handle,
1377                                                    INODE_CACHE(inode),
1378                                                    bh,
1379                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1380                         if (ret < 0) {
1381                                 mlog_errno(ret);
1382                                 goto out;
1383                         }
1384
1385                         cp_len = value_len > blocksize ? blocksize : value_len;
1386                         memcpy(bh->b_data, value, cp_len);
1387                         value_len -= cp_len;
1388                         value += cp_len;
1389                         if (cp_len < blocksize)
1390                                 memset(bh->b_data + cp_len, 0,
1391                                        blocksize - cp_len);
1392
1393                         ret = ocfs2_journal_dirty(handle, bh);
1394                         if (ret < 0) {
1395                                 mlog_errno(ret);
1396                                 goto out;
1397                         }
1398                         brelse(bh);
1399                         bh = NULL;
1400
1401                         /*
1402                          * XXX: do we need to empty all the following
1403                          * blocks in this cluster?
1404                          */
1405                         if (!value_len)
1406                                 break;
1407                 }
1408                 cpos += num_clusters;
1409         }
1410 out:
1411         brelse(bh);
1412
1413         return ret;
1414 }
1415
1416 static int ocfs2_xattr_cleanup(struct inode *inode,
1417                                handle_t *handle,
1418                                struct ocfs2_xattr_info *xi,
1419                                struct ocfs2_xattr_search *xs,
1420                                struct ocfs2_xattr_value_buf *vb,
1421                                size_t offs)
1422 {
1423         int ret = 0;
1424         void *val = xs->base + offs;
1425         size_t size = namevalue_size_xi(xi);
1426
1427         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1428                             OCFS2_JOURNAL_ACCESS_WRITE);
1429         if (ret) {
1430                 mlog_errno(ret);
1431                 goto out;
1432         }
1433         /* Decrease xattr count */
1434         le16_add_cpu(&xs->header->xh_count, -1);
1435         /* Remove the xattr entry and tree root which has already be set*/
1436         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1437         memset(val, 0, size);
1438
1439         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1440         if (ret < 0)
1441                 mlog_errno(ret);
1442 out:
1443         return ret;
1444 }
1445
1446 static int ocfs2_xattr_update_entry(struct inode *inode,
1447                                     handle_t *handle,
1448                                     struct ocfs2_xattr_info *xi,
1449                                     struct ocfs2_xattr_search *xs,
1450                                     struct ocfs2_xattr_value_buf *vb,
1451                                     size_t offs)
1452 {
1453         int ret;
1454
1455         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1456                             OCFS2_JOURNAL_ACCESS_WRITE);
1457         if (ret) {
1458                 mlog_errno(ret);
1459                 goto out;
1460         }
1461
1462         xs->here->xe_name_offset = cpu_to_le16(offs);
1463         xs->here->xe_value_size = cpu_to_le64(xi->xi_value_len);
1464         if (xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE)
1465                 ocfs2_xattr_set_local(xs->here, 1);
1466         else
1467                 ocfs2_xattr_set_local(xs->here, 0);
1468         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1469
1470         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1471         if (ret < 0)
1472                 mlog_errno(ret);
1473 out:
1474         return ret;
1475 }
1476
1477 /*
1478  * ocfs2_xattr_set_value_outside()
1479  *
1480  * Set large size value in B tree.
1481  */
1482 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1483                                          struct ocfs2_xattr_info *xi,
1484                                          struct ocfs2_xattr_search *xs,
1485                                          struct ocfs2_xattr_set_ctxt *ctxt,
1486                                          struct ocfs2_xattr_value_buf *vb,
1487                                          size_t offs)
1488 {
1489         void *val = xs->base + offs;
1490         struct ocfs2_xattr_value_root *xv = NULL;
1491         size_t size = namevalue_size_xi(xi);
1492         int ret = 0;
1493
1494         memset(val, 0, size);
1495         memcpy(val, xi->xi_name, xi->xi_name_len);
1496         xv = (struct ocfs2_xattr_value_root *)
1497                 (val + OCFS2_XATTR_SIZE(xi->xi_name_len));
1498         xv->xr_clusters = 0;
1499         xv->xr_last_eb_blk = 0;
1500         xv->xr_list.l_tree_depth = 0;
1501         xv->xr_list.l_count = cpu_to_le16(1);
1502         xv->xr_list.l_next_free_rec = 0;
1503         vb->vb_xv = xv;
1504
1505         ret = ocfs2_xattr_value_truncate(inode, vb, xi->xi_value_len, ctxt);
1506         if (ret < 0) {
1507                 mlog_errno(ret);
1508                 return ret;
1509         }
1510         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1511         if (ret < 0) {
1512                 mlog_errno(ret);
1513                 return ret;
1514         }
1515         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1516                                               xi->xi_value, xi->xi_value_len);
1517         if (ret < 0)
1518                 mlog_errno(ret);
1519
1520         return ret;
1521 }
1522
1523 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1524                                        int num_entries)
1525 {
1526         int free_space;
1527
1528         if (!needed_space)
1529                 return 0;
1530
1531         free_space = free_start -
1532                 sizeof(struct ocfs2_xattr_header) -
1533                 (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1534                 OCFS2_XATTR_HEADER_GAP;
1535         if (free_space < 0)
1536                 return -EIO;
1537         if (free_space < needed_space)
1538                 return -ENOSPC;
1539
1540         return 0;
1541 }
1542
1543 /* Give a pointer into the storage for the given offset */
1544 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1545 {
1546         BUG_ON(offset >= loc->xl_size);
1547         return loc->xl_ops->xlo_offset_pointer(loc, offset);
1548 }
1549
1550 /*
1551  * Wipe the name+value pair and allow the storage to reclaim it.  This
1552  * must be followed by either removal of the entry or a call to
1553  * ocfs2_xa_add_namevalue().
1554  */
1555 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1556 {
1557         loc->xl_ops->xlo_wipe_namevalue(loc);
1558 }
1559
1560 /*
1561  * Find lowest offset to a name+value pair.  This is the start of our
1562  * downward-growing free space.
1563  */
1564 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1565 {
1566         return loc->xl_ops->xlo_get_free_start(loc);
1567 }
1568
1569 /* Can we reuse loc->xl_entry for xi? */
1570 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1571                                     struct ocfs2_xattr_info *xi)
1572 {
1573         return loc->xl_ops->xlo_can_reuse(loc, xi);
1574 }
1575
1576 /* How much free space is needed to set the new value */
1577 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1578                                 struct ocfs2_xattr_info *xi)
1579 {
1580         return loc->xl_ops->xlo_check_space(loc, xi);
1581 }
1582
1583 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1584 {
1585         loc->xl_ops->xlo_add_entry(loc, name_hash);
1586         loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1587         /*
1588          * We can't leave the new entry's xe_name_offset at zero or
1589          * add_namevalue() will go nuts.  We set it to the size of our
1590          * storage so that it can never be less than any other entry.
1591          */
1592         loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1593 }
1594
1595 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1596                                    struct ocfs2_xattr_info *xi)
1597 {
1598         int size = namevalue_size_xi(xi);
1599         int nameval_offset;
1600         char *nameval_buf;
1601
1602         loc->xl_ops->xlo_add_namevalue(loc, size);
1603         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1604         loc->xl_entry->xe_name_len = xi->xi_name_len;
1605         ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1606         ocfs2_xattr_set_local(loc->xl_entry,
1607                               xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1608
1609         nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1610         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1611         memset(nameval_buf, 0, size);
1612         memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1613 }
1614
1615 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1616                                     struct ocfs2_xattr_value_buf *vb)
1617 {
1618         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1619         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1620
1621         /* Value bufs are for value trees */
1622         BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1623                (name_size + OCFS2_XATTR_ROOT_SIZE));
1624
1625         loc->xl_ops->xlo_fill_value_buf(loc, vb);
1626         vb->vb_xv =
1627                 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1628                                                         nameval_offset +
1629                                                         name_size);
1630 }
1631
1632 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1633                                            int offset)
1634 {
1635         return (char *)loc->xl_header + offset;
1636 }
1637
1638 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1639                                     struct ocfs2_xattr_info *xi)
1640 {
1641         /*
1642          * Block storage is strict.  If the sizes aren't exact, we will
1643          * remove the old one and reinsert the new.
1644          */
1645         return namevalue_size_xe(loc->xl_entry) ==
1646                 namevalue_size_xi(xi);
1647 }
1648
1649 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1650 {
1651         struct ocfs2_xattr_header *xh = loc->xl_header;
1652         int i, count = le16_to_cpu(xh->xh_count);
1653         int offset, free_start = loc->xl_size;
1654
1655         for (i = 0; i < count; i++) {
1656                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1657                 if (offset < free_start)
1658                         free_start = offset;
1659         }
1660
1661         return free_start;
1662 }
1663
1664 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1665                                       struct ocfs2_xattr_info *xi)
1666 {
1667         int count = le16_to_cpu(loc->xl_header->xh_count);
1668         int free_start = ocfs2_xa_get_free_start(loc);
1669         int needed_space = ocfs2_xi_entry_usage(xi);
1670
1671         /*
1672          * Block storage will reclaim the original entry before inserting
1673          * the new value, so we only need the difference.  If the new
1674          * entry is smaller than the old one, we don't need anything.
1675          */
1676         if (loc->xl_entry) {
1677                 /* Don't need space if we're reusing! */
1678                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1679                         needed_space = 0;
1680                 else
1681                         needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1682         }
1683         if (needed_space < 0)
1684                 needed_space = 0;
1685         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1686 }
1687
1688 /*
1689  * Block storage for xattrs keeps the name+value pairs compacted.  When
1690  * we remove one, we have to shift any that preceded it towards the end.
1691  */
1692 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1693 {
1694         int i, offset;
1695         int namevalue_offset, first_namevalue_offset, namevalue_size;
1696         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1697         struct ocfs2_xattr_header *xh = loc->xl_header;
1698         int count = le16_to_cpu(xh->xh_count);
1699
1700         namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1701         namevalue_size = namevalue_size_xe(entry);
1702         first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1703
1704         /* Shift the name+value pairs */
1705         memmove((char *)xh + first_namevalue_offset + namevalue_size,
1706                 (char *)xh + first_namevalue_offset,
1707                 namevalue_offset - first_namevalue_offset);
1708         memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1709
1710         /* Now tell xh->xh_entries about it */
1711         for (i = 0; i < count; i++) {
1712                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1713                 if (offset < namevalue_offset)
1714                         le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1715                                      namevalue_size);
1716         }
1717
1718         /*
1719          * Note that we don't update xh_free_start or xh_name_value_len
1720          * because they're not used in block-stored xattrs.
1721          */
1722 }
1723
1724 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1725 {
1726         int count = le16_to_cpu(loc->xl_header->xh_count);
1727         loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1728         le16_add_cpu(&loc->xl_header->xh_count, 1);
1729         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1730 }
1731
1732 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1733 {
1734         int free_start = ocfs2_xa_get_free_start(loc);
1735
1736         loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1737 }
1738
1739 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1740                                           struct ocfs2_xattr_value_buf *vb)
1741 {
1742         struct buffer_head *bh = loc->xl_storage;
1743
1744         if (loc->xl_size == (bh->b_size -
1745                              offsetof(struct ocfs2_xattr_block,
1746                                       xb_attrs.xb_header)))
1747                 vb->vb_access = ocfs2_journal_access_xb;
1748         else
1749                 vb->vb_access = ocfs2_journal_access_di;
1750         vb->vb_bh = bh;
1751 }
1752
1753 /*
1754  * Operations for xattrs stored in blocks.  This includes inline inode
1755  * storage and unindexed ocfs2_xattr_blocks.
1756  */
1757 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1758         .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1759         .xlo_check_space        = ocfs2_xa_block_check_space,
1760         .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1761         .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1762         .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1763         .xlo_add_entry          = ocfs2_xa_block_add_entry,
1764         .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1765         .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1766 };
1767
1768 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1769                                             int offset)
1770 {
1771         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1772         int block, block_offset;
1773
1774         /* The header is at the front of the bucket */
1775         block = offset >> bucket->bu_inode->i_sb->s_blocksize_bits;
1776         block_offset = offset % bucket->bu_inode->i_sb->s_blocksize;
1777
1778         return bucket_block(bucket, block) + block_offset;
1779 }
1780
1781 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1782                                      struct ocfs2_xattr_info *xi)
1783 {
1784         return namevalue_size_xe(loc->xl_entry) >=
1785                 namevalue_size_xi(xi);
1786 }
1787
1788 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1789 {
1790         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1791         return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1792 }
1793
1794 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1795                                          int free_start, int size)
1796 {
1797         /*
1798          * We need to make sure that the name+value pair fits within
1799          * one block.
1800          */
1801         if (((free_start - size) >> sb->s_blocksize_bits) !=
1802             ((free_start - 1) >> sb->s_blocksize_bits))
1803                 free_start -= free_start % sb->s_blocksize;
1804
1805         return free_start;
1806 }
1807
1808 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1809                                        struct ocfs2_xattr_info *xi)
1810 {
1811         int rc;
1812         int count = le16_to_cpu(loc->xl_header->xh_count);
1813         int free_start = ocfs2_xa_get_free_start(loc);
1814         int needed_space = ocfs2_xi_entry_usage(xi);
1815         int size = namevalue_size_xi(xi);
1816         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1817         struct super_block *sb = bucket->bu_inode->i_sb;
1818
1819         /*
1820          * Bucket storage does not reclaim name+value pairs it cannot
1821          * reuse.  They live as holes until the bucket fills, and then
1822          * the bucket is defragmented.  However, the bucket can reclaim
1823          * the ocfs2_xattr_entry.
1824          */
1825         if (loc->xl_entry) {
1826                 /* Don't need space if we're reusing! */
1827                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1828                         needed_space = 0;
1829                 else
1830                         needed_space -= sizeof(struct ocfs2_xattr_entry);
1831         }
1832         BUG_ON(needed_space < 0);
1833
1834         if (free_start < size) {
1835                 if (needed_space)
1836                         return -ENOSPC;
1837         } else {
1838                 /*
1839                  * First we check if it would fit in the first place.
1840                  * Below, we align the free start to a block.  This may
1841                  * slide us below the minimum gap.  By checking unaligned
1842                  * first, we avoid that error.
1843                  */
1844                 rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1845                                                  count);
1846                 if (rc)
1847                         return rc;
1848                 free_start = ocfs2_bucket_align_free_start(sb, free_start,
1849                                                            size);
1850         }
1851         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1852 }
1853
1854 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1855 {
1856         le16_add_cpu(&loc->xl_header->xh_name_value_len,
1857                      -namevalue_size_xe(loc->xl_entry));
1858 }
1859
1860 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1861 {
1862         struct ocfs2_xattr_header *xh = loc->xl_header;
1863         int count = le16_to_cpu(xh->xh_count);
1864         int low = 0, high = count - 1, tmp;
1865         struct ocfs2_xattr_entry *tmp_xe;
1866
1867         /*
1868          * We keep buckets sorted by name_hash, so we need to find
1869          * our insert place.
1870          */
1871         while (low <= high && count) {
1872                 tmp = (low + high) / 2;
1873                 tmp_xe = &xh->xh_entries[tmp];
1874
1875                 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1876                         low = tmp + 1;
1877                 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1878                         high = tmp - 1;
1879                 else {
1880                         low = tmp;
1881                         break;
1882                 }
1883         }
1884
1885         if (low != count)
1886                 memmove(&xh->xh_entries[low + 1],
1887                         &xh->xh_entries[low],
1888                         ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1889
1890         le16_add_cpu(&xh->xh_count, 1);
1891         loc->xl_entry = &xh->xh_entries[low];
1892         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1893 }
1894
1895 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1896 {
1897         int free_start = ocfs2_xa_get_free_start(loc);
1898         struct ocfs2_xattr_header *xh = loc->xl_header;
1899         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1900         struct super_block *sb = bucket->bu_inode->i_sb;
1901         int nameval_offset;
1902
1903         free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1904         nameval_offset = free_start - size;
1905         loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1906         xh->xh_free_start = cpu_to_le16(nameval_offset);
1907         le16_add_cpu(&xh->xh_name_value_len, size);
1908
1909 }
1910
1911 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1912                                            struct ocfs2_xattr_value_buf *vb)
1913 {
1914         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1915         struct super_block *sb = bucket->bu_inode->i_sb;
1916         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1917         int size = namevalue_size_xe(loc->xl_entry);
1918         int block_offset = nameval_offset >> sb->s_blocksize_bits;
1919
1920         /* Values are not allowed to straddle block boundaries */
1921         BUG_ON(block_offset !=
1922                ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1923         /* We expect the bucket to be filled in */
1924         BUG_ON(!bucket->bu_bhs[block_offset]);
1925
1926         vb->vb_access = ocfs2_journal_access;
1927         vb->vb_bh = bucket->bu_bhs[block_offset];
1928 }
1929
1930 /* Operations for xattrs stored in buckets. */
1931 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1932         .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1933         .xlo_check_space        = ocfs2_xa_bucket_check_space,
1934         .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1935         .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1936         .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1937         .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1938         .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1939         .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1940 };
1941
1942 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1943 {
1944         int index, count;
1945         struct ocfs2_xattr_header *xh = loc->xl_header;
1946         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1947
1948         ocfs2_xa_wipe_namevalue(loc);
1949         loc->xl_entry = NULL;
1950
1951         le16_add_cpu(&xh->xh_count, -1);
1952         count = le16_to_cpu(xh->xh_count);
1953
1954         /*
1955          * Only zero out the entry if there are more remaining.  This is
1956          * important for an empty bucket, as it keeps track of the
1957          * bucket's hash value.  It doesn't hurt empty block storage.
1958          */
1959         if (count) {
1960                 index = ((char *)entry - (char *)&xh->xh_entries) /
1961                         sizeof(struct ocfs2_xattr_entry);
1962                 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1963                         (count - index) * sizeof(struct ocfs2_xattr_entry));
1964                 memset(&xh->xh_entries[count], 0,
1965                        sizeof(struct ocfs2_xattr_entry));
1966         }
1967 }
1968
1969 /*
1970  * Prepares loc->xl_entry to receive the new xattr.  This includes
1971  * properly setting up the name+value pair region.  If loc->xl_entry
1972  * already exists, it will take care of modifying it appropriately.
1973  * This also includes deleting entries, but don't call this to remove
1974  * a non-existant entry.  That's just a bug.
1975  *
1976  * Note that this modifies the data.  You did journal_access already,
1977  * right?
1978  */
1979 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
1980                                   struct ocfs2_xattr_info *xi,
1981                                   u32 name_hash)
1982 {
1983         int rc = 0;
1984         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
1985         char *nameval_buf;
1986
1987         if (!xi->xi_value) {
1988                 ocfs2_xa_remove_entry(loc);
1989                 goto out;
1990         }
1991
1992         rc = ocfs2_xa_check_space(loc, xi);
1993         if (rc)
1994                 goto out;
1995
1996         if (loc->xl_entry) {
1997                 if (ocfs2_xa_can_reuse_entry(loc, xi)) {
1998                         nameval_buf = ocfs2_xa_offset_pointer(loc,
1999                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
2000                         memset(nameval_buf + name_size, 0,
2001                                namevalue_size_xe(loc->xl_entry) - name_size);
2002                         loc->xl_entry->xe_value_size =
2003                                 cpu_to_le64(xi->xi_value_len);
2004                         goto out;
2005                 }
2006
2007                 ocfs2_xa_wipe_namevalue(loc);
2008         } else
2009                 ocfs2_xa_add_entry(loc, name_hash);
2010
2011         /*
2012          * If we get here, we have a blank entry.  Fill it.  We grow our
2013          * name+value pair back from the end.
2014          */
2015         ocfs2_xa_add_namevalue(loc, xi);
2016
2017 out:
2018         return rc;
2019 }
2020
2021 /*
2022  * Store the value portion of the name+value pair.  This is either an
2023  * inline value or the tree root of an external value.
2024  */
2025 static void ocfs2_xa_store_inline_value(struct ocfs2_xa_loc *loc,
2026                                         struct ocfs2_xattr_info *xi)
2027 {
2028         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2029         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2030         int inline_value_size = namevalue_size_xi(xi) - name_size;
2031         const void *value = xi->xi_value;
2032         char *nameval_buf;
2033
2034         if (!xi->xi_value)
2035                 return;
2036
2037         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2038                 value = &def_xv;
2039                 inline_value_size = OCFS2_XATTR_ROOT_SIZE;
2040         }
2041         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2042         memcpy(nameval_buf + name_size, value, inline_value_size);
2043 }
2044
2045 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2046                                      struct inode *inode,
2047                                      struct buffer_head *bh,
2048                                      struct ocfs2_xattr_entry *entry)
2049 {
2050         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2051
2052         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2053         loc->xl_storage = bh;
2054         loc->xl_entry = entry;
2055
2056         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2057                 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2058         else {
2059                 BUG_ON(entry);
2060                 loc->xl_size = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2061         }
2062         loc->xl_header =
2063                 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2064                                               loc->xl_size);
2065 }
2066
2067 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2068                                           struct buffer_head *bh,
2069                                           struct ocfs2_xattr_entry *entry)
2070 {
2071         struct ocfs2_xattr_block *xb =
2072                 (struct ocfs2_xattr_block *)bh->b_data;
2073
2074         BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2075
2076         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2077         loc->xl_storage = bh;
2078         loc->xl_header = &(xb->xb_attrs.xb_header);
2079         loc->xl_entry = entry;
2080         loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2081                                              xb_attrs.xb_header);
2082 }
2083
2084 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2085                                            struct ocfs2_xattr_bucket *bucket,
2086                                            struct ocfs2_xattr_entry *entry)
2087 {
2088         loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2089         loc->xl_storage = bucket;
2090         loc->xl_header = bucket_xh(bucket);
2091         loc->xl_entry = entry;
2092         loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2093 }
2094
2095
2096 /*
2097  * ocfs2_xattr_set_entry()
2098  *
2099  * Set extended attribute entry into inode or block.
2100  *
2101  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
2102  * We first insert tree root(ocfs2_xattr_value_root) like a normal value,
2103  * then set value in B tree with set_value_outside().
2104  */
2105 static int ocfs2_xattr_set_entry(struct inode *inode,
2106                                  struct ocfs2_xattr_info *xi,
2107                                  struct ocfs2_xattr_search *xs,
2108                                  struct ocfs2_xattr_set_ctxt *ctxt,
2109                                  int flag)
2110 {
2111         struct ocfs2_xattr_entry *last;
2112         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2113         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2114         size_t min_offs = xs->end - xs->base;
2115         size_t size_l = 0;
2116         handle_t *handle = ctxt->handle;
2117         int free, i, ret;
2118         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->xi_name,
2119                                               xi->xi_name_len);
2120         struct ocfs2_xa_loc loc;
2121         struct ocfs2_xattr_value_buf vb = {
2122                 .vb_bh = xs->xattr_bh,
2123                 .vb_access = ocfs2_journal_access_di,
2124         };
2125
2126         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
2127                 BUG_ON(xs->xattr_bh == xs->inode_bh);
2128                 vb.vb_access = ocfs2_journal_access_xb;
2129         } else
2130                 BUG_ON(xs->xattr_bh != xs->inode_bh);
2131
2132         /* Compute min_offs, last and free space. */
2133         last = xs->header->xh_entries;
2134
2135         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
2136                 size_t offs = le16_to_cpu(last->xe_name_offset);
2137                 if (offs < min_offs)
2138                         min_offs = offs;
2139                 last += 1;
2140         }
2141
2142         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2143         if (free < 0)
2144                 return -EIO;
2145
2146         if (!xs->not_found)
2147                 free += ocfs2_xe_entry_usage(xs->here);
2148
2149         /* Check free space in inode or block */
2150         if (xi->xi_value && (free < ocfs2_xi_entry_usage(xi))) {
2151                 ret = -ENOSPC;
2152                 goto out;
2153         }
2154
2155         if (!xs->not_found) {
2156                 /* For existing extended attribute */
2157                 size_t size = namevalue_size_xe(xs->here);
2158                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
2159                 void *val = xs->base + offs;
2160
2161                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
2162                         /* Replace existing local xattr with tree root */
2163                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
2164                                                             ctxt, &vb, offs);
2165                         if (ret < 0)
2166                                 mlog_errno(ret);
2167                         goto out;
2168                 } else if (!ocfs2_xattr_is_local(xs->here)) {
2169                         /* For existing xattr which has value outside */
2170                         vb.vb_xv = (struct ocfs2_xattr_value_root *)
2171                                 (val + OCFS2_XATTR_SIZE(xi->xi_name_len));
2172
2173                         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2174                                 /*
2175                                  * If new value need set outside also,
2176                                  * first truncate old value to new value,
2177                                  * then set new value with set_value_outside().
2178                                  */
2179                                 ret = ocfs2_xattr_value_truncate(inode,
2180                                                         &vb,
2181                                                         xi->xi_value_len,
2182                                                         ctxt);
2183                                 if (ret < 0) {
2184                                         mlog_errno(ret);
2185                                         goto out;
2186                                 }
2187
2188                                 ret = ocfs2_xattr_update_entry(inode,
2189                                                                handle,
2190                                                                xi,
2191                                                                xs,
2192                                                                &vb,
2193                                                                offs);
2194                                 if (ret < 0) {
2195                                         mlog_errno(ret);
2196                                         goto out;
2197                                 }
2198
2199                                 ret = __ocfs2_xattr_set_value_outside(inode,
2200                                                         handle,
2201                                                         &vb,
2202                                                         xi->xi_value,
2203                                                         xi->xi_value_len);
2204                                 if (ret < 0)
2205                                         mlog_errno(ret);
2206                                 goto out;
2207                         } else {
2208                                 /*
2209                                  * If new value need set in local,
2210                                  * just trucate old value to zero.
2211                                  */
2212                                  ret = ocfs2_xattr_value_truncate(inode,
2213                                                                   &vb,
2214                                                                   0,
2215                                                                   ctxt);
2216                                 if (ret < 0)
2217                                         mlog_errno(ret);
2218                         }
2219                 }
2220         }
2221
2222         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
2223                                       OCFS2_JOURNAL_ACCESS_WRITE);
2224         if (ret) {
2225                 mlog_errno(ret);
2226                 goto out;
2227         }
2228
2229         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
2230                 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
2231                                    OCFS2_JOURNAL_ACCESS_WRITE);
2232                 if (ret) {
2233                         mlog_errno(ret);
2234                         goto out;
2235                 }
2236         }
2237
2238         if (xs->xattr_bh == xs->inode_bh)
2239                 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2240                                          xs->not_found ? NULL : xs->here);
2241         else
2242                 ocfs2_init_xattr_block_xa_loc(&loc, xs->xattr_bh,
2243                                               xs->not_found ? NULL : xs->here);
2244
2245         /*
2246          * Prepare our entry and insert the inline value.  This will
2247          * be a value tree root for values that are larger than
2248          * OCFS2_XATTR_INLINE_SIZE.
2249          */
2250         ret = ocfs2_xa_prepare_entry(&loc, xi, name_hash);
2251         if (ret) {
2252                 if (ret != -ENOSPC)
2253                         mlog_errno(ret);
2254                 goto out;
2255         }
2256         /* XXX For now, until we make ocfs2_xa_prepare_entry() primary */
2257         BUG_ON(ret == -ENOSPC);
2258         ocfs2_xa_store_inline_value(&loc, xi);
2259         xs->here = loc.xl_entry;
2260
2261         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
2262                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
2263                 if (ret < 0) {
2264                         mlog_errno(ret);
2265                         goto out;
2266                 }
2267         }
2268
2269         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
2270             (flag & OCFS2_INLINE_XATTR_FL)) {
2271                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2272                 unsigned int xattrsize = osb->s_xattr_inline_size;
2273
2274                 /*
2275                  * Adjust extent record count or inline data size
2276                  * to reserve space for extended attribute.
2277                  */
2278                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2279                         struct ocfs2_inline_data *idata = &di->id2.i_data;
2280                         le16_add_cpu(&idata->id_count, -xattrsize);
2281                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2282                         struct ocfs2_extent_list *el = &di->id2.i_list;
2283                         le16_add_cpu(&el->l_count, -(xattrsize /
2284                                         sizeof(struct ocfs2_extent_rec)));
2285                 }
2286                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2287         }
2288         /* Update xattr flag */
2289         spin_lock(&oi->ip_lock);
2290         oi->ip_dyn_features |= flag;
2291         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2292         spin_unlock(&oi->ip_lock);
2293
2294         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
2295         if (ret < 0)
2296                 mlog_errno(ret);
2297
2298         if (!ret && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2299                 /*
2300                  * Set value outside in B tree.
2301                  * This is the second step for value size > INLINE_SIZE.
2302                  */
2303                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
2304                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
2305                                                     &vb, offs);
2306                 if (ret < 0) {
2307                         int ret2;
2308
2309                         mlog_errno(ret);
2310                         /*
2311                          * If set value outside failed, we have to clean
2312                          * the junk tree root we have already set in local.
2313                          */
2314                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
2315                                                    xi, xs, &vb, offs);
2316                         if (ret2 < 0)
2317                                 mlog_errno(ret2);
2318                 }
2319         }
2320 out:
2321         return ret;
2322 }
2323
2324 /*
2325  * In xattr remove, if it is stored outside and refcounted, we may have
2326  * the chance to split the refcount tree. So need the allocators.
2327  */
2328 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2329                                         struct ocfs2_xattr_value_root *xv,
2330                                         struct ocfs2_caching_info *ref_ci,
2331                                         struct buffer_head *ref_root_bh,
2332                                         struct ocfs2_alloc_context **meta_ac,
2333                                         int *ref_credits)
2334 {
2335         int ret, meta_add = 0;
2336         u32 p_cluster, num_clusters;
2337         unsigned int ext_flags;
2338
2339         *ref_credits = 0;
2340         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2341                                        &num_clusters,
2342                                        &xv->xr_list,
2343                                        &ext_flags);
2344         if (ret) {
2345                 mlog_errno(ret);
2346                 goto out;
2347         }
2348
2349         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2350                 goto out;
2351
2352         ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2353                                                  ref_root_bh, xv,
2354                                                  &meta_add, ref_credits);
2355         if (ret) {
2356                 mlog_errno(ret);
2357                 goto out;
2358         }
2359
2360         ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2361                                                 meta_add, meta_ac);
2362         if (ret)
2363                 mlog_errno(ret);
2364
2365 out:
2366         return ret;
2367 }
2368
2369 static int ocfs2_remove_value_outside(struct inode*inode,
2370                                       struct ocfs2_xattr_value_buf *vb,
2371                                       struct ocfs2_xattr_header *header,
2372                                       struct ocfs2_caching_info *ref_ci,
2373                                       struct buffer_head *ref_root_bh)
2374 {
2375         int ret = 0, i, ref_credits;
2376         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2377         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2378         void *val;
2379
2380         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2381
2382         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2383                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2384
2385                 if (ocfs2_xattr_is_local(entry))
2386                         continue;
2387
2388                 val = (void *)header +
2389                         le16_to_cpu(entry->xe_name_offset);
2390                 vb->vb_xv = (struct ocfs2_xattr_value_root *)
2391                         (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2392
2393                 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2394                                                          ref_ci, ref_root_bh,
2395                                                          &ctxt.meta_ac,
2396                                                          &ref_credits);
2397
2398                 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2399                                         ocfs2_remove_extent_credits(osb->sb));
2400                 if (IS_ERR(ctxt.handle)) {
2401                         ret = PTR_ERR(ctxt.handle);
2402                         mlog_errno(ret);
2403                         break;
2404                 }
2405
2406                 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2407                 if (ret < 0) {
2408                         mlog_errno(ret);
2409                         break;
2410                 }
2411
2412                 ocfs2_commit_trans(osb, ctxt.handle);
2413                 if (ctxt.meta_ac) {
2414                         ocfs2_free_alloc_context(ctxt.meta_ac);
2415                         ctxt.meta_ac = NULL;
2416                 }
2417         }
2418
2419         if (ctxt.meta_ac)
2420                 ocfs2_free_alloc_context(ctxt.meta_ac);
2421         ocfs2_schedule_truncate_log_flush(osb, 1);
2422         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2423         return ret;
2424 }
2425
2426 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2427                                     struct buffer_head *di_bh,
2428                                     struct ocfs2_caching_info *ref_ci,
2429                                     struct buffer_head *ref_root_bh)
2430 {
2431
2432         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2433         struct ocfs2_xattr_header *header;
2434         int ret;
2435         struct ocfs2_xattr_value_buf vb = {
2436                 .vb_bh = di_bh,
2437                 .vb_access = ocfs2_journal_access_di,
2438         };
2439
2440         header = (struct ocfs2_xattr_header *)
2441                  ((void *)di + inode->i_sb->s_blocksize -
2442                  le16_to_cpu(di->i_xattr_inline_size));
2443
2444         ret = ocfs2_remove_value_outside(inode, &vb, header,
2445                                          ref_ci, ref_root_bh);
2446
2447         return ret;
2448 }
2449
/*
 * Argument bundle passed as the opaque parameter of
 * ocfs2_iterate_xattr_index_block() when removing an indexed xattr block.
 */
struct ocfs2_rm_xattr_bucket_para {
	/* Refcount tree caching info supplied by the remover's caller. */
	struct ocfs2_caching_info *ref_ci;
	/* Buffer head of the refcount tree root supplied by that caller. */
	struct buffer_head *ref_root_bh;
};
2454
2455 static int ocfs2_xattr_block_remove(struct inode *inode,
2456                                     struct buffer_head *blk_bh,
2457                                     struct ocfs2_caching_info *ref_ci,
2458                                     struct buffer_head *ref_root_bh)
2459 {
2460         struct ocfs2_xattr_block *xb;
2461         int ret = 0;
2462         struct ocfs2_xattr_value_buf vb = {
2463                 .vb_bh = blk_bh,
2464                 .vb_access = ocfs2_journal_access_xb,
2465         };
2466         struct ocfs2_rm_xattr_bucket_para args = {
2467                 .ref_ci = ref_ci,
2468                 .ref_root_bh = ref_root_bh,
2469         };
2470
2471         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2472         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2473                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2474                 ret = ocfs2_remove_value_outside(inode, &vb, header,
2475                                                  ref_ci, ref_root_bh);
2476         } else
2477                 ret = ocfs2_iterate_xattr_index_block(inode,
2478                                                 blk_bh,
2479                                                 ocfs2_rm_xattr_cluster,
2480                                                 &args);
2481
2482         return ret;
2483 }
2484
2485 static int ocfs2_xattr_free_block(struct inode *inode,
2486                                   u64 block,
2487                                   struct ocfs2_caching_info *ref_ci,
2488                                   struct buffer_head *ref_root_bh)
2489 {
2490         struct inode *xb_alloc_inode;
2491         struct buffer_head *xb_alloc_bh = NULL;
2492         struct buffer_head *blk_bh = NULL;
2493         struct ocfs2_xattr_block *xb;
2494         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2495         handle_t *handle;
2496         int ret = 0;
2497         u64 blk, bg_blkno;
2498         u16 bit;
2499
2500         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2501         if (ret < 0) {
2502                 mlog_errno(ret);
2503                 goto out;
2504         }
2505
2506         ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2507         if (ret < 0) {
2508                 mlog_errno(ret);
2509                 goto out;
2510         }
2511
2512         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2513         blk = le64_to_cpu(xb->xb_blkno);
2514         bit = le16_to_cpu(xb->xb_suballoc_bit);
2515         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2516
2517         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2518                                 EXTENT_ALLOC_SYSTEM_INODE,
2519                                 le16_to_cpu(xb->xb_suballoc_slot));
2520         if (!xb_alloc_inode) {
2521                 ret = -ENOMEM;
2522                 mlog_errno(ret);
2523                 goto out;
2524         }
2525         mutex_lock(&xb_alloc_inode->i_mutex);
2526
2527         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2528         if (ret < 0) {
2529                 mlog_errno(ret);
2530                 goto out_mutex;
2531         }
2532
2533         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2534         if (IS_ERR(handle)) {
2535                 ret = PTR_ERR(handle);
2536                 mlog_errno(ret);
2537                 goto out_unlock;
2538         }
2539
2540         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2541                                        bit, bg_blkno, 1);
2542         if (ret < 0)
2543                 mlog_errno(ret);
2544
2545         ocfs2_commit_trans(osb, handle);
2546 out_unlock:
2547         ocfs2_inode_unlock(xb_alloc_inode, 1);
2548         brelse(xb_alloc_bh);
2549 out_mutex:
2550         mutex_unlock(&xb_alloc_inode->i_mutex);
2551         iput(xb_alloc_inode);
2552 out:
2553         brelse(blk_bh);
2554         return ret;
2555 }
2556
2557 /*
2558  * ocfs2_xattr_remove()
2559  *
2560  * Free extended attribute resources associated with this inode.
2561  */
2562 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2563 {
2564         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2565         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2566         struct ocfs2_refcount_tree *ref_tree = NULL;
2567         struct buffer_head *ref_root_bh = NULL;
2568         struct ocfs2_caching_info *ref_ci = NULL;
2569         handle_t *handle;
2570         int ret;
2571
2572         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2573                 return 0;
2574
2575         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2576                 return 0;
2577
2578         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2579                 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2580                                                le64_to_cpu(di->i_refcount_loc),
2581                                                1, &ref_tree, &ref_root_bh);
2582                 if (ret) {
2583                         mlog_errno(ret);
2584                         goto out;
2585                 }
2586                 ref_ci = &ref_tree->rf_ci;
2587
2588         }
2589
2590         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2591                 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2592                                                ref_ci, ref_root_bh);
2593                 if (ret < 0) {
2594                         mlog_errno(ret);
2595                         goto out;
2596                 }
2597         }
2598
2599         if (di->i_xattr_loc) {
2600                 ret = ocfs2_xattr_free_block(inode,
2601                                              le64_to_cpu(di->i_xattr_loc),
2602                                              ref_ci, ref_root_bh);
2603                 if (ret < 0) {
2604                         mlog_errno(ret);
2605                         goto out;
2606                 }
2607         }
2608
2609         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2610                                    OCFS2_INODE_UPDATE_CREDITS);
2611         if (IS_ERR(handle)) {
2612                 ret = PTR_ERR(handle);
2613                 mlog_errno(ret);
2614                 goto out;
2615         }
2616         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2617                                       OCFS2_JOURNAL_ACCESS_WRITE);
2618         if (ret) {
2619                 mlog_errno(ret);
2620                 goto out_commit;
2621         }
2622
2623         di->i_xattr_loc = 0;
2624
2625         spin_lock(&oi->ip_lock);
2626         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2627         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2628         spin_unlock(&oi->ip_lock);
2629
2630         ret = ocfs2_journal_dirty(handle, di_bh);
2631         if (ret < 0)
2632                 mlog_errno(ret);
2633 out_commit:
2634         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2635 out:
2636         if (ref_tree)
2637                 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2638         brelse(ref_root_bh);
2639         return ret;
2640 }
2641
2642 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2643                                         struct ocfs2_dinode *di)
2644 {
2645         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2646         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2647         int free;
2648
2649         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2650                 return 0;
2651
2652         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2653                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2654                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2655         } else if (ocfs2_inode_is_fast_symlink(inode)) {
2656                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2657                         le64_to_cpu(di->i_size);
2658         } else {
2659                 struct ocfs2_extent_list *el = &di->id2.i_list;
2660                 free = (le16_to_cpu(el->l_count) -
2661                         le16_to_cpu(el->l_next_free_rec)) *
2662                         sizeof(struct ocfs2_extent_rec);
2663         }
2664         if (free >= xattrsize)
2665                 return 1;
2666
2667         return 0;
2668 }
2669
2670 /*
2671  * ocfs2_xattr_ibody_find()
2672  *
2673  * Find extended attribute in inode block and
2674  * fill search info into struct ocfs2_xattr_search.
2675  */
2676 static int ocfs2_xattr_ibody_find(struct inode *inode,
2677                                   int name_index,
2678                                   const char *name,
2679                                   struct ocfs2_xattr_search *xs)
2680 {
2681         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2682         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2683         int ret;
2684         int has_space = 0;
2685
2686         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2687                 return 0;
2688
2689         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2690                 down_read(&oi->ip_alloc_sem);
2691                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2692                 up_read(&oi->ip_alloc_sem);
2693                 if (!has_space)
2694                         return 0;
2695         }
2696
2697         xs->xattr_bh = xs->inode_bh;
2698         xs->end = (void *)di + inode->i_sb->s_blocksize;
2699         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2700                 xs->header = (struct ocfs2_xattr_header *)
2701                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2702         else
2703                 xs->header = (struct ocfs2_xattr_header *)
2704                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2705         xs->base = (void *)xs->header;
2706         xs->here = xs->header->xh_entries;
2707
2708         /* Find the named attribute. */
2709         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2710                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2711                 if (ret && ret != -ENODATA)
2712                         return ret;
2713                 xs->not_found = ret;
2714         }
2715
2716         return 0;
2717 }
2718
2719 /*
2720  * ocfs2_xattr_ibody_set()
2721  *
2722  * Set, replace or remove an extended attribute into inode block.
2723  *
2724  */
2725 static int ocfs2_xattr_ibody_set(struct inode *inode,
2726                                  struct ocfs2_xattr_info *xi,
2727                                  struct ocfs2_xattr_search *xs,
2728                                  struct ocfs2_xattr_set_ctxt *ctxt)
2729 {
2730         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2731         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2732         int ret;
2733
2734         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2735                 return -ENOSPC;
2736
2737         down_write(&oi->ip_alloc_sem);
2738         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2739                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2740                         ret = -ENOSPC;
2741                         goto out;
2742                 }
2743         }
2744
2745         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2746                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2747 out:
2748         up_write(&oi->ip_alloc_sem);
2749
2750         return ret;
2751 }
2752
2753 /*
2754  * ocfs2_xattr_block_find()
2755  *
2756  * Find extended attribute in external block and
2757  * fill search info into struct ocfs2_xattr_search.
2758  */
2759 static int ocfs2_xattr_block_find(struct inode *inode,
2760                                   int name_index,
2761                                   const char *name,
2762                                   struct ocfs2_xattr_search *xs)
2763 {
2764         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2765         struct buffer_head *blk_bh = NULL;
2766         struct ocfs2_xattr_block *xb;
2767         int ret = 0;
2768
2769         if (!di->i_xattr_loc)
2770                 return ret;
2771
2772         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2773                                      &blk_bh);
2774         if (ret < 0) {
2775                 mlog_errno(ret);
2776                 return ret;
2777         }
2778
2779         xs->xattr_bh = blk_bh;
2780         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2781
2782         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2783                 xs->header = &xb->xb_attrs.xb_header;
2784                 xs->base = (void *)xs->header;
2785                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2786                 xs->here = xs->header->xh_entries;
2787
2788                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2789         } else
2790                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2791                                                    name_index,
2792                                                    name, xs);
2793
2794         if (ret && ret != -ENODATA) {
2795                 xs->xattr_bh = NULL;
2796                 goto cleanup;
2797         }
2798         xs->not_found = ret;
2799         return 0;
2800 cleanup:
2801         brelse(blk_bh);
2802
2803         return ret;
2804 }
2805
2806 static int ocfs2_create_xattr_block(handle_t *handle,
2807                                     struct inode *inode,
2808                                     struct buffer_head *inode_bh,
2809                                     struct ocfs2_alloc_context *meta_ac,
2810                                     struct buffer_head **ret_bh,
2811                                     int indexed)
2812 {
2813         int ret;
2814         u16 suballoc_bit_start;
2815         u32 num_got;
2816         u64 first_blkno;
2817         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2818         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2819         struct buffer_head *new_bh = NULL;
2820         struct ocfs2_xattr_block *xblk;
2821
2822         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2823                                       OCFS2_JOURNAL_ACCESS_CREATE);
2824         if (ret < 0) {
2825                 mlog_errno(ret);
2826                 goto end;
2827         }
2828
2829         ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2830                                    &suballoc_bit_start, &num_got,
2831                                    &first_blkno);
2832         if (ret < 0) {
2833                 mlog_errno(ret);
2834                 goto end;
2835         }
2836
2837         new_bh = sb_getblk(inode->i_sb, first_blkno);
2838         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2839
2840         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2841                                       new_bh,
2842                                       OCFS2_JOURNAL_ACCESS_CREATE);
2843         if (ret < 0) {
2844                 mlog_errno(ret);
2845                 goto end;
2846         }
2847
2848         /* Initialize ocfs2_xattr_block */
2849         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2850         memset(xblk, 0, inode->i_sb->s_blocksize);
2851         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2852         xblk->xb_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2853         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2854         xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2855         xblk->xb_blkno = cpu_to_le64(first_blkno);
2856
2857         if (indexed) {
2858                 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2859                 xr->xt_clusters = cpu_to_le32(1);
2860                 xr->xt_last_eb_blk = 0;
2861                 xr->xt_list.l_tree_depth = 0;
2862                 xr->xt_list.l_count = cpu_to_le16(
2863                                         ocfs2_xattr_recs_per_xb(inode->i_sb));
2864                 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2865                 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2866         }
2867
2868         ret = ocfs2_journal_dirty(handle, new_bh);
2869         if (ret < 0) {
2870                 mlog_errno(ret);
2871                 goto end;
2872         }
2873         di->i_xattr_loc = cpu_to_le64(first_blkno);
2874         ocfs2_journal_dirty(handle, inode_bh);
2875
2876         *ret_bh = new_bh;
2877         new_bh = NULL;
2878
2879 end:
2880         brelse(new_bh);
2881         return ret;
2882 }
2883
2884 /*
2885  * ocfs2_xattr_block_set()
2886  *
2887  * Set, replace or remove an extended attribute into external block.
2888  *
2889  */
2890 static int ocfs2_xattr_block_set(struct inode *inode,
2891                                  struct ocfs2_xattr_info *xi,
2892                                  struct ocfs2_xattr_search *xs,
2893                                  struct ocfs2_xattr_set_ctxt *ctxt)
2894 {
2895         struct buffer_head *new_bh = NULL;
2896         handle_t *handle = ctxt->handle;
2897         struct ocfs2_xattr_block *xblk = NULL;
2898         int ret;
2899
2900         if (!xs->xattr_bh) {
2901                 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2902                                                ctxt->meta_ac, &new_bh, 0);
2903                 if (ret) {
2904                         mlog_errno(ret);
2905                         goto end;
2906                 }
2907
2908                 xs->xattr_bh = new_bh;
2909                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2910                 xs->header = &xblk->xb_attrs.xb_header;
2911                 xs->base = (void *)xs->header;
2912                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2913                 xs->here = xs->header->xh_entries;
2914         } else
2915                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2916
2917         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2918                 /* Set extended attribute into external block */
2919                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2920                                             OCFS2_HAS_XATTR_FL);
2921                 if (!ret || ret != -ENOSPC)
2922                         goto end;
2923
2924                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2925                 if (ret)
2926                         goto end;
2927         }
2928
2929         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2930
2931 end:
2932
2933         return ret;
2934 }
2935
2936 /* Check whether the new xattr can be inserted into the inode. */
2937 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2938                                        struct ocfs2_xattr_info *xi,
2939                                        struct ocfs2_xattr_search *xs)
2940 {
2941         struct ocfs2_xattr_entry *last;
2942         int free, i;
2943         size_t min_offs = xs->end - xs->base;
2944
2945         if (!xs->header)
2946                 return 0;
2947
2948         last = xs->header->xh_entries;
2949
2950         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2951                 size_t offs = le16_to_cpu(last->xe_name_offset);
2952                 if (offs < min_offs)
2953                         min_offs = offs;
2954                 last += 1;
2955         }
2956
2957         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2958         if (free < 0)
2959                 return 0;
2960
2961         BUG_ON(!xs->not_found);
2962
2963         if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
2964                 return 1;
2965
2966         return 0;
2967 }
2968
2969 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2970                                      struct ocfs2_dinode *di,
2971                                      struct ocfs2_xattr_info *xi,
2972                                      struct ocfs2_xattr_search *xis,
2973                                      struct ocfs2_xattr_search *xbs,
2974                                      int *clusters_need,
2975                                      int *meta_need,
2976                                      int *credits_need)
2977 {
2978         int ret = 0, old_in_xb = 0;
2979         int clusters_add = 0, meta_add = 0, credits = 0;
2980         struct buffer_head *bh = NULL;
2981         struct ocfs2_xattr_block *xb = NULL;
2982         struct ocfs2_xattr_entry *xe = NULL;
2983         struct ocfs2_xattr_value_root *xv = NULL;
2984         char *base = NULL;
2985         int name_offset, name_len = 0;
2986         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2987                                                     xi->xi_value_len);
2988         u64 value_size;
2989
2990         /*
2991          * Calculate the clusters we need to write.
2992          * No matter whether we replace an old one or add a new one,
2993          * we need this for writing.
2994          */
2995         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2996                 credits += new_clusters *
2997                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2998
2999         if (xis->not_found && xbs->not_found) {
3000                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3001
3002                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3003                         clusters_add += new_clusters;
3004                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3005                                                         &def_xv.xv.xr_list,
3006                                                         new_clusters);
3007                 }
3008
3009                 goto meta_guess;
3010         }
3011
3012         if (!xis->not_found) {
3013                 xe = xis->here;
3014                 name_offset = le16_to_cpu(xe->xe_name_offset);
3015                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3016                 base = xis->base;
3017                 credits += OCFS2_INODE_UPDATE_CREDITS;
3018         } else {
3019                 int i, block_off = 0;
3020                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3021                 xe = xbs->here;
3022                 name_offset = le16_to_cpu(xe->xe_name_offset);
3023                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3024                 i = xbs->here - xbs->header->xh_entries;
3025                 old_in_xb = 1;
3026
3027                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3028                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3029                                                         bucket_xh(xbs->bucket),
3030                                                         i, &block_off,
3031                                                         &name_offset);
3032                         base = bucket_block(xbs->bucket, block_off);
3033                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3034                 } else {
3035                         base = xbs->base;
3036                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3037                 }
3038         }
3039
3040         /*
3041          * delete a xattr doesn't need metadata and cluster allocation.
3042          * so just calculate the credits and return.
3043          *
3044          * The credits for removing the value tree will be extended
3045          * by ocfs2_remove_extent itself.
3046          */
3047         if (!xi->xi_value) {
3048                 if (!ocfs2_xattr_is_local(xe))
3049                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3050
3051                 goto out;
3052         }
3053
3054         /* do cluster allocation guess first. */
3055         value_size = le64_to_cpu(xe->xe_value_size);
3056
3057         if (old_in_xb) {
3058                 /*
3059                  * In xattr set, we always try to set the xe in inode first,
3060                  * so if it can be inserted into inode successfully, the old
3061                  * one will be removed from the xattr block, and this xattr
3062                  * will be inserted into inode as a new xattr in inode.
3063                  */
3064                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3065                         clusters_add += new_clusters;
3066                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
3067                                     OCFS2_INODE_UPDATE_CREDITS;
3068                         if (!ocfs2_xattr_is_local(xe))
3069                                 credits += ocfs2_calc_extend_credits(
3070                                                         inode->i_sb,
3071                                                         &def_xv.xv.xr_list,
3072                                                         new_clusters);
3073                         goto out;
3074                 }
3075         }
3076
3077         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3078                 /* the new values will be stored outside. */
3079                 u32 old_clusters = 0;
3080
3081                 if (!ocfs2_xattr_is_local(xe)) {
3082                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3083                                                                  value_size);
3084                         xv = (struct ocfs2_xattr_value_root *)
3085                              (base + name_offset + name_len);
3086                         value_size = OCFS2_XATTR_ROOT_SIZE;
3087                 } else
3088                         xv = &def_xv.xv;
3089
3090                 if (old_clusters >= new_clusters) {
3091                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3092                         goto out;
3093                 } else {
3094                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3095                         clusters_add += new_clusters - old_clusters;
3096                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3097                                                              &xv->xr_list,
3098                                                              new_clusters -
3099                                                              old_clusters);
3100                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3101                                 goto out;
3102                 }
3103         } else {
3104                 /*
3105                  * Now the new value will be stored inside. So if the new
3106                  * value is smaller than the size of value root or the old
3107                  * value, we don't need any allocation, otherwise we have
3108                  * to guess metadata allocation.
3109                  */
3110                 if ((ocfs2_xattr_is_local(xe) &&
3111                      (value_size >= xi->xi_value_len)) ||
3112                     (!ocfs2_xattr_is_local(xe) &&
3113                      OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3114                         goto out;
3115         }
3116
3117 meta_guess:
3118         /* calculate metadata allocation. */
3119         if (di->i_xattr_loc) {
3120                 if (!xbs->xattr_bh) {
3121                         ret = ocfs2_read_xattr_block(inode,
3122                                                      le64_to_cpu(di->i_xattr_loc),
3123                                                      &bh);
3124                         if (ret) {
3125                                 mlog_errno(ret);
3126                                 goto out;
3127                         }
3128
3129                         xb = (struct ocfs2_xattr_block *)bh->b_data;
3130                 } else
3131                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3132
3133                 /*
3134                  * If there is already an xattr tree, good, we can calculate
3135                  * like other b-trees. Otherwise we may have the chance of
3136                  * create a tree, the credit calculation is borrowed from
3137                  * ocfs2_calc_extend_credits with root_el = NULL. And the
3138                  * new tree will be cluster based, so no meta is needed.
3139                  */
3140                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3141                         struct ocfs2_extent_list *el =
3142                                  &xb->xb_attrs.xb_root.xt_list;
3143                         meta_add += ocfs2_extend_meta_needed(el);
3144                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3145                                                              el, 1);
3146                 } else
3147                         credits += OCFS2_SUBALLOC_ALLOC + 1;
3148
3149                 /*
3150                  * This cluster will be used either for new bucket or for
3151                  * new xattr block.
3152                  * If the cluster size is the same as the bucket size, one
3153                  * more is needed since we may need to extend the bucket
3154                  * also.
3155                  */
3156                 clusters_add += 1;
3157                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3158                 if (OCFS2_XATTR_BUCKET_SIZE ==
3159                         OCFS2_SB(inode->i_sb)->s_clustersize) {
3160                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3161                         clusters_add += 1;
3162                 }
3163         } else {
3164                 meta_add += 1;
3165                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3166         }
3167 out:
3168         if (clusters_need)
3169                 *clusters_need = clusters_add;
3170         if (meta_need)
3171                 *meta_need = meta_add;
3172         if (credits_need)
3173                 *credits_need = credits;
3174         brelse(bh);
3175         return ret;
3176 }
3177
3178 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3179                                      struct ocfs2_dinode *di,
3180                                      struct ocfs2_xattr_info *xi,
3181                                      struct ocfs2_xattr_search *xis,
3182                                      struct ocfs2_xattr_search *xbs,
3183                                      struct ocfs2_xattr_set_ctxt *ctxt,
3184                                      int extra_meta,
3185                                      int *credits)
3186 {
3187         int clusters_add, meta_add, ret;
3188         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3189
3190         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3191
3192         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3193
3194         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3195                                         &clusters_add, &meta_add, credits);
3196         if (ret) {
3197                 mlog_errno(ret);
3198                 return ret;
3199         }
3200
3201         meta_add += extra_meta;
3202         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3203              "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3204
3205         if (meta_add) {
3206                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3207                                                         &ctxt->meta_ac);
3208                 if (ret) {
3209                         mlog_errno(ret);
3210                         goto out;
3211                 }
3212         }
3213
3214         if (clusters_add) {
3215                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3216                 if (ret)
3217                         mlog_errno(ret);
3218         }
3219 out:
3220         if (ret) {
3221                 if (ctxt->meta_ac) {
3222                         ocfs2_free_alloc_context(ctxt->meta_ac);
3223                         ctxt->meta_ac = NULL;
3224                 }
3225
3226                 /*
3227                  * We cannot have an error and a non null ctxt->data_ac.
3228                  */
3229         }
3230
3231         return ret;
3232 }
3233
3234 static int __ocfs2_xattr_set_handle(struct inode *inode,
3235                                     struct ocfs2_dinode *di,
3236                                     struct ocfs2_xattr_info *xi,
3237                                     struct ocfs2_xattr_search *xis,
3238                                     struct ocfs2_xattr_search *xbs,
3239                                     struct ocfs2_xattr_set_ctxt *ctxt)
3240 {
3241         int ret = 0, credits, old_found;
3242
3243         if (!xi->xi_value) {
3244                 /* Remove existing extended attribute */
3245                 if (!xis->not_found)
3246                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3247                 else if (!xbs->not_found)
3248                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3249         } else {
3250                 /* We always try to set extended attribute into inode first*/
3251                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3252                 if (!ret && !xbs->not_found) {
3253                         /*
3254                          * If succeed and that extended attribute existing in
3255                          * external block, then we will remove it.
3256                          */
3257                         xi->xi_value = NULL;
3258                         xi->xi_value_len = 0;
3259
3260                         old_found = xis->not_found;
3261                         xis->not_found = -ENODATA;
3262                         ret = ocfs2_calc_xattr_set_need(inode,
3263                                                         di,
3264                                                         xi,
3265                                                         xis,
3266                                                         xbs,
3267                                                         NULL,
3268                                                         NULL,
3269                                                         &credits);
3270                         xis->not_found = old_found;
3271                         if (ret) {
3272                                 mlog_errno(ret);
3273                                 goto out;
3274                         }
3275
3276                         ret = ocfs2_extend_trans(ctxt->handle, credits +
3277                                         ctxt->handle->h_buffer_credits);
3278                         if (ret) {
3279                                 mlog_errno(ret);
3280                                 goto out;
3281                         }
3282                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3283                 } else if (ret == -ENOSPC) {
3284                         if (di->i_xattr_loc && !xbs->xattr_bh) {
3285                                 ret = ocfs2_xattr_block_find(inode,
3286                                                              xi->xi_name_index,
3287                                                              xi->xi_name, xbs);
3288                                 if (ret)
3289                                         goto out;
3290
3291                                 old_found = xis->not_found;
3292                                 xis->not_found = -ENODATA;
3293                                 ret = ocfs2_calc_xattr_set_need(inode,
3294                                                                 di,
3295                                                                 xi,
3296                                                                 xis,
3297                                                                 xbs,
3298                                                                 NULL,
3299                                                                 NULL,
3300                                                                 &credits);
3301                                 xis->not_found = old_found;
3302                                 if (ret) {
3303                                         mlog_errno(ret);
3304                                         goto out;
3305                                 }
3306
3307                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3308                                         ctxt->handle->h_buffer_credits);
3309                                 if (ret) {
3310                                         mlog_errno(ret);
3311                                         goto out;
3312                                 }
3313                         }
3314                         /*
3315                          * If no space in inode, we will set extended attribute
3316                          * into external block.
3317                          */
3318                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3319                         if (ret)
3320                                 goto out;
3321                         if (!xis->not_found) {
3322                                 /*
3323                                  * If succeed and that extended attribute
3324                                  * existing in inode, we will remove it.
3325                                  */
3326                                 xi->xi_value = NULL;
3327                                 xi->xi_value_len = 0;
3328                                 xbs->not_found = -ENODATA;
3329                                 ret = ocfs2_calc_xattr_set_need(inode,
3330                                                                 di,
3331                                                                 xi,
3332                                                                 xis,
3333                                                                 xbs,
3334                                                                 NULL,
3335                                                                 NULL,
3336                                                                 &credits);
3337                                 if (ret) {
3338                                         mlog_errno(ret);
3339                                         goto out;
3340                                 }
3341
3342                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3343                                                 ctxt->handle->h_buffer_credits);
3344                                 if (ret) {
3345                                         mlog_errno(ret);
3346                                         goto out;
3347                                 }
3348                                 ret = ocfs2_xattr_ibody_set(inode, xi,
3349                                                             xis, ctxt);
3350                         }
3351                 }
3352         }
3353
3354         if (!ret) {
3355                 /* Update inode ctime. */
3356                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3357                                               xis->inode_bh,
3358                                               OCFS2_JOURNAL_ACCESS_WRITE);
3359                 if (ret) {
3360                         mlog_errno(ret);
3361                         goto out;
3362                 }
3363
3364                 inode->i_ctime = CURRENT_TIME;
3365                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3366                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3367                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3368         }
3369 out:
3370         return ret;
3371 }
3372
3373 /*
3374  * This function only called duing creating inode
3375  * for init security/acl xattrs of the new inode.
3376  * All transanction credits have been reserved in mknod.
3377  */
3378 int ocfs2_xattr_set_handle(handle_t *handle,
3379                            struct inode *inode,
3380                            struct buffer_head *di_bh,
3381                            int name_index,
3382                            const char *name,
3383                            const void *value,
3384                            size_t value_len,
3385                            int flags,
3386                            struct ocfs2_alloc_context *meta_ac,
3387                            struct ocfs2_alloc_context *data_ac)
3388 {
3389         struct ocfs2_dinode *di;
3390         int ret;
3391
3392         struct ocfs2_xattr_info xi = {
3393                 .xi_name_index = name_index,
3394                 .xi_name = name,
3395                 .xi_name_len = strlen(name),
3396                 .xi_value = value,
3397                 .xi_value_len = value_len,
3398         };
3399
3400         struct ocfs2_xattr_search xis = {
3401                 .not_found = -ENODATA,
3402         };
3403
3404         struct ocfs2_xattr_search xbs = {
3405                 .not_found = -ENODATA,
3406         };
3407
3408         struct ocfs2_xattr_set_ctxt ctxt = {
3409                 .handle = handle,
3410                 .meta_ac = meta_ac,
3411                 .data_ac = data_ac,
3412         };
3413
3414         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3415                 return -EOPNOTSUPP;
3416
3417         /*
3418          * In extreme situation, may need xattr bucket when
3419          * block size is too small. And we have already reserved
3420          * the credits for bucket in mknod.
3421          */
3422         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3423                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
3424                 if (!xbs.bucket) {
3425                         mlog_errno(-ENOMEM);
3426                         return -ENOMEM;
3427                 }
3428         }
3429
3430         xis.inode_bh = xbs.inode_bh = di_bh;
3431         di = (struct ocfs2_dinode *)di_bh->b_data;
3432
3433         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3434
3435         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3436         if (ret)
3437                 goto cleanup;
3438         if (xis.not_found) {
3439                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3440                 if (ret)
3441                         goto cleanup;
3442         }
3443
3444         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3445
3446 cleanup:
3447         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3448         brelse(xbs.xattr_bh);
3449         ocfs2_xattr_bucket_free(xbs.bucket);
3450
3451         return ret;
3452 }
3453
3454 /*
3455  * ocfs2_xattr_set()
3456  *
3457  * Set, replace or remove an extended attribute for this inode.
3458  * value is NULL to remove an existing extended attribute, else either
3459  * create or replace an extended attribute.
3460  */
3461 int ocfs2_xattr_set(struct inode *inode,
3462                     int name_index,
3463                     const char *name,
3464                     const void *value,
3465                     size_t value_len,
3466                     int flags)
3467 {
3468         struct buffer_head *di_bh = NULL;
3469         struct ocfs2_dinode *di;
3470         int ret, credits, ref_meta = 0, ref_credits = 0;
3471         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3472         struct inode *tl_inode = osb->osb_tl_inode;
3473         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3474         struct ocfs2_refcount_tree *ref_tree = NULL;
3475
3476         struct ocfs2_xattr_info xi = {
3477                 .xi_name_index = name_index,
3478                 .xi_name = name,
3479                 .xi_name_len = strlen(name),
3480                 .xi_value = value,
3481                 .xi_value_len = value_len,
3482         };
3483
3484         struct ocfs2_xattr_search xis = {
3485                 .not_found = -ENODATA,
3486         };
3487
3488         struct ocfs2_xattr_search xbs = {
3489                 .not_found = -ENODATA,
3490         };
3491
3492         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3493                 return -EOPNOTSUPP;
3494
3495         /*
3496          * Only xbs will be used on indexed trees.  xis doesn't need a
3497          * bucket.
3498          */
3499         xbs.bucket = ocfs2_xattr_bucket_new(inode);
3500         if (!xbs.bucket) {
3501                 mlog_errno(-ENOMEM);
3502                 return -ENOMEM;
3503         }
3504
3505         ret = ocfs2_inode_lock(inode, &di_bh, 1);
3506         if (ret < 0) {
3507                 mlog_errno(ret);
3508                 goto cleanup_nolock;
3509         }
3510         xis.inode_bh = xbs.inode_bh = di_bh;
3511         di = (struct ocfs2_dinode *)di_bh->b_data;
3512
3513         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3514         /*
3515          * Scan inode and external block to find the same name
3516          * extended attribute and collect search infomation.
3517          */
3518         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3519         if (ret)
3520                 goto cleanup;
3521         if (xis.not_found) {
3522                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3523                 if (ret)
3524                         goto cleanup;
3525         }
3526
3527         if (xis.not_found && xbs.not_found) {
3528                 ret = -ENODATA;
3529                 if (flags & XATTR_REPLACE)
3530                         goto cleanup;
3531                 ret = 0;
3532                 if (!value)
3533                         goto cleanup;
3534         } else {
3535                 ret = -EEXIST;
3536                 if (flags & XATTR_CREATE)
3537                         goto cleanup;
3538         }
3539
3540         /* Check whether the value is refcounted and do some prepartion. */
3541         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3542             (!xis.not_found || !xbs.not_found)) {
3543                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3544                                                    &xis, &xbs, &ref_tree,
3545                                                    &ref_meta, &ref_credits);
3546                 if (ret) {
3547                         mlog_errno(ret);
3548                         goto cleanup;
3549                 }
3550         }
3551
3552         mutex_lock(&tl_inode->i_mutex);
3553
3554         if (ocfs2_truncate_log_needs_flush(osb)) {
3555                 ret = __ocfs2_flush_truncate_log(osb);
3556                 if (ret < 0) {
3557                         mutex_unlock(&tl_inode->i_mutex);
3558                         mlog_errno(ret);
3559                         goto cleanup;
3560                 }
3561         }
3562         mutex_unlock(&tl_inode->i_mutex);
3563
3564         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3565                                         &xbs, &ctxt, ref_meta, &credits);
3566         if (ret) {
3567                 mlog_errno(ret);
3568                 goto cleanup;
3569         }
3570
3571         /* we need to update inode's ctime field, so add credit for it. */
3572         credits += OCFS2_INODE_UPDATE_CREDITS;
3573         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3574         if (IS_ERR(ctxt.handle)) {
3575                 ret = PTR_ERR(ctxt.handle);
3576                 mlog_errno(ret);
3577                 goto cleanup;
3578         }
3579
3580         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3581
3582         ocfs2_commit_trans(osb, ctxt.handle);
3583
3584         if (ctxt.data_ac)
3585                 ocfs2_free_alloc_context(ctxt.data_ac);
3586         if (ctxt.meta_ac)
3587                 ocfs2_free_alloc_context(ctxt.meta_ac);
3588         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3589                 ocfs2_schedule_truncate_log_flush(osb, 1);
3590         ocfs2_run_deallocs(osb, &ctxt.dealloc);
3591
3592 cleanup:
3593         if (ref_tree)
3594                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3595         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3596         if (!value && !ret) {
3597                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3598                 if (ret)
3599                         mlog_errno(ret);
3600         }
3601         ocfs2_inode_unlock(inode, 1);
3602 cleanup_nolock:
3603         brelse(di_bh);
3604         brelse(xbs.xattr_bh);
3605         ocfs2_xattr_bucket_free(xbs.bucket);
3606
3607         return ret;
3608 }
3609
3610 /*
3611  * Find the xattr extent rec which may contains name_hash.
3612  * e_cpos will be the first name hash of the xattr rec.
3613  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3614  */
3615 static int ocfs2_xattr_get_rec(struct inode *inode,
3616                                u32 name_hash,
3617                                u64 *p_blkno,
3618                                u32 *e_cpos,
3619                                u32 *num_clusters,
3620                                struct ocfs2_extent_list *el)
3621 {
3622         int ret = 0, i;
3623         struct buffer_head *eb_bh = NULL;
3624         struct ocfs2_extent_block *eb;
3625         struct ocfs2_extent_rec *rec = NULL;
3626         u64 e_blkno = 0;
3627
3628         if (el->l_tree_depth) {
3629                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3630                                       &eb_bh);
3631                 if (ret) {
3632                         mlog_errno(ret);
3633                         goto out;
3634                 }
3635
3636                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3637                 el = &eb->h_list;
3638
3639                 if (el->l_tree_depth) {
3640                         ocfs2_error(inode->i_sb,
3641                                     "Inode %lu has non zero tree depth in "
3642                                     "xattr tree block %llu\n", inode->i_ino,
3643                                     (unsigned long long)eb_bh->b_blocknr);
3644                         ret = -EROFS;
3645                         goto out;
3646                 }
3647         }
3648
3649         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3650                 rec = &el->l_recs[i];
3651
3652                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3653                         e_blkno = le64_to_cpu(rec->e_blkno);
3654                         break;
3655                 }
3656         }
3657
3658         if (!e_blkno) {
3659                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3660                             "record (%u, %u, 0) in xattr", inode->i_ino,
3661                             le32_to_cpu(rec->e_cpos),
3662                             ocfs2_rec_clusters(el, rec));
3663                 ret = -EROFS;
3664                 goto out;
3665         }
3666
3667         *p_blkno = le64_to_cpu(rec->e_blkno);
3668         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3669         if (e_cpos)
3670                 *e_cpos = le32_to_cpu(rec->e_cpos);
3671 out:
3672         brelse(eb_bh);
3673         return ret;
3674 }
3675
/*
 * Function type taking an inode, one xattr bucket and an opaque argument,
 * and returning an int status.
 * NOTE(review): presumably the per-bucket callback used when iterating
 * over buckets; its users are outside this chunk - confirm there.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
                                struct ocfs2_xattr_bucket *bucket, void *para);
3679
3680 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3681                                    struct ocfs2_xattr_bucket *bucket,
3682                                    int name_index,
3683                                    const char *name,
3684                                    u32 name_hash,
3685                                    u16 *xe_index,
3686                                    int *found)
3687 {
3688         int i, ret = 0, cmp = 1, block_off, new_offset;
3689         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3690         size_t name_len = strlen(name);
3691         struct ocfs2_xattr_entry *xe = NULL;
3692         char *xe_name;
3693
3694         /*
3695          * We don't use binary search in the bucket because there
3696          * may be multiple entries with the same name hash.
3697          */
3698         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3699                 xe = &xh->xh_entries[i];
3700
3701                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3702                         continue;
3703                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3704                         break;
3705
3706                 cmp = name_index - ocfs2_xattr_get_type(xe);
3707                 if (!cmp)
3708                         cmp = name_len - xe->xe_name_len;
3709                 if (cmp)
3710                         continue;
3711
3712                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3713                                                         xh,
3714                                                         i,
3715                                                         &block_off,
3716                                                         &new_offset);
3717                 if (ret) {
3718                         mlog_errno(ret);
3719                         break;
3720                 }
3721
3722
3723                 xe_name = bucket_block(bucket, block_off) + new_offset;
3724                 if (!memcmp(name, xe_name, name_len)) {
3725                         *xe_index = i;
3726                         *found = 1;
3727                         ret = 0;
3728                         break;
3729                 }
3730         }
3731
3732         return ret;
3733 }
3734
3735 /*
3736  * Find the specified xattr entry in a series of buckets.
3737  * This series start from p_blkno and last for num_clusters.
3738  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3739  * the num of the valid buckets.
3740  *
3741  * Return the buffer_head this xattr should reside in. And if the xattr's
3742  * hash is in the gap of 2 buckets, return the lower bucket.
3743  */
3744 static int ocfs2_xattr_bucket_find(struct inode *inode,
3745                                    int name_index,
3746                                    const char *name,
3747                                    u32 name_hash,
3748                                    u64 p_blkno,
3749                                    u32 first_hash,
3750                                    u32 num_clusters,
3751                                    struct ocfs2_xattr_search *xs)
3752 {
3753         int ret, found = 0;
3754         struct ocfs2_xattr_header *xh = NULL;
3755         struct ocfs2_xattr_entry *xe = NULL;
3756         u16 index = 0;
3757         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3758         int low_bucket = 0, bucket, high_bucket;
3759         struct ocfs2_xattr_bucket *search;
3760         u32 last_hash;
3761         u64 blkno, lower_blkno = 0;
3762
3763         search = ocfs2_xattr_bucket_new(inode);
3764         if (!search) {
3765                 ret = -ENOMEM;
3766                 mlog_errno(ret);
3767                 goto out;
3768         }
3769
3770         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3771         if (ret) {
3772                 mlog_errno(ret);
3773                 goto out;
3774         }
3775
3776         xh = bucket_xh(search);
3777         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3778         while (low_bucket <= high_bucket) {
3779                 ocfs2_xattr_bucket_relse(search);
3780
3781                 bucket = (low_bucket + high_bucket) / 2;
3782                 blkno = p_blkno + bucket * blk_per_bucket;
3783                 ret = ocfs2_read_xattr_bucket(search, blkno);
3784                 if (ret) {
3785                         mlog_errno(ret);
3786                         goto out;
3787                 }
3788
3789                 xh = bucket_xh(search);
3790                 xe = &xh->xh_entries[0];
3791                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3792                         high_bucket = bucket - 1;
3793                         continue;
3794                 }
3795
3796                 /*
3797                  * Check whether the hash of the last entry in our
3798                  * bucket is larger than the search one. for an empty
3799                  * bucket, the last one is also the first one.
3800                  */
3801                 if (xh->xh_count)
3802                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3803
3804                 last_hash = le32_to_cpu(xe->xe_name_hash);
3805
3806                 /* record lower_blkno which may be the insert place. */
3807                 lower_blkno = blkno;
3808
3809                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3810                         low_bucket = bucket + 1;
3811                         continue;
3812                 }
3813
3814                 /* the searched xattr should reside in this bucket if exists. */
3815                 ret = ocfs2_find_xe_in_bucket(inode, search,
3816                                               name_index, name, name_hash,
3817                                               &index, &found);
3818                 if (ret) {
3819                         mlog_errno(ret);
3820                         goto out;
3821                 }
3822                 break;
3823         }
3824
3825         /*
3826          * Record the bucket we have found.
3827          * When the xattr's hash value is in the gap of 2 buckets, we will
3828          * always set it to the previous bucket.
3829          */
3830         if (!lower_blkno)
3831                 lower_blkno = p_blkno;
3832
3833         /* This should be in cache - we just read it during the search */
3834         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3835         if (ret) {
3836                 mlog_errno(ret);
3837                 goto out;
3838         }
3839
3840         xs->header = bucket_xh(xs->bucket);
3841         xs->base = bucket_block(xs->bucket, 0);
3842         xs->end = xs->base + inode->i_sb->s_blocksize;
3843
3844         if (found) {
3845                 xs->here = &xs->header->xh_entries[index];
3846                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3847                      (unsigned long long)bucket_blkno(xs->bucket), index);
3848         } else
3849                 ret = -ENODATA;
3850
3851 out:
3852         ocfs2_xattr_bucket_free(search);
3853         return ret;
3854 }
3855
3856 static int ocfs2_xattr_index_block_find(struct inode *inode,
3857                                         struct buffer_head *root_bh,
3858                                         int name_index,
3859                                         const char *name,
3860                                         struct ocfs2_xattr_search *xs)
3861 {
3862         int ret;
3863         struct ocfs2_xattr_block *xb =
3864                         (struct ocfs2_xattr_block *)root_bh->b_data;
3865         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3866         struct ocfs2_extent_list *el = &xb_root->xt_list;
3867         u64 p_blkno = 0;
3868         u32 first_hash, num_clusters = 0;
3869         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3870
3871         if (le16_to_cpu(el->l_next_free_rec) == 0)
3872                 return -ENODATA;
3873
3874         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3875              name, name_hash, name_index);
3876
3877         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3878                                   &num_clusters, el);
3879         if (ret) {
3880                 mlog_errno(ret);
3881                 goto out;
3882         }
3883
3884         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3885
3886         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3887              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3888              first_hash);
3889
3890         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3891                                       p_blkno, first_hash, num_clusters, xs);
3892
3893 out:
3894         return ret;
3895 }
3896
3897 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3898                                        u64 blkno,
3899                                        u32 clusters,
3900                                        xattr_bucket_func *func,
3901                                        void *para)
3902 {
3903         int i, ret = 0;
3904         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3905         u32 num_buckets = clusters * bpc;
3906         struct ocfs2_xattr_bucket *bucket;
3907
3908         bucket = ocfs2_xattr_bucket_new(inode);
3909         if (!bucket) {
3910                 mlog_errno(-ENOMEM);
3911                 return -ENOMEM;
3912         }
3913
3914         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3915              clusters, (unsigned long long)blkno);
3916
3917         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3918                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3919                 if (ret) {
3920                         mlog_errno(ret);
3921                         break;
3922                 }
3923
3924                 /*
3925                  * The real bucket num in this series of blocks is stored
3926                  * in the 1st bucket.
3927                  */
3928                 if (i == 0)
3929                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3930
3931                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3932                      (unsigned long long)blkno,
3933                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3934                 if (func) {
3935                         ret = func(inode, bucket, para);
3936                         if (ret && ret != -ERANGE)
3937                                 mlog_errno(ret);
3938                         /* Fall through to bucket_relse() */
3939                 }
3940
3941                 ocfs2_xattr_bucket_relse(bucket);
3942                 if (ret)
3943                         break;
3944         }
3945
3946         ocfs2_xattr_bucket_free(bucket);
3947         return ret;
3948 }
3949
/*
 * Private argument handed through the xattr tree iterators when listing
 * names; see ocfs2_xattr_tree_list_index_block().
 */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* caller's output buffer */
	size_t buffer_size;	/* size of that buffer */
	size_t result;		/* running total, updated by ocfs2_xattr_list_entry() */
};
3955
3956 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3957                                              struct ocfs2_xattr_header *xh,
3958                                              int index,
3959                                              int *block_off,
3960                                              int *new_offset)
3961 {
3962         u16 name_offset;
3963
3964         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3965                 return -EINVAL;
3966
3967         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3968
3969         *block_off = name_offset >> sb->s_blocksize_bits;
3970         *new_offset = name_offset % sb->s_blocksize;
3971
3972         return 0;
3973 }
3974
3975 static int ocfs2_list_xattr_bucket(struct inode *inode,
3976                                    struct ocfs2_xattr_bucket *bucket,
3977                                    void *para)
3978 {
3979         int ret = 0, type;
3980         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3981         int i, block_off, new_offset;
3982         const char *prefix, *name;
3983
3984         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3985                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3986                 type = ocfs2_xattr_get_type(entry);
3987                 prefix = ocfs2_xattr_prefix(type);
3988
3989                 if (prefix) {
3990                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3991                                                                 bucket_xh(bucket),
3992                                                                 i,
3993                                                                 &block_off,
3994                                                                 &new_offset);
3995                         if (ret)
3996                                 break;
3997
3998                         name = (const char *)bucket_block(bucket, block_off) +
3999                                 new_offset;
4000                         ret = ocfs2_xattr_list_entry(xl->buffer,
4001                                                      xl->buffer_size,
4002                                                      &xl->result,
4003                                                      prefix, name,
4004                                                      entry->xe_name_len);
4005                         if (ret)
4006                                 break;
4007                 }
4008         }
4009
4010         return ret;
4011 }
4012
4013 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4014                                            struct buffer_head *blk_bh,
4015                                            xattr_tree_rec_func *rec_func,
4016                                            void *para)
4017 {
4018         struct ocfs2_xattr_block *xb =
4019                         (struct ocfs2_xattr_block *)blk_bh->b_data;
4020         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4021         int ret = 0;
4022         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4023         u64 p_blkno = 0;
4024
4025         if (!el->l_next_free_rec || !rec_func)
4026                 return 0;
4027
4028         while (name_hash > 0) {
4029                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4030                                           &e_cpos, &num_clusters, el);
4031                 if (ret) {
4032                         mlog_errno(ret);
4033                         break;
4034                 }
4035
4036                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4037                                num_clusters, para);
4038                 if (ret) {
4039                         if (ret != -ERANGE)
4040                                 mlog_errno(ret);
4041                         break;
4042                 }
4043
4044                 if (e_cpos == 0)
4045                         break;
4046
4047                 name_hash = e_cpos - 1;
4048         }
4049
4050         return ret;
4051
4052 }
4053
4054 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4055                                      struct buffer_head *root_bh,
4056                                      u64 blkno, u32 cpos, u32 len, void *para)
4057 {
4058         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4059                                            ocfs2_list_xattr_bucket, para);
4060 }
4061
4062 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4063                                              struct buffer_head *blk_bh,
4064                                              char *buffer,
4065                                              size_t buffer_size)
4066 {
4067         int ret;
4068         struct ocfs2_xattr_tree_list xl = {
4069                 .buffer = buffer,
4070                 .buffer_size = buffer_size,
4071                 .result = 0,
4072         };
4073
4074         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4075                                               ocfs2_list_xattr_tree_rec, &xl);
4076         if (ret) {
4077                 mlog_errno(ret);
4078                 goto out;
4079         }
4080
4081         ret = xl.result;
4082 out:
4083         return ret;
4084 }
4085
4086 static int cmp_xe(const void *a, const void *b)
4087 {
4088         const struct ocfs2_xattr_entry *l = a, *r = b;
4089         u32 l_hash = le32_to_cpu(l->xe_name_hash);
4090         u32 r_hash = le32_to_cpu(r->xe_name_hash);
4091
4092         if (l_hash > r_hash)
4093                 return 1;
4094         if (l_hash < r_hash)
4095                 return -1;
4096         return 0;
4097 }
4098
4099 static void swap_xe(void *a, void *b, int size)
4100 {
4101         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4102
4103         tmp = *l;
4104         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4105         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4106 }
4107
4108 /*
4109  * When the ocfs2_xattr_block is filled up, new bucket will be created
4110  * and all the xattr entries will be moved to the new bucket.
4111  * The header goes at the start of the bucket, and the names+values are
4112  * filled from the end.  This is why *target starts as the last buffer.
4113  * Note: we need to sort the entries since they are not saved in order
4114  * in the ocfs2_xattr_block.
4115  */
4116 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4117                                            struct buffer_head *xb_bh,
4118                                            struct ocfs2_xattr_bucket *bucket)
4119 {
4120         int i, blocksize = inode->i_sb->s_blocksize;
4121         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4122         u16 offset, size, off_change;
4123         struct ocfs2_xattr_entry *xe;
4124         struct ocfs2_xattr_block *xb =
4125                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
4126         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4127         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4128         u16 count = le16_to_cpu(xb_xh->xh_count);
4129         char *src = xb_bh->b_data;
4130         char *target = bucket_block(bucket, blks - 1);
4131
4132         mlog(0, "cp xattr from block %llu to bucket %llu\n",
4133              (unsigned long long)xb_bh->b_blocknr,
4134              (unsigned long long)bucket_blkno(bucket));
4135
4136         for (i = 0; i < blks; i++)
4137                 memset(bucket_block(bucket, i), 0, blocksize);
4138
4139         /*
4140          * Since the xe_name_offset is based on ocfs2_xattr_header,
4141          * there is a offset change corresponding to the change of
4142          * ocfs2_xattr_header's position.
4143          */
4144         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4145         xe = &xb_xh->xh_entries[count - 1];
4146         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4147         size = blocksize - offset;
4148
4149         /* copy all the names and values. */
4150         memcpy(target + offset, src + offset, size);
4151
4152         /* Init new header now. */
4153         xh->xh_count = xb_xh->xh_count;
4154         xh->xh_num_buckets = cpu_to_le16(1);
4155         xh->xh_name_value_len = cpu_to_le16(size);
4156         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4157
4158         /* copy all the entries. */
4159         target = bucket_block(bucket, 0);
4160         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4161         size = count * sizeof(struct ocfs2_xattr_entry);
4162         memcpy(target + offset, (char *)xb_xh + offset, size);
4163
4164         /* Change the xe offset for all the xe because of the move. */
4165         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4166                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4167         for (i = 0; i < count; i++)
4168                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4169
4170         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4171              offset, size, off_change);
4172
4173         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4174              cmp_xe, swap_xe);
4175 }
4176
4177 /*
4178  * After we move xattr from block to index btree, we have to
4179  * update ocfs2_xattr_search to the new xe and base.
4180  *
4181  * When the entry is in xattr block, xattr_bh indicates the storage place.
4182  * While if the entry is in index b-tree, "bucket" indicates the
4183  * real place of the xattr.
4184  */
4185 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4186                                             struct ocfs2_xattr_search *xs,
4187                                             struct buffer_head *old_bh)
4188 {
4189         char *buf = old_bh->b_data;
4190         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4191         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4192         int i;
4193
4194         xs->header = bucket_xh(xs->bucket);
4195         xs->base = bucket_block(xs->bucket, 0);
4196         xs->end = xs->base + inode->i_sb->s_blocksize;
4197
4198         if (xs->not_found)
4199                 return;
4200
4201         i = xs->here - old_xh->xh_entries;
4202         xs->here = &xs->header->xh_entries[i];
4203 }
4204
4205 static int ocfs2_xattr_create_index_block(struct inode *inode,
4206                                           struct ocfs2_xattr_search *xs,
4207                                           struct ocfs2_xattr_set_ctxt *ctxt)
4208 {
4209         int ret;
4210         u32 bit_off, len;
4211         u64 blkno;
4212         handle_t *handle = ctxt->handle;
4213         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4214         struct ocfs2_inode_info *oi = OCFS2_I(inode);
4215         struct buffer_head *xb_bh = xs->xattr_bh;
4216         struct ocfs2_xattr_block *xb =
4217                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4218         struct ocfs2_xattr_tree_root *xr;
4219         u16 xb_flags = le16_to_cpu(xb->xb_flags);
4220
4221         mlog(0, "create xattr index block for %llu\n",
4222              (unsigned long long)xb_bh->b_blocknr);
4223
4224         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4225         BUG_ON(!xs->bucket);
4226
4227         /*
4228          * XXX:
4229          * We can use this lock for now, and maybe move to a dedicated mutex
4230          * if performance becomes a problem later.
4231          */
4232         down_write(&oi->ip_alloc_sem);
4233
4234         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4235                                       OCFS2_JOURNAL_ACCESS_WRITE);
4236         if (ret) {
4237                 mlog_errno(ret);
4238                 goto out;
4239         }
4240
4241         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
4242                                      1, 1, &bit_off, &len);
4243         if (ret) {
4244                 mlog_errno(ret);
4245                 goto out;
4246         }
4247
4248         /*
4249          * The bucket may spread in many blocks, and
4250          * we will only touch the 1st block and the last block
4251          * in the whole bucket(one for entry and one for data).
4252          */
4253         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4254
4255         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
4256              (unsigned long long)blkno);
4257
4258         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4259         if (ret) {
4260                 mlog_errno(ret);
4261                 goto out;
4262         }
4263
4264         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4265                                                 OCFS2_JOURNAL_ACCESS_CREATE);
4266         if (ret) {
4267                 mlog_errno(ret);
4268                 goto out;
4269         }
4270
4271         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4272         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4273
4274         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4275
4276         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4277         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4278                offsetof(struct ocfs2_xattr_block, xb_attrs));
4279
4280         xr = &xb->xb_attrs.xb_root;
4281         xr->xt_clusters = cpu_to_le32(1);
4282         xr->xt_last_eb_blk = 0;
4283         xr->xt_list.l_tree_depth = 0;
4284         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4285         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4286
4287         xr->xt_list.l_recs[0].e_cpos = 0;
4288         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4289         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4290
4291         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4292
4293         ocfs2_journal_dirty(handle, xb_bh);
4294
4295 out:
4296         up_write(&oi->ip_alloc_sem);
4297
4298         return ret;
4299 }
4300
4301 static int cmp_xe_offset(const void *a, const void *b)
4302 {
4303         const struct ocfs2_xattr_entry *l = a, *r = b;
4304         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4305         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4306
4307         if (l_name_offset < r_name_offset)
4308                 return 1;
4309         if (l_name_offset > r_name_offset)
4310                 return -1;
4311         return 0;
4312 }
4313
4314 /*
4315  * defrag a xattr bucket if we find that the bucket has some
4316  * holes beteen name/value pairs.
4317  * We will move all the name/value pairs to the end of the bucket
4318  * so that we can spare some space for insertion.
4319  */
4320 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4321                                      handle_t *handle,
4322                                      struct ocfs2_xattr_bucket *bucket)
4323 {
4324         int ret, i;
4325         size_t end, offset, len;
4326         struct ocfs2_xattr_header *xh;
4327         char *entries, *buf, *bucket_buf = NULL;
4328         u64 blkno = bucket_blkno(bucket);
4329         u16 xh_free_start;
4330         size_t blocksize = inode->i_sb->s_blocksize;
4331         struct ocfs2_xattr_entry *xe;
4332
4333         /*
4334          * In order to make the operation more efficient and generic,
4335          * we copy all the blocks into a contiguous memory and do the
4336          * defragment there, so if anything is error, we will not touch
4337          * the real block.
4338          */
4339         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4340         if (!bucket_buf) {
4341                 ret = -EIO;
4342                 goto out;
4343         }
4344
4345         buf = bucket_buf;
4346         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4347                 memcpy(buf, bucket_block(bucket, i), blocksize);
4348
4349         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4350                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4351         if (ret < 0) {
4352                 mlog_errno(ret);
4353                 goto out;
4354         }
4355
4356         xh = (struct ocfs2_xattr_header *)bucket_buf;
4357         entries = (char *)xh->xh_entries;
4358         xh_free_start = le16_to_cpu(xh->xh_free_start);
4359
4360         mlog(0, "adjust xattr bucket in %llu, count = %u, "
4361              "xh_free_start = %u, xh_name_value_len = %u.\n",
4362              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4363              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4364
4365         /*
4366          * sort all the entries by their offset.
4367          * the largest will be the first, so that we can
4368          * move them to the end one by one.
4369          */
4370         sort(entries, le16_to_cpu(xh->xh_count),
4371              sizeof(struct ocfs2_xattr_entry),
4372              cmp_xe_offset, swap_xe);
4373
4374         /* Move all name/values to the end of the bucket. */
4375         xe = xh->xh_entries;
4376         end = OCFS2_XATTR_BUCKET_SIZE;
4377         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4378                 offset = le16_to_cpu(xe->xe_name_offset);
4379                 len = namevalue_size_xe(xe);
4380
4381                 /*
4382                  * We must make sure that the name/value pair
4383                  * exist in the same block. So adjust end to
4384                  * the previous block end if needed.
4385                  */
4386                 if (((end - len) / blocksize !=
4387                         (end - 1) / blocksize))
4388                         end = end - end % blocksize;
4389
4390                 if (end > offset + len) {
4391                         memmove(bucket_buf + end - len,
4392                                 bucket_buf + offset, len);
4393                         xe->xe_name_offset = cpu_to_le16(end - len);
4394                 }
4395
4396                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4397                                 "bucket %llu\n", (unsigned long long)blkno);
4398
4399                 end -= len;
4400         }
4401
4402         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4403                         "bucket %llu\n", (unsigned long long)blkno);
4404
4405         if (xh_free_start == end)
4406                 goto out;
4407
4408         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4409         xh->xh_free_start = cpu_to_le16(end);
4410
4411         /* sort the entries by their name_hash. */
4412         sort(entries, le16_to_cpu(xh->xh_count),
4413              sizeof(struct ocfs2_xattr_entry),
4414              cmp_xe, swap_xe);
4415
4416         buf = bucket_buf;
4417         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4418                 memcpy(bucket_block(bucket, i), buf, blocksize);
4419         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4420
4421 out:
4422         kfree(bucket_buf);
4423         return ret;
4424 }
4425
4426 /*
4427  * prev_blkno points to the start of an existing extent.  new_blkno
4428  * points to a newly allocated extent.  Because we know each of our
4429  * clusters contains more than bucket, we can easily split one cluster
4430  * at a bucket boundary.  So we take the last cluster of the existing
4431  * extent and split it down the middle.  We move the last half of the
4432  * buckets in the last cluster of the existing extent over to the new
4433  * extent.
4434  *
4435  * first_bh is the buffer at prev_blkno so we can update the existing
4436  * extent's bucket count.  header_bh is the bucket were we were hoping
4437  * to insert our xattr.  If the bucket move places the target in the new
4438  * extent, we'll update first_bh and header_bh after modifying the old
4439  * extent.
4440  *
4441  * first_hash will be set as the 1st xe's name_hash in the new extent.
4442  */
4443 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4444                                                handle_t *handle,
4445                                                struct ocfs2_xattr_bucket *first,
4446                                                struct ocfs2_xattr_bucket *target,
4447                                                u64 new_blkno,
4448                                                u32 num_clusters,
4449                                                u32 *first_hash)
4450 {
4451         int ret;
4452         struct super_block *sb = inode->i_sb;
4453         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4454         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4455         int to_move = num_buckets / 2;
4456         u64 src_blkno;
4457         u64 last_cluster_blkno = bucket_blkno(first) +
4458                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4459
4460         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4461         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4462
4463         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4464              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4465
4466         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4467                                      last_cluster_blkno, new_blkno,
4468                                      to_move, first_hash);
4469         if (ret) {
4470                 mlog_errno(ret);
4471                 goto out;
4472         }
4473
4474         /* This is the first bucket that got moved */
4475         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4476
4477         /*
4478          * If the target bucket was part of the moved buckets, we need to
4479          * update first and target.
4480          */
4481         if (bucket_blkno(target) >= src_blkno) {
4482                 /* Find the block for the new target bucket */
4483                 src_blkno = new_blkno +
4484                         (bucket_blkno(target) - src_blkno);
4485
4486                 ocfs2_xattr_bucket_relse(first);
4487                 ocfs2_xattr_bucket_relse(target);
4488
4489                 /*
4490                  * These shouldn't fail - the buffers are in the
4491                  * journal from ocfs2_cp_xattr_bucket().
4492                  */
4493                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
4494                 if (ret) {
4495                         mlog_errno(ret);
4496                         goto out;
4497                 }
4498                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
4499                 if (ret)
4500                         mlog_errno(ret);
4501
4502         }
4503
4504 out:
4505         return ret;
4506 }
4507
4508 /*
4509  * Find the suitable pos when we divide a bucket into 2.
4510  * We have to make sure the xattrs with the same hash value exist
4511  * in the same bucket.
4512  *
4513  * If this ocfs2_xattr_header covers more than one hash value, find a
4514  * place where the hash value changes.  Try to find the most even split.
4515  * The most common case is that all entries have different hash values,
4516  * and the first check we make will find a place to split.
4517  */
4518 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4519 {
4520         struct ocfs2_xattr_entry *entries = xh->xh_entries;
4521         int count = le16_to_cpu(xh->xh_count);
4522         int delta, middle = count / 2;
4523
4524         /*
4525          * We start at the middle.  Each step gets farther away in both
4526          * directions.  We therefore hit the change in hash value
4527          * nearest to the middle.  Note that this loop does not execute for
4528          * count < 2.
4529          */
4530         for (delta = 0; delta < middle; delta++) {
4531                 /* Let's check delta earlier than middle */
4532                 if (cmp_xe(&entries[middle - delta - 1],
4533                            &entries[middle - delta]))
4534                         return middle - delta;
4535
4536                 /* For even counts, don't walk off the end */
4537                 if ((middle + delta + 1) == count)
4538                         continue;
4539
4540                 /* Now try delta past middle */
4541                 if (cmp_xe(&entries[middle + delta],
4542                            &entries[middle + delta + 1]))
4543                         return middle + delta + 1;
4544         }
4545
4546         /* Every entry had the same hash */
4547         return count;
4548 }
4549
4550 /*
4551  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4552  * first_hash will record the 1st hash of the new bucket.
4553  *
4554  * Normally half of the xattrs will be moved.  But we have to make
4555  * sure that the xattrs with the same hash value are stored in the
4556  * same bucket. If all the xattrs in this bucket have the same hash
4557  * value, the new bucket will be initialized as an empty one and the
4558  * first_hash will be initialized as (hash_value+1).
4559  */
4560 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4561                                     handle_t *handle,
4562                                     u64 blk,
4563                                     u64 new_blk,
4564                                     u32 *first_hash,
4565                                     int new_bucket_head)
4566 {
4567         int ret, i;
4568         int count, start, len, name_value_len = 0, name_offset = 0;
4569         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4570         struct ocfs2_xattr_header *xh;
4571         struct ocfs2_xattr_entry *xe;
4572         int blocksize = inode->i_sb->s_blocksize;
4573
4574         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4575              (unsigned long long)blk, (unsigned long long)new_blk);
4576
4577         s_bucket = ocfs2_xattr_bucket_new(inode);
4578         t_bucket = ocfs2_xattr_bucket_new(inode);
4579         if (!s_bucket || !t_bucket) {
4580                 ret = -ENOMEM;
4581                 mlog_errno(ret);
4582                 goto out;
4583         }
4584
4585         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4586         if (ret) {
4587                 mlog_errno(ret);
4588                 goto out;
4589         }
4590
4591         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4592                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4593         if (ret) {
4594                 mlog_errno(ret);
4595                 goto out;
4596         }
4597
4598         /*
4599          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4600          * there's no need to read it.
4601          */
4602         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4603         if (ret) {
4604                 mlog_errno(ret);
4605                 goto out;
4606         }
4607
4608         /*
4609          * Hey, if we're overwriting t_bucket, what difference does
4610          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4611          * same part of ocfs2_cp_xattr_bucket().
4612          */
4613         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4614                                                 new_bucket_head ?
4615                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4616                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4617         if (ret) {
4618                 mlog_errno(ret);
4619                 goto out;
4620         }
4621
4622         xh = bucket_xh(s_bucket);
4623         count = le16_to_cpu(xh->xh_count);
4624         start = ocfs2_xattr_find_divide_pos(xh);
4625
4626         if (start == count) {
4627                 xe = &xh->xh_entries[start-1];
4628
4629                 /*
4630                  * initialized a new empty bucket here.
4631                  * The hash value is set as one larger than
4632                  * that of the last entry in the previous bucket.
4633                  */
4634                 for (i = 0; i < t_bucket->bu_blocks; i++)
4635                         memset(bucket_block(t_bucket, i), 0, blocksize);
4636
4637                 xh = bucket_xh(t_bucket);
4638                 xh->xh_free_start = cpu_to_le16(blocksize);
4639                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4640                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4641
4642                 goto set_num_buckets;
4643         }
4644
4645         /* copy the whole bucket to the new first. */
4646         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4647
4648         /* update the new bucket. */
4649         xh = bucket_xh(t_bucket);
4650
4651         /*
4652          * Calculate the total name/value len and xh_free_start for
4653          * the old bucket first.
4654          */
4655         name_offset = OCFS2_XATTR_BUCKET_SIZE;
4656         name_value_len = 0;
4657         for (i = 0; i < start; i++) {
4658                 xe = &xh->xh_entries[i];
4659                 name_value_len += namevalue_size_xe(xe);
4660                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4661                         name_offset = le16_to_cpu(xe->xe_name_offset);
4662         }
4663
4664         /*
4665          * Now begin the modification to the new bucket.
4666          *
4667          * In the new bucket, We just move the xattr entry to the beginning
4668          * and don't touch the name/value. So there will be some holes in the
4669          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4670          * called.
4671          */
4672         xe = &xh->xh_entries[start];
4673         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4674         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4675              (int)((char *)xe - (char *)xh),
4676              (int)((char *)xh->xh_entries - (char *)xh));
4677         memmove((char *)xh->xh_entries, (char *)xe, len);
4678         xe = &xh->xh_entries[count - start];
4679         len = sizeof(struct ocfs2_xattr_entry) * start;
4680         memset((char *)xe, 0, len);
4681
4682         le16_add_cpu(&xh->xh_count, -start);
4683         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4684
4685         /* Calculate xh_free_start for the new bucket. */
4686         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4687         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4688                 xe = &xh->xh_entries[i];
4689                 if (le16_to_cpu(xe->xe_name_offset) <
4690                     le16_to_cpu(xh->xh_free_start))
4691                         xh->xh_free_start = xe->xe_name_offset;
4692         }
4693
4694 set_num_buckets:
4695         /* set xh->xh_num_buckets for the new xh. */
4696         if (new_bucket_head)
4697                 xh->xh_num_buckets = cpu_to_le16(1);
4698         else
4699                 xh->xh_num_buckets = 0;
4700
4701         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4702
4703         /* store the first_hash of the new bucket. */
4704         if (first_hash)
4705                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4706
4707         /*
4708          * Now only update the 1st block of the old bucket.  If we
4709          * just added a new empty bucket, there is no need to modify
4710          * it.
4711          */
4712         if (start == count)
4713                 goto out;
4714
4715         xh = bucket_xh(s_bucket);
4716         memset(&xh->xh_entries[start], 0,
4717                sizeof(struct ocfs2_xattr_entry) * (count - start));
4718         xh->xh_count = cpu_to_le16(start);
4719         xh->xh_free_start = cpu_to_le16(name_offset);
4720         xh->xh_name_value_len = cpu_to_le16(name_value_len);
4721
4722         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4723
4724 out:
4725         ocfs2_xattr_bucket_free(s_bucket);
4726         ocfs2_xattr_bucket_free(t_bucket);
4727
4728         return ret;
4729 }
4730
4731 /*
4732  * Copy xattr from one bucket to another bucket.
4733  *
4734  * The caller must make sure that the journal transaction
4735  * has enough space for journaling.
4736  */
4737 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4738                                  handle_t *handle,
4739                                  u64 s_blkno,
4740                                  u64 t_blkno,
4741                                  int t_is_new)
4742 {
4743         int ret;
4744         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4745
4746         BUG_ON(s_blkno == t_blkno);
4747
4748         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4749              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4750              t_is_new);
4751
4752         s_bucket = ocfs2_xattr_bucket_new(inode);
4753         t_bucket = ocfs2_xattr_bucket_new(inode);
4754         if (!s_bucket || !t_bucket) {
4755                 ret = -ENOMEM;
4756                 mlog_errno(ret);
4757                 goto out;
4758         }
4759
4760         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4761         if (ret)
4762                 goto out;
4763
4764         /*
4765          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4766          * there's no need to read it.
4767          */
4768         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4769         if (ret)
4770                 goto out;
4771
4772         /*
4773          * Hey, if we're overwriting t_bucket, what difference does
4774          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4775          * cluster to fill, we came here from
4776          * ocfs2_mv_xattr_buckets(), and it is really new -
4777          * ACCESS_CREATE is required.  But we also might have moved data
4778          * out of t_bucket before extending back into it.
4779          * ocfs2_add_new_xattr_bucket() can do this - its call to
4780          * ocfs2_add_new_xattr_cluster() may have created a new extent
4781          * and copied out the end of the old extent.  Then it re-extends
4782          * the old extent back to create space for new xattrs.  That's
4783          * how we get here, and the bucket isn't really new.
4784          */
4785         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4786                                                 t_is_new ?
4787                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4788                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4789         if (ret)
4790                 goto out;
4791
4792         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4793         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4794
4795 out:
4796         ocfs2_xattr_bucket_free(t_bucket);
4797         ocfs2_xattr_bucket_free(s_bucket);
4798
4799         return ret;
4800 }
4801
4802 /*
4803  * src_blk points to the start of an existing extent.  last_blk points to
4804  * last cluster in that extent.  to_blk points to a newly allocated
4805  * extent.  We copy the buckets from the cluster at last_blk to the new
4806  * extent.  If start_bucket is non-zero, we skip that many buckets before
4807  * we start copying.  The new extent's xh_num_buckets gets set to the
4808  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4809  * by the same amount.
4810  */
4811 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4812                                   u64 src_blk, u64 last_blk, u64 to_blk,
4813                                   unsigned int start_bucket,
4814                                   u32 *first_hash)
4815 {
4816         int i, ret, credits;
4817         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4818         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4819         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4820         struct ocfs2_xattr_bucket *old_first, *new_first;
4821
4822         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4823              (unsigned long long)last_blk, (unsigned long long)to_blk);
4824
4825         BUG_ON(start_bucket >= num_buckets);
4826         if (start_bucket) {
4827                 num_buckets -= start_bucket;
4828                 last_blk += (start_bucket * blks_per_bucket);
4829         }
4830
4831         /* The first bucket of the original extent */
4832         old_first = ocfs2_xattr_bucket_new(inode);
4833         /* The first bucket of the new extent */
4834         new_first = ocfs2_xattr_bucket_new(inode);
4835         if (!old_first || !new_first) {
4836                 ret = -ENOMEM;
4837                 mlog_errno(ret);
4838                 goto out;
4839         }
4840
4841         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4842         if (ret) {
4843                 mlog_errno(ret);
4844                 goto out;
4845         }
4846
4847         /*
4848          * We need to update the first bucket of the old extent and all
4849          * the buckets going to the new extent.
4850          */
4851         credits = ((num_buckets + 1) * blks_per_bucket) +
4852                 handle->h_buffer_credits;
4853         ret = ocfs2_extend_trans(handle, credits);
4854         if (ret) {
4855                 mlog_errno(ret);
4856                 goto out;
4857         }
4858
4859         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4860                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4861         if (ret) {
4862                 mlog_errno(ret);
4863                 goto out;
4864         }
4865
4866         for (i = 0; i < num_buckets; i++) {
4867                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4868                                             last_blk + (i * blks_per_bucket),
4869                                             to_blk + (i * blks_per_bucket),
4870                                             1);
4871                 if (ret) {
4872                         mlog_errno(ret);
4873                         goto out;
4874                 }
4875         }
4876
4877         /*
4878          * Get the new bucket ready before we dirty anything
4879          * (This actually shouldn't fail, because we already dirtied
4880          * it once in ocfs2_cp_xattr_bucket()).
4881          */
4882         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4883         if (ret) {
4884                 mlog_errno(ret);
4885                 goto out;
4886         }
4887         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4888                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4889         if (ret) {
4890                 mlog_errno(ret);
4891                 goto out;
4892         }
4893
4894         /* Now update the headers */
4895         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4896         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4897
4898         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4899         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4900
4901         if (first_hash)
4902                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4903
4904 out:
4905         ocfs2_xattr_bucket_free(new_first);
4906         ocfs2_xattr_bucket_free(old_first);
4907         return ret;
4908 }
4909
4910 /*
4911  * Move some xattrs in this cluster to the new cluster.
4912  * This function should only be called when bucket size == cluster size.
4913  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4914  */
4915 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4916                                       handle_t *handle,
4917                                       u64 prev_blk,
4918                                       u64 new_blk,
4919                                       u32 *first_hash)
4920 {
4921         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4922         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4923
4924         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4925
4926         ret = ocfs2_extend_trans(handle, credits);
4927         if (ret) {
4928                 mlog_errno(ret);
4929                 return ret;
4930         }
4931
4932         /* Move half of the xattr in start_blk to the next bucket. */
4933         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4934                                           new_blk, first_hash, 1);
4935 }
4936
4937 /*
4938  * Move some xattrs from the old cluster to the new one since they are not
4939  * contiguous in ocfs2 xattr tree.
4940  *
4941  * new_blk starts a new separate cluster, and we will move some xattrs from
4942  * prev_blk to it. v_start will be set as the first name hash value in this
4943  * new cluster so that it can be used as e_cpos during tree insertion and
4944  * don't collide with our original b-tree operations. first_bh and header_bh
4945  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4946  * to extend the insert bucket.
4947  *
4948  * The problem is how much xattr should we move to the new one and when should
4949  * we update first_bh and header_bh?
4950  * 1. If cluster size > bucket size, that means the previous cluster has more
4951  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4952  *    update the first_bh and header_bh if the insert bucket has been moved
4953  *    to the new cluster.
4954  * 2. If cluster_size == bucket_size:
4955  *    a) If the previous extent rec has more than one cluster and the insert
4956  *       place isn't in the last cluster, copy the entire last cluster to the
4957  *       new one. This time, we don't need to upate the first_bh and header_bh
4958  *       since they will not be moved into the new cluster.
4959  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4960  *       the new one. And we set the extend flag to zero if the insert place is
4961  *       moved into the new allocated cluster since no extend is needed.
4962  */
4963 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4964                                             handle_t *handle,
4965                                             struct ocfs2_xattr_bucket *first,
4966                                             struct ocfs2_xattr_bucket *target,
4967                                             u64 new_blk,
4968                                             u32 prev_clusters,
4969                                             u32 *v_start,
4970                                             int *extend)
4971 {
4972         int ret;
4973
4974         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4975              (unsigned long long)bucket_blkno(first), prev_clusters,
4976              (unsigned long long)new_blk);
4977
4978         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4979                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4980                                                           handle,
4981                                                           first, target,
4982                                                           new_blk,
4983                                                           prev_clusters,
4984                                                           v_start);
4985                 if (ret)
4986                         mlog_errno(ret);
4987         } else {
4988                 /* The start of the last cluster in the first extent */
4989                 u64 last_blk = bucket_blkno(first) +
4990                         ((prev_clusters - 1) *
4991                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4992
4993                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4994                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4995                                                      bucket_blkno(first),
4996                                                      last_blk, new_blk, 0,
4997                                                      v_start);
4998                         if (ret)
4999                                 mlog_errno(ret);
5000                 } else {
5001                         ret = ocfs2_divide_xattr_cluster(inode, handle,
5002                                                          last_blk, new_blk,
5003                                                          v_start);
5004                         if (ret)
5005                                 mlog_errno(ret);
5006
5007                         if ((bucket_blkno(target) == last_blk) && extend)
5008                                 *extend = 0;
5009                 }
5010         }
5011
5012         return ret;
5013 }
5014
5015 /*
5016  * Add a new cluster for xattr storage.
5017  *
5018  * If the new cluster is contiguous with the previous one, it will be
5019  * appended to the same extent record, and num_clusters will be updated.
5020  * If not, we will insert a new extent for it and move some xattrs in
5021  * the last cluster into the new allocated one.
5022  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5023  * lose the benefits of hashing because we'll have to search large leaves.
5024  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5025  * if it's bigger).
5026  *
5027  * first_bh is the first block of the previous extent rec and header_bh
5028  * indicates the bucket we will insert the new xattrs. They will be updated
5029  * when the header_bh is moved into the new cluster.
5030  */
5031 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5032                                        struct buffer_head *root_bh,
5033                                        struct ocfs2_xattr_bucket *first,
5034                                        struct ocfs2_xattr_bucket *target,
5035                                        u32 *num_clusters,
5036                                        u32 prev_cpos,
5037                                        int *extend,
5038                                        struct ocfs2_xattr_set_ctxt *ctxt)
5039 {
5040         int ret;
5041         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5042         u32 prev_clusters = *num_clusters;
5043         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5044         u64 block;
5045         handle_t *handle = ctxt->handle;
5046         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5047         struct ocfs2_extent_tree et;
5048
5049         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
5050              "previous xattr blkno = %llu\n",
5051              (unsigned long long)OCFS2_I(inode)->ip_blkno,
5052              prev_cpos, (unsigned long long)bucket_blkno(first));
5053
5054         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5055
5056         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5057                                       OCFS2_JOURNAL_ACCESS_WRITE);
5058         if (ret < 0) {
5059                 mlog_errno(ret);
5060                 goto leave;
5061         }
5062
5063         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
5064                                      clusters_to_add, &bit_off, &num_bits);
5065         if (ret < 0) {
5066                 if (ret != -ENOSPC)
5067                         mlog_errno(ret);
5068                 goto leave;
5069         }
5070
5071         BUG_ON(num_bits > clusters_to_add);
5072
5073         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5074         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
5075              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5076
5077         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5078             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5079              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5080                 /*
5081                  * If this cluster is contiguous with the old one and
5082                  * adding this new cluster, we don't surpass the limit of
5083                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5084                  * initialized and used like other buckets in the previous
5085                  * cluster.
5086                  * So add it as a contiguous one. The caller will handle
5087                  * its init process.
5088                  */
5089                 v_start = prev_cpos + prev_clusters;
5090                 *num_clusters = prev_clusters + num_bits;
5091                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5092                      num_bits);
5093         } else {
5094                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5095                                                        handle,
5096                                                        first,
5097                                                        target,
5098                                                        block,
5099                                                        prev_clusters,
5100                                                        &v_start,
5101                                                        extend);
5102                 if (ret) {
5103                         mlog_errno(ret);
5104                         goto leave;
5105                 }
5106         }
5107
5108         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
5109              num_bits, (unsigned long long)block, v_start);
5110         ret = ocfs2_insert_extent(handle, &et, v_start, block,
5111                                   num_bits, 0, ctxt->meta_ac);
5112         if (ret < 0) {
5113                 mlog_errno(ret);
5114                 goto leave;
5115         }
5116
5117         ret = ocfs2_journal_dirty(handle, root_bh);
5118         if (ret < 0)
5119                 mlog_errno(ret);
5120
5121 leave:
5122         return ret;
5123 }
5124
5125 /*
5126  * We are given an extent.  'first' is the bucket at the very front of
5127  * the extent.  The extent has space for an additional bucket past
5128  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5129  * of the target bucket.  We wish to shift every bucket past the target
5130  * down one, filling in that additional space.  When we get back to the
5131  * target, we split the target between itself and the now-empty bucket
5132  * at target+1 (aka, target_blkno + blks_per_bucket).
5133  */
5134 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5135                                      handle_t *handle,
5136                                      struct ocfs2_xattr_bucket *first,
5137                                      u64 target_blk,
5138                                      u32 num_clusters)
5139 {
5140         int ret, credits;
5141         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5142         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5143         u64 end_blk;
5144         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5145
5146         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5147              "from %llu, len = %u\n", (unsigned long long)target_blk,
5148              (unsigned long long)bucket_blkno(first), num_clusters);
5149
5150         /* The extent must have room for an additional bucket */
5151         BUG_ON(new_bucket >=
5152                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5153
5154         /* end_blk points to the last existing bucket */
5155         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5156
5157         /*
5158          * end_blk is the start of the last existing bucket.
5159          * Thus, (end_blk - target_blk) covers the target bucket and
5160          * every bucket after it up to, but not including, the last
5161          * existing bucket.  Then we add the last existing bucket, the
5162          * new bucket, and the first bucket (3 * blk_per_bucket).
5163          */
5164         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
5165                   handle->h_buffer_credits;
5166         ret = ocfs2_extend_trans(handle, credits);
5167         if (ret) {
5168                 mlog_errno(ret);
5169                 goto out;
5170         }
5171
5172         ret = ocfs2_xattr_bucket_journal_access(handle, first,
5173                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5174         if (ret) {
5175                 mlog_errno(ret);
5176                 goto out;
5177         }
5178
5179         while (end_blk != target_blk) {
5180                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5181                                             end_blk + blk_per_bucket, 0);
5182                 if (ret)
5183                         goto out;
5184                 end_blk -= blk_per_bucket;
5185         }
5186
5187         /* Move half of the xattr in target_blkno to the next bucket. */
5188         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5189                                         target_blk + blk_per_bucket, NULL, 0);
5190
5191         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5192         ocfs2_xattr_bucket_journal_dirty(handle, first);
5193
5194 out:
5195         return ret;
5196 }
5197
5198 /*
5199  * Add new xattr bucket in an extent record and adjust the buckets
5200  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5201  * bucket we want to insert into.
5202  *
5203  * In the easy case, we will move all the buckets after target down by
5204  * one. Half of target's xattrs will be moved to the next bucket.
5205  *
5206  * If current cluster is full, we'll allocate a new one.  This may not
5207  * be contiguous.  The underlying calls will make sure that there is
5208  * space for the insert, shifting buckets around if necessary.
5209  * 'target' may be moved by those calls.
5210  */
5211 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5212                                       struct buffer_head *xb_bh,
5213                                       struct ocfs2_xattr_bucket *target,
5214                                       struct ocfs2_xattr_set_ctxt *ctxt)
5215 {
5216         struct ocfs2_xattr_block *xb =
5217                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5218         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5219         struct ocfs2_extent_list *el = &xb_root->xt_list;
5220         u32 name_hash =
5221                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5222         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5223         int ret, num_buckets, extend = 1;
5224         u64 p_blkno;
5225         u32 e_cpos, num_clusters;
5226         /* The bucket at the front of the extent */
5227         struct ocfs2_xattr_bucket *first;
5228
5229         mlog(0, "Add new xattr bucket starting from %llu\n",
5230              (unsigned long long)bucket_blkno(target));
5231
5232         /* The first bucket of the original extent */
5233         first = ocfs2_xattr_bucket_new(inode);
5234         if (!first) {
5235                 ret = -ENOMEM;
5236                 mlog_errno(ret);
5237                 goto out;
5238         }
5239
5240         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5241                                   &num_clusters, el);
5242         if (ret) {
5243                 mlog_errno(ret);
5244                 goto out;
5245         }
5246
5247         ret = ocfs2_read_xattr_bucket(first, p_blkno);
5248         if (ret) {
5249                 mlog_errno(ret);
5250                 goto out;
5251         }
5252
5253         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5254         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5255                 /*
5256                  * This can move first+target if the target bucket moves
5257                  * to the new extent.
5258                  */
5259                 ret = ocfs2_add_new_xattr_cluster(inode,
5260                                                   xb_bh,
5261                                                   first,
5262                                                   target,
5263                                                   &num_clusters,
5264                                                   e_cpos,
5265                                                   &extend,
5266                                                   ctxt);
5267                 if (ret) {
5268                         mlog_errno(ret);
5269                         goto out;
5270                 }
5271         }
5272
5273         if (extend) {
5274                 ret = ocfs2_extend_xattr_bucket(inode,
5275                                                 ctxt->handle,
5276                                                 first,
5277                                                 bucket_blkno(target),
5278                                                 num_clusters);
5279                 if (ret)
5280                         mlog_errno(ret);
5281         }
5282
5283 out:
5284         ocfs2_xattr_bucket_free(first);
5285
5286         return ret;
5287 }
5288
5289 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5290                                         struct ocfs2_xattr_bucket *bucket,
5291                                         int offs)
5292 {
5293         int block_off = offs >> inode->i_sb->s_blocksize_bits;
5294
5295         offs = offs % inode->i_sb->s_blocksize;
5296         return bucket_block(bucket, block_off) + offs;
5297 }
5298
5299 /*
5300  * Set the xattr entry in the specified bucket.
5301  * The bucket is indicated by xs->bucket and it should have the enough
5302  * space for the xattr insertion.
5303  */
5304 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
5305                                            handle_t *handle,
5306                                            struct ocfs2_xattr_info *xi,
5307                                            struct ocfs2_xattr_search *xs,
5308                                            u32 name_hash)
5309 {
5310         int ret;
5311         u64 blkno;
5312         struct ocfs2_xa_loc loc;
5313
5314         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
5315              (unsigned long)xi->xi_value_len, xi->xi_name_index,
5316              (unsigned long long)bucket_blkno(xs->bucket));
5317
5318         if (!xs->bucket->bu_bhs[1]) {
5319                 blkno = bucket_blkno(xs->bucket);
5320                 ocfs2_xattr_bucket_relse(xs->bucket);
5321                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
5322                 if (ret) {
5323                         mlog_errno(ret);
5324                         goto out;
5325                 }
5326         }
5327
5328         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5329                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5330         if (ret < 0) {
5331                 mlog_errno(ret);
5332                 goto out;
5333         }
5334
5335         ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5336                                        xs->not_found ? NULL : xs->here);
5337         ret = ocfs2_xa_prepare_entry(&loc, xi, name_hash);
5338         if (ret) {
5339                 if (ret != -ENOSPC)
5340                         mlog_errno(ret);
5341                 goto out;
5342         }
5343         /* XXX For now, until we make ocfs2_xa_prepare_entry() primary */
5344         BUG_ON(ret == -ENOSPC);
5345         ocfs2_xa_store_inline_value(&loc, xi);
5346         xs->here = loc.xl_entry;
5347
5348         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5349
5350 out:
5351         return ret;
5352 }
5353
5354 /*
5355  * Truncate the specified xe_off entry in xattr bucket.
5356  * bucket is indicated by header_bh and len is the new length.
5357  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5358  *
5359  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5360  */
5361 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5362                                              struct ocfs2_xattr_bucket *bucket,
5363                                              int xe_off,
5364                                              int len,
5365                                              struct ocfs2_xattr_set_ctxt *ctxt)
5366 {
5367         int ret, offset;
5368         u64 value_blk;
5369         struct ocfs2_xattr_entry *xe;
5370         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5371         size_t blocksize = inode->i_sb->s_blocksize;
5372         struct ocfs2_xattr_value_buf vb = {
5373                 .vb_access = ocfs2_journal_access,
5374         };
5375
5376         xe = &xh->xh_entries[xe_off];
5377
5378         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5379
5380         offset = le16_to_cpu(xe->xe_name_offset) +
5381                  OCFS2_XATTR_SIZE(xe->xe_name_len);
5382
5383         value_blk = offset / blocksize;
5384
5385         /* We don't allow ocfs2_xattr_value to be stored in different block. */
5386         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5387
5388         vb.vb_bh = bucket->bu_bhs[value_blk];
5389         BUG_ON(!vb.vb_bh);
5390
5391         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5392                 (vb.vb_bh->b_data + offset % blocksize);
5393
5394         /*
5395          * From here on out we have to dirty the bucket.  The generic
5396          * value calls only modify one of the bucket's bhs, but we need
5397          * to send the bucket at once.  So if they error, they *could* have
5398          * modified something.  We have to assume they did, and dirty
5399          * the whole bucket.  This leaves us in a consistent state.
5400          */
5401         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5402              xe_off, (unsigned long long)bucket_blkno(bucket), len);
5403         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5404         if (ret) {
5405                 mlog_errno(ret);
5406                 goto out;
5407         }
5408
5409         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5410                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5411         if (ret) {
5412                 mlog_errno(ret);
5413                 goto out;
5414         }
5415
5416         xe->xe_value_size = cpu_to_le64(len);
5417
5418         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5419
5420 out:
5421         return ret;
5422 }
5423
5424 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
5425                                         struct ocfs2_xattr_search *xs,
5426                                         int len,
5427                                         struct ocfs2_xattr_set_ctxt *ctxt)
5428 {
5429         int ret, offset;
5430         struct ocfs2_xattr_entry *xe = xs->here;
5431         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
5432
5433         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
5434
5435         offset = xe - xh->xh_entries;
5436         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
5437                                                 offset, len, ctxt);
5438         if (ret)
5439                 mlog_errno(ret);
5440
5441         return ret;
5442 }
5443
5444 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
5445                                                 handle_t *handle,
5446                                                 struct ocfs2_xattr_search *xs,
5447                                                 char *val,
5448                                                 int value_len)
5449 {
5450         int ret, offset, block_off;
5451         struct ocfs2_xattr_value_root *xv;
5452         struct ocfs2_xattr_entry *xe = xs->here;
5453         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5454         void *base;
5455         struct ocfs2_xattr_value_buf vb = {
5456                 .vb_access = ocfs2_journal_access,
5457         };
5458
5459         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
5460
5461         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
5462                                                 xe - xh->xh_entries,
5463                                                 &block_off,
5464                                                 &offset);
5465         if (ret) {
5466                 mlog_errno(ret);
5467                 goto out;
5468         }
5469
5470         base = bucket_block(xs->bucket, block_off);
5471         xv = (struct ocfs2_xattr_value_root *)(base + offset +
5472                  OCFS2_XATTR_SIZE(xe->xe_name_len));
5473
5474         vb.vb_xv = xv;
5475         vb.vb_bh = xs->bucket->bu_bhs[block_off];
5476         ret = __ocfs2_xattr_set_value_outside(inode, handle,
5477                                               &vb, val, value_len);
5478         if (ret)
5479                 mlog_errno(ret);
5480 out:
5481         return ret;
5482 }
5483
5484 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5485                                   struct buffer_head *root_bh,
5486                                   u64 blkno,
5487                                   u32 cpos,
5488                                   u32 len,
5489                                   void *para)
5490 {
5491         int ret;
5492         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5493         struct inode *tl_inode = osb->osb_tl_inode;
5494         handle_t *handle;
5495         struct ocfs2_xattr_block *xb =
5496                         (struct ocfs2_xattr_block *)root_bh->b_data;
5497         struct ocfs2_alloc_context *meta_ac = NULL;
5498         struct ocfs2_cached_dealloc_ctxt dealloc;
5499         struct ocfs2_extent_tree et;
5500
5501         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5502                                           ocfs2_delete_xattr_in_bucket, para);
5503         if (ret) {
5504                 mlog_errno(ret);
5505                 return ret;
5506         }
5507
5508         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5509
5510         ocfs2_init_dealloc_ctxt(&dealloc);
5511
5512         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5513              cpos, len, (unsigned long long)blkno);
5514
5515         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5516                                                len);
5517
5518         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5519         if (ret) {
5520                 mlog_errno(ret);
5521                 return ret;
5522         }
5523
5524         mutex_lock(&tl_inode->i_mutex);
5525
5526         if (ocfs2_truncate_log_needs_flush(osb)) {
5527                 ret = __ocfs2_flush_truncate_log(osb);
5528                 if (ret < 0) {
5529                         mlog_errno(ret);
5530                         goto out;
5531                 }
5532         }
5533
5534         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5535         if (IS_ERR(handle)) {
5536                 ret = -ENOMEM;
5537                 mlog_errno(ret);
5538                 goto out;
5539         }
5540
5541         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5542                                       OCFS2_JOURNAL_ACCESS_WRITE);
5543         if (ret) {
5544                 mlog_errno(ret);
5545                 goto out_commit;
5546         }
5547
5548         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5549                                   &dealloc);
5550         if (ret) {
5551                 mlog_errno(ret);
5552                 goto out_commit;
5553         }
5554
5555         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5556
5557         ret = ocfs2_journal_dirty(handle, root_bh);
5558         if (ret) {
5559                 mlog_errno(ret);
5560                 goto out_commit;
5561         }
5562
5563         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5564         if (ret)
5565                 mlog_errno(ret);
5566
5567 out_commit:
5568         ocfs2_commit_trans(osb, handle);
5569 out:
5570         ocfs2_schedule_truncate_log_flush(osb, 1);
5571
5572         mutex_unlock(&tl_inode->i_mutex);
5573
5574         if (meta_ac)
5575                 ocfs2_free_alloc_context(meta_ac);
5576
5577         ocfs2_run_deallocs(osb, &dealloc);
5578
5579         return ret;
5580 }
5581
5582 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
5583                                          handle_t *handle,
5584                                          struct ocfs2_xattr_search *xs)
5585 {
5586         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5587         struct ocfs2_xattr_entry *last = &xh->xh_entries[
5588                                                 le16_to_cpu(xh->xh_count) - 1];
5589         int ret = 0;
5590
5591         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5592                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5593         if (ret) {
5594                 mlog_errno(ret);
5595                 return;
5596         }
5597
5598         /* Remove the old entry. */
5599         memmove(xs->here, xs->here + 1,
5600                 (void *)last - (void *)xs->here);
5601         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
5602         le16_add_cpu(&xh->xh_count, -1);
5603
5604         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5605 }
5606
5607 /*
5608  * Set the xattr name/value in the bucket specified in xs.
5609  *
5610  * As the new value in xi may be stored in the bucket or in an outside cluster,
5611  * we divide the whole process into 3 steps:
5612  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
5613  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
5614  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
5615  * 4. If the clusters for the new outside value can't be allocated, we need
5616  *    to free the xattr we allocated in set.
5617  */
5618 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5619                                      struct ocfs2_xattr_info *xi,
5620                                      struct ocfs2_xattr_search *xs,
5621                                      struct ocfs2_xattr_set_ctxt *ctxt)
5622 {
5623         int ret;
5624         size_t value_len;
5625         char *val = (char *)xi->xi_value;
5626         struct ocfs2_xattr_entry *xe = xs->here;
5627         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->xi_name,
5628                                               xi->xi_name_len);
5629
5630         value_len = xi->xi_value_len;
5631         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5632                 /*
5633                  * We need to truncate the xattr storage first.
5634                  *
5635                  * If both the old and new value are stored to
5636                  * outside block, we only need to truncate
5637                  * the storage and then set the value outside.
5638                  *
5639                  * If the new value should be stored within block,
5640                  * we should free all the outside block first and
5641                  * the modification to the xattr block will be done
5642                  * by following steps.
5643                  */
5644                 if (xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE)
5645                         value_len = 0;
5646
5647                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5648                                                            value_len,
5649                                                            ctxt);
5650                 if (ret)
5651                         goto out;
5652
5653                 if (value_len)
5654                         goto set_value_outside;
5655         }
5656
5657         /* So we have to handle the inside block change now. */
5658         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5659                                               name_hash);
5660         if (ret) {
5661                 mlog_errno(ret);
5662                 goto out;
5663         }
5664
5665         if (xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE)
5666                 goto out;
5667
5668         /* allocate the space now for the outside block storage. */
5669         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5670                                                    value_len, ctxt);
5671         if (ret) {
5672                 mlog_errno(ret);
5673
5674                 if (xs->not_found) {
5675                         /*
5676                          * We can't allocate enough clusters for outside
5677                          * storage and we have allocated xattr already,
5678                          * so need to remove it.
5679                          */
5680                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5681                 }
5682                 goto out;
5683         }
5684
5685 set_value_outside:
5686         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5687                                                    xs, val, value_len);
5688 out:
5689         return ret;
5690 }
5691
5692 /*
5693  * check whether the xattr bucket is filled up with the same hash value.
5694  * If we want to insert the xattr with the same hash, return -ENOSPC.
5695  * If we want to insert a xattr with different hash value, go ahead
5696  * and ocfs2_divide_xattr_bucket will handle this.
5697  */
5698 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5699                                               struct ocfs2_xattr_bucket *bucket,
5700                                               const char *name)
5701 {
5702         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5703         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5704
5705         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5706                 return 0;
5707
5708         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5709             xh->xh_entries[0].xe_name_hash) {
5710                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5711                      "hash = %u\n",
5712                      (unsigned long long)bucket_blkno(bucket),
5713                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5714                 return -ENOSPC;
5715         }
5716
5717         return 0;
5718 }
5719
5720 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5721                                              struct ocfs2_xattr_info *xi,
5722                                              struct ocfs2_xattr_search *xs,
5723                                              struct ocfs2_xattr_set_ctxt *ctxt)
5724 {
5725         struct ocfs2_xattr_header *xh;
5726         struct ocfs2_xattr_entry *xe;
5727         u16 count, header_size, xh_free_start;
5728         int free, max_free, need, old;
5729         size_t value_size = 0;
5730         size_t blocksize = inode->i_sb->s_blocksize;
5731         int ret, allocation = 0;
5732
5733         mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5734
5735 try_again:
5736         xh = xs->header;
5737         count = le16_to_cpu(xh->xh_count);
5738         xh_free_start = le16_to_cpu(xh->xh_free_start);
5739         header_size = sizeof(struct ocfs2_xattr_header) +
5740                         count * sizeof(struct ocfs2_xattr_entry);
5741         max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5742                 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5743
5744         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5745                         "of %u which exceed block size\n",
5746                         (unsigned long long)bucket_blkno(xs->bucket),
5747                         header_size);
5748
5749         if (xi->xi_value && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
5750                 value_size = OCFS2_XATTR_ROOT_SIZE;
5751         else if (xi->xi_value)
5752                 value_size = OCFS2_XATTR_SIZE(xi->xi_value_len);
5753
5754         if (xs->not_found)
5755                 need = sizeof(struct ocfs2_xattr_entry) +
5756                         OCFS2_XATTR_SIZE(xi->xi_name_len) + value_size;
5757         else {
5758                 need = value_size + OCFS2_XATTR_SIZE(xi->xi_name_len);
5759
5760                 /*
5761                  * We only replace the old value if the new length is smaller
5762                  * than the old one. Otherwise we will allocate new space in the
5763                  * bucket to store it.
5764                  */
5765                 xe = xs->here;
5766                 if (ocfs2_xattr_is_local(xe))
5767                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5768                 else
5769                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5770
5771                 if (old >= value_size)
5772                         need = 0;
5773         }
5774
5775         free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5776         /*
5777          * We need to make sure the new name/value pair
5778          * can exist in the same block.
5779          */
5780         if (xh_free_start % blocksize < need)
5781                 free -= xh_free_start % blocksize;
5782
5783         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5784              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5785              " %u\n", xs->not_found,
5786              (unsigned long long)bucket_blkno(xs->bucket),
5787              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5788              le16_to_cpu(xh->xh_name_value_len));
5789
5790         if (free < need ||
5791             (xs->not_found &&
5792              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5793                 if (need <= max_free &&
5794                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5795                         /*
5796                          * We can create the space by defragment. Since only the
5797                          * name/value will be moved, the xe shouldn't be changed
5798                          * in xs.
5799                          */
5800                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5801                                                         xs->bucket);
5802                         if (ret) {
5803                                 mlog_errno(ret);
5804                                 goto out;
5805                         }
5806
5807                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5808                         free = xh_free_start - header_size
5809                                 - OCFS2_XATTR_HEADER_GAP;
5810                         if (xh_free_start % blocksize < need)
5811                                 free -= xh_free_start % blocksize;
5812
5813                         if (free >= need)
5814                                 goto xattr_set;
5815
5816                         mlog(0, "Can't get enough space for xattr insert by "
5817                              "defragment. Need %u bytes, but we have %d, so "
5818                              "allocate new bucket for it.\n", need, free);
5819                 }
5820
5821                 /*
5822                  * We have to add new buckets or clusters and one
5823                  * allocation should leave us enough space for insert.
5824                  */
5825                 BUG_ON(allocation);
5826
5827                 /*
5828                  * We do not allow for overlapping ranges between buckets. And
5829                  * the maximum number of collisions we will allow for then is
5830                  * one bucket's worth, so check it here whether we need to
5831                  * add a new bucket for the insert.
5832                  */
5833                 ret = ocfs2_check_xattr_bucket_collision(inode,
5834                                                          xs->bucket,
5835                                                          xi->xi_name);
5836                 if (ret) {
5837                         mlog_errno(ret);
5838                         goto out;
5839                 }
5840
5841                 ret = ocfs2_add_new_xattr_bucket(inode,
5842                                                  xs->xattr_bh,
5843                                                  xs->bucket,
5844                                                  ctxt);
5845                 if (ret) {
5846                         mlog_errno(ret);
5847                         goto out;
5848                 }
5849
5850                 /*
5851                  * ocfs2_add_new_xattr_bucket() will have updated
5852                  * xs->bucket if it moved, but it will not have updated
5853                  * any of the other search fields.  Thus, we drop it and
5854                  * re-search.  Everything should be cached, so it'll be
5855                  * quick.
5856                  */
5857                 ocfs2_xattr_bucket_relse(xs->bucket);
5858                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5859                                                    xi->xi_name_index,
5860                                                    xi->xi_name, xs);
5861                 if (ret && ret != -ENODATA)
5862                         goto out;
5863                 xs->not_found = ret;
5864                 allocation = 1;
5865                 goto try_again;
5866         }
5867
5868 xattr_set:
5869         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5870 out:
5871         mlog_exit(ret);
5872         return ret;
5873 }
5874
5875 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5876                                         struct ocfs2_xattr_bucket *bucket,
5877                                         void *para)
5878 {
5879         int ret = 0, ref_credits;
5880         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5881         u16 i;
5882         struct ocfs2_xattr_entry *xe;
5883         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5884         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5885         int credits = ocfs2_remove_extent_credits(osb->sb) +
5886                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5887         struct ocfs2_xattr_value_root *xv;
5888         struct ocfs2_rm_xattr_bucket_para *args =
5889                         (struct ocfs2_rm_xattr_bucket_para *)para;
5890
5891         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5892
5893         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5894                 xe = &xh->xh_entries[i];
5895                 if (ocfs2_xattr_is_local(xe))
5896                         continue;
5897
5898                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5899                                                       i, &xv, NULL);
5900
5901                 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5902                                                          args->ref_ci,
5903                                                          args->ref_root_bh,
5904                                                          &ctxt.meta_ac,
5905                                                          &ref_credits);
5906
5907                 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5908                 if (IS_ERR(ctxt.handle)) {
5909                         ret = PTR_ERR(ctxt.handle);
5910                         mlog_errno(ret);
5911                         break;
5912                 }
5913
5914                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5915                                                         i, 0, &ctxt);
5916
5917                 ocfs2_commit_trans(osb, ctxt.handle);
5918                 if (ctxt.meta_ac) {
5919                         ocfs2_free_alloc_context(ctxt.meta_ac);
5920                         ctxt.meta_ac = NULL;
5921                 }
5922                 if (ret) {
5923                         mlog_errno(ret);
5924                         break;
5925                 }
5926         }
5927
5928         if (ctxt.meta_ac)
5929                 ocfs2_free_alloc_context(ctxt.meta_ac);
5930         ocfs2_schedule_truncate_log_flush(osb, 1);
5931         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5932         return ret;
5933 }
5934
5935 /*
5936  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5937  * or change the extent record flag), we need to recalculate
5938  * the metaecc for the whole bucket. So it is done here.
5939  *
5940  * Note:
5941  * We have to give the extra credits for the caller.
5942  */
5943 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5944                                             handle_t *handle,
5945                                             void *para)
5946 {
5947         int ret;
5948         struct ocfs2_xattr_bucket *bucket =
5949                         (struct ocfs2_xattr_bucket *)para;
5950
5951         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5952                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5953         if (ret) {
5954                 mlog_errno(ret);
5955                 return ret;
5956         }
5957
5958         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5959
5960         return 0;
5961 }
5962
5963 /*
5964  * Special action we need if the xattr value is refcounted.
5965  *
5966  * 1. If the xattr is refcounted, lock the tree.
5967  * 2. CoW the xattr if we are setting the new value and the value
5968  *    will be stored outside.
5969  * 3. In other case, decrease_refcount will work for us, so just
5970  *    lock the refcount tree, calculate the meta and credits is OK.
5971  *
5972  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5973  * currently CoW is a completed transaction, while this function
5974  * will also lock the allocators and let us deadlock. So we will
5975  * CoW the whole xattr value.
5976  */
5977 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5978                                         struct ocfs2_dinode *di,
5979                                         struct ocfs2_xattr_info *xi,
5980                                         struct ocfs2_xattr_search *xis,
5981                                         struct ocfs2_xattr_search *xbs,
5982                                         struct ocfs2_refcount_tree **ref_tree,
5983                                         int *meta_add,
5984                                         int *credits)
5985 {
5986         int ret = 0;
5987         struct ocfs2_xattr_block *xb;
5988         struct ocfs2_xattr_entry *xe;
5989         char *base;
5990         u32 p_cluster, num_clusters;
5991         unsigned int ext_flags;
5992         int name_offset, name_len;
5993         struct ocfs2_xattr_value_buf vb;
5994         struct ocfs2_xattr_bucket *bucket = NULL;
5995         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5996         struct ocfs2_post_refcount refcount;
5997         struct ocfs2_post_refcount *p = NULL;
5998         struct buffer_head *ref_root_bh = NULL;
5999
6000         if (!xis->not_found) {
6001                 xe = xis->here;
6002                 name_offset = le16_to_cpu(xe->xe_name_offset);
6003                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
6004                 base = xis->base;
6005                 vb.vb_bh = xis->inode_bh;
6006                 vb.vb_access = ocfs2_journal_access_di;
6007         } else {
6008                 int i, block_off = 0;
6009                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
6010                 xe = xbs->here;
6011                 name_offset = le16_to_cpu(xe->xe_name_offset);
6012                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
6013                 i = xbs->here - xbs->header->xh_entries;
6014
6015                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
6016                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
6017                                                         bucket_xh(xbs->bucket),
6018                                                         i, &block_off,
6019                                                         &name_offset);
6020                         if (ret) {
6021                                 mlog_errno(ret);
6022                                 goto out;
6023                         }
6024                         base = bucket_block(xbs->bucket, block_off);
6025                         vb.vb_bh = xbs->bucket->bu_bhs[block_off];
6026                         vb.vb_access = ocfs2_journal_access;
6027
6028                         if (ocfs2_meta_ecc(osb)) {
6029                                 /*create parameters for ocfs2_post_refcount. */
6030                                 bucket = xbs->bucket;
6031                                 refcount.credits = bucket->bu_blocks;
6032                                 refcount.para = bucket;
6033                                 refcount.func =
6034                                         ocfs2_xattr_bucket_post_refcount;
6035                                 p = &refcount;
6036                         }
6037                 } else {
6038                         base = xbs->base;
6039                         vb.vb_bh = xbs->xattr_bh;
6040                         vb.vb_access = ocfs2_journal_access_xb;
6041                 }
6042         }
6043
6044         if (ocfs2_xattr_is_local(xe))
6045                 goto out;
6046
6047         vb.vb_xv = (struct ocfs2_xattr_value_root *)
6048                                 (base + name_offset + name_len);
6049
6050         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
6051                                        &num_clusters, &vb.vb_xv->xr_list,
6052                                        &ext_flags);
6053         if (ret) {
6054                 mlog_errno(ret);
6055                 goto out;
6056         }
6057
6058         /*
6059          * We just need to check the 1st extent record, since we always
6060          * CoW the whole xattr. So there shouldn't be a xattr with
6061          * some REFCOUNT extent recs after the 1st one.
6062          */
6063         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
6064                 goto out;
6065
6066         ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
6067                                        1, ref_tree, &ref_root_bh);
6068         if (ret) {
6069                 mlog_errno(ret);
6070                 goto out;
6071         }
6072
6073         /*
6074          * If we are deleting the xattr or the new size will be stored inside,
6075          * cool, leave it there, the xattr truncate process will remove them
6076          * for us(it still needs the refcount tree lock and the meta, credits).
6077          * And the worse case is that every cluster truncate will split the
6078          * refcount tree, and make the original extent become 3. So we will need
6079          * 2 * cluster more extent recs at most.
6080          */
6081         if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
6082
6083                 ret = ocfs2_refcounted_xattr_delete_need(inode,
6084                                                          &(*ref_tree)->rf_ci,
6085                                                          ref_root_bh, vb.vb_xv,
6086                                                          meta_add, credits);
6087                 if (ret)
6088                         mlog_errno(ret);
6089                 goto out;
6090         }
6091
6092         ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
6093                                        *ref_tree, ref_root_bh, 0,
6094                                        le32_to_cpu(vb.vb_xv->xr_clusters), p);
6095         if (ret)
6096                 mlog_errno(ret);
6097
6098 out:
6099         brelse(ref_root_bh);
6100         return ret;
6101 }
6102
6103 /*
6104  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
6105  * The physical clusters will be added to refcount tree.
6106  */
6107 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
6108                                 struct ocfs2_xattr_value_root *xv,
6109                                 struct ocfs2_extent_tree *value_et,
6110                                 struct ocfs2_caching_info *ref_ci,
6111                                 struct buffer_head *ref_root_bh,
6112                                 struct ocfs2_cached_dealloc_ctxt *dealloc,
6113                                 struct ocfs2_post_refcount *refcount)
6114 {
6115         int ret = 0;
6116         u32 clusters = le32_to_cpu(xv->xr_clusters);
6117         u32 cpos, p_cluster, num_clusters;
6118         struct ocfs2_extent_list *el = &xv->xr_list;
6119         unsigned int ext_flags;
6120
6121         cpos = 0;
6122         while (cpos < clusters) {
6123                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
6124                                                &num_clusters, el, &ext_flags);
6125
6126                 cpos += num_clusters;
6127                 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
6128                         continue;
6129
6130                 BUG_ON(!p_cluster);
6131
6132                 ret = ocfs2_add_refcount_flag(inode, value_et,
6133                                               ref_ci, ref_root_bh,
6134                                               cpos - num_clusters,
6135                                               p_cluster, num_clusters,
6136                                               dealloc, refcount);
6137                 if (ret) {
6138                         mlog_errno(ret);
6139                         break;
6140                 }
6141         }
6142
6143         return ret;
6144 }
6145
6146 /*
6147  * Given a normal ocfs2_xattr_header, refcount all the entries which
6148  * have value stored outside.
6149  * Used for xattrs stored in inode and ocfs2_xattr_block.
6150  */
6151 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
6152                                 struct ocfs2_xattr_value_buf *vb,
6153                                 struct ocfs2_xattr_header *header,
6154                                 struct ocfs2_caching_info *ref_ci,
6155                                 struct buffer_head *ref_root_bh,
6156                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6157 {
6158
6159         struct ocfs2_xattr_entry *xe;
6160         struct ocfs2_xattr_value_root *xv;
6161         struct ocfs2_extent_tree et;
6162         int i, ret = 0;
6163
6164         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
6165                 xe = &header->xh_entries[i];
6166
6167                 if (ocfs2_xattr_is_local(xe))
6168                         continue;
6169
6170                 xv = (struct ocfs2_xattr_value_root *)((void *)header +
6171                         le16_to_cpu(xe->xe_name_offset) +
6172                         OCFS2_XATTR_SIZE(xe->xe_name_len));
6173
6174                 vb->vb_xv = xv;
6175                 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
6176
6177                 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
6178                                                         ref_ci, ref_root_bh,
6179                                                         dealloc, NULL);
6180                 if (ret) {
6181                         mlog_errno(ret);
6182                         break;
6183                 }
6184         }
6185
6186         return ret;
6187 }
6188
6189 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
6190                                 struct buffer_head *fe_bh,
6191                                 struct ocfs2_caching_info *ref_ci,
6192                                 struct buffer_head *ref_root_bh,
6193                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6194 {
6195         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6196         struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
6197                                 (fe_bh->b_data + inode->i_sb->s_blocksize -
6198                                 le16_to_cpu(di->i_xattr_inline_size));
6199         struct ocfs2_xattr_value_buf vb = {
6200                 .vb_bh = fe_bh,
6201                 .vb_access = ocfs2_journal_access_di,
6202         };
6203
6204         return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6205                                                   ref_ci, ref_root_bh, dealloc);
6206 }
6207
/*
 * Arguments carried through the xattr tree iteration to
 * ocfs2_xattr_bucket_value_refcount(), which forwards each of them to
 * ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	struct ocfs2_caching_info		*ref_ci;
	struct buffer_head			*ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt	*dealloc;
};
6213
6214 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
6215                                            struct ocfs2_xattr_bucket *bucket,
6216                                            int offset,
6217                                            struct ocfs2_xattr_value_root **xv,
6218                                            struct buffer_head **bh)
6219 {
6220         int ret, block_off, name_offset;
6221         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
6222         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6223         void *base;
6224
6225         ret = ocfs2_xattr_bucket_get_name_value(sb,
6226                                                 bucket_xh(bucket),
6227                                                 offset,
6228                                                 &block_off,
6229                                                 &name_offset);
6230         if (ret) {
6231                 mlog_errno(ret);
6232                 goto out;
6233         }
6234
6235         base = bucket_block(bucket, block_off);
6236
6237         *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6238                          OCFS2_XATTR_SIZE(xe->xe_name_len));
6239
6240         if (bh)
6241                 *bh = bucket->bu_bhs[block_off];
6242 out:
6243         return ret;
6244 }
6245
6246 /*
6247  * For a given xattr bucket, refcount all the entries which
6248  * have value stored outside.
6249  */
6250 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6251                                              struct ocfs2_xattr_bucket *bucket,
6252                                              void *para)
6253 {
6254         int i, ret = 0;
6255         struct ocfs2_extent_tree et;
6256         struct ocfs2_xattr_tree_value_refcount_para *ref =
6257                         (struct ocfs2_xattr_tree_value_refcount_para *)para;
6258         struct ocfs2_xattr_header *xh =
6259                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6260         struct ocfs2_xattr_entry *xe;
6261         struct ocfs2_xattr_value_buf vb = {
6262                 .vb_access = ocfs2_journal_access,
6263         };
6264         struct ocfs2_post_refcount refcount = {
6265                 .credits = bucket->bu_blocks,
6266                 .para = bucket,
6267                 .func = ocfs2_xattr_bucket_post_refcount,
6268         };
6269         struct ocfs2_post_refcount *p = NULL;
6270
6271         /* We only need post_refcount if we support metaecc. */
6272         if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6273                 p = &refcount;
6274
6275         mlog(0, "refcount bucket %llu, count = %u\n",
6276              (unsigned long long)bucket_blkno(bucket),
6277              le16_to_cpu(xh->xh_count));
6278         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6279                 xe = &xh->xh_entries[i];
6280
6281                 if (ocfs2_xattr_is_local(xe))
6282                         continue;
6283
6284                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6285                                                       &vb.vb_xv, &vb.vb_bh);
6286                 if (ret) {
6287                         mlog_errno(ret);
6288                         break;
6289                 }
6290
6291                 ocfs2_init_xattr_value_extent_tree(&et,
6292                                                    INODE_CACHE(inode), &vb);
6293
6294                 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6295                                                         &et, ref->ref_ci,
6296                                                         ref->ref_root_bh,
6297                                                         ref->dealloc, p);
6298                 if (ret) {
6299                         mlog_errno(ret);
6300                         break;
6301                 }
6302         }
6303
6304         return ret;
6305
6306 }
6307
6308 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6309                                      struct buffer_head *root_bh,
6310                                      u64 blkno, u32 cpos, u32 len, void *para)
6311 {
6312         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6313                                            ocfs2_xattr_bucket_value_refcount,
6314                                            para);
6315 }
6316
6317 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6318                                 struct buffer_head *blk_bh,
6319                                 struct ocfs2_caching_info *ref_ci,
6320                                 struct buffer_head *ref_root_bh,
6321                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6322 {
6323         int ret = 0;
6324         struct ocfs2_xattr_block *xb =
6325                                 (struct ocfs2_xattr_block *)blk_bh->b_data;
6326
6327         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6328                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6329                 struct ocfs2_xattr_value_buf vb = {
6330                         .vb_bh = blk_bh,
6331                         .vb_access = ocfs2_journal_access_xb,
6332                 };
6333
6334                 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6335                                                          ref_ci, ref_root_bh,
6336                                                          dealloc);
6337         } else {
6338                 struct ocfs2_xattr_tree_value_refcount_para para = {
6339                         .ref_ci = ref_ci,
6340                         .ref_root_bh = ref_root_bh,
6341                         .dealloc = dealloc,
6342                 };
6343
6344                 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6345                                                 ocfs2_refcount_xattr_tree_rec,
6346                                                 &para);
6347         }
6348
6349         return ret;
6350 }
6351
6352 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6353                                      struct buffer_head *fe_bh,
6354                                      struct ocfs2_caching_info *ref_ci,
6355                                      struct buffer_head *ref_root_bh,
6356                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
6357 {
6358         int ret = 0;
6359         struct ocfs2_inode_info *oi = OCFS2_I(inode);
6360         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6361         struct buffer_head *blk_bh = NULL;
6362
6363         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6364                 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6365                                                          ref_ci, ref_root_bh,
6366                                                          dealloc);
6367                 if (ret) {
6368                         mlog_errno(ret);
6369                         goto out;
6370                 }
6371         }
6372
6373         if (!di->i_xattr_loc)
6374                 goto out;
6375
6376         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6377                                      &blk_bh);
6378         if (ret < 0) {
6379                 mlog_errno(ret);
6380                 goto out;
6381         }
6382
6383         ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6384                                                 ref_root_bh, dealloc);
6385         if (ret)
6386                 mlog_errno(ret);
6387
6388         brelse(blk_bh);
6389 out:
6390
6391         return ret;
6392 }
6393
/*
 * Predicate consulted by ocfs2_reflink_xattr_header(): a zero return
 * means the entry is removed from the new xattr header instead of being
 * reflinked.
 */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);

/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are the inode buffer heads of the old and new inode.
 * xattr_reflinked may be NULL, in which case no entry is filtered out.
 */
struct ocfs2_xattr_reflink {
	struct inode				*old_inode;
	struct inode				*new_inode;
	struct buffer_head			*old_bh;
	struct buffer_head			*new_bh;
	struct ocfs2_caching_info		*ref_ci;
	struct buffer_head			*ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt	*dealloc;
	should_xattr_reflinked			*xattr_reflinked;
};
6409
/*
 * Given an xattr header and an entry offset, return the matching value
 * root in *xv and, when ret_bh is not NULL, the buffer head holding it.
 * Xattrs in the inode, in an xattr block and in an xattr tree each have
 * a different implementation.
 */
typedef int (get_xattr_value_root)(struct super_block *sb,
				   struct buffer_head *bh,
				   struct ocfs2_xattr_header *xh,
				   int offset,
				   struct ocfs2_xattr_value_root **xv,
				   struct buffer_head **ret_bh,
				   void *para);
6422
6423 /*
6424  * Calculate all the xattr value root metadata stored in this xattr header and
6425  * credits we need if we create them from the scratch.
6426  * We use get_xattr_value_root so that all types of xattr container can use it.
6427  */
6428 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6429                                              struct buffer_head *bh,
6430                                              struct ocfs2_xattr_header *xh,
6431                                              int *metas, int *credits,
6432                                              int *num_recs,
6433                                              get_xattr_value_root *func,
6434                                              void *para)
6435 {
6436         int i, ret = 0;
6437         struct ocfs2_xattr_value_root *xv;
6438         struct ocfs2_xattr_entry *xe;
6439
6440         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6441                 xe = &xh->xh_entries[i];
6442                 if (ocfs2_xattr_is_local(xe))
6443                         continue;
6444
6445                 ret = func(sb, bh, xh, i, &xv, NULL, para);
6446                 if (ret) {
6447                         mlog_errno(ret);
6448                         break;
6449                 }
6450
6451                 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6452                           le16_to_cpu(xv->xr_list.l_next_free_rec);
6453
6454                 *credits += ocfs2_calc_extend_credits(sb,
6455                                                 &def_xv.xv.xr_list,
6456                                                 le32_to_cpu(xv->xr_clusters));
6457
6458                 /*
6459                  * If the value is a tree with depth > 1, We don't go deep
6460                  * to the extent block, so just calculate a maximum record num.
6461                  */
6462                 if (!xv->xr_list.l_tree_depth)
6463                         *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6464                 else
6465                         *num_recs += ocfs2_clusters_for_bytes(sb,
6466                                                               XATTR_SIZE_MAX);
6467         }
6468
6469         return ret;
6470 }
6471
6472 /* Used by xattr inode and block to return the right xv and buffer_head. */
6473 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6474                                       struct buffer_head *bh,
6475                                       struct ocfs2_xattr_header *xh,
6476                                       int offset,
6477                                       struct ocfs2_xattr_value_root **xv,
6478                                       struct buffer_head **ret_bh,
6479                                       void *para)
6480 {
6481         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6482
6483         *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6484                 le16_to_cpu(xe->xe_name_offset) +
6485                 OCFS2_XATTR_SIZE(xe->xe_name_len));
6486
6487         if (ret_bh)
6488                 *ret_bh = bh;
6489
6490         return 0;
6491 }
6492
6493 /*
6494  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6495  * It is only used for inline xattr and xattr block.
6496  */
6497 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6498                                         struct ocfs2_xattr_header *xh,
6499                                         struct buffer_head *ref_root_bh,
6500                                         int *credits,
6501                                         struct ocfs2_alloc_context **meta_ac)
6502 {
6503         int ret, meta_add = 0, num_recs = 0;
6504         struct ocfs2_refcount_block *rb =
6505                         (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6506
6507         *credits = 0;
6508
6509         ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6510                                                 &meta_add, credits, &num_recs,
6511                                                 ocfs2_get_xattr_value_root,
6512                                                 NULL);
6513         if (ret) {
6514                 mlog_errno(ret);
6515                 goto out;
6516         }
6517
6518         /*
6519          * We need to add/modify num_recs in refcount tree, so just calculate
6520          * an approximate number we need for refcount tree change.
6521          * Sometimes we need to split the tree, and after split,  half recs
6522          * will be moved to the new block, and a new block can only provide
6523          * half number of recs. So we multiple new blocks by 2.
6524          */
6525         num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6526         meta_add += num_recs;
6527         *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6528         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6529                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6530                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6531         else
6532                 *credits += 1;
6533
6534         ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6535         if (ret)
6536                 mlog_errno(ret);
6537
6538 out:
6539         return ret;
6540 }
6541
6542 /*
6543  * Given a xattr header, reflink all the xattrs in this container.
6544  * It can be used for inode, block and bucket.
6545  *
6546  * NOTE:
6547  * Before we call this function, the caller has memcpy the xattr in
6548  * old_xh to the new_xh.
6549  *
6550  * If args.xattr_reflinked is set, call it to decide whether the xe should
6551  * be reflinked or not. If not, remove it from the new xattr header.
6552  */
6553 static int ocfs2_reflink_xattr_header(handle_t *handle,
6554                                       struct ocfs2_xattr_reflink *args,
6555                                       struct buffer_head *old_bh,
6556                                       struct ocfs2_xattr_header *xh,
6557                                       struct buffer_head *new_bh,
6558                                       struct ocfs2_xattr_header *new_xh,
6559                                       struct ocfs2_xattr_value_buf *vb,
6560                                       struct ocfs2_alloc_context *meta_ac,
6561                                       get_xattr_value_root *func,
6562                                       void *para)
6563 {
6564         int ret = 0, i, j;
6565         struct super_block *sb = args->old_inode->i_sb;
6566         struct buffer_head *value_bh;
6567         struct ocfs2_xattr_entry *xe, *last;
6568         struct ocfs2_xattr_value_root *xv, *new_xv;
6569         struct ocfs2_extent_tree data_et;
6570         u32 clusters, cpos, p_cluster, num_clusters;
6571         unsigned int ext_flags = 0;
6572
6573         mlog(0, "reflink xattr in container %llu, count = %u\n",
6574              (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6575
6576         last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6577         for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6578                 xe = &xh->xh_entries[i];
6579
6580                 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6581                         xe = &new_xh->xh_entries[j];
6582
6583                         le16_add_cpu(&new_xh->xh_count, -1);
6584                         if (new_xh->xh_count) {
6585                                 memmove(xe, xe + 1,
6586                                         (void *)last - (void *)xe);
6587                                 memset(last, 0,
6588                                        sizeof(struct ocfs2_xattr_entry));
6589                         }
6590
6591                         /*
6592                          * We don't want j to increase in the next round since
6593                          * it is already moved ahead.
6594                          */
6595                         j--;
6596                         continue;
6597                 }
6598
6599                 if (ocfs2_xattr_is_local(xe))
6600                         continue;
6601
6602                 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6603                 if (ret) {
6604                         mlog_errno(ret);
6605                         break;
6606                 }
6607
6608                 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6609                 if (ret) {
6610                         mlog_errno(ret);
6611                         break;
6612                 }
6613
6614                 /*
6615                  * For the xattr which has l_tree_depth = 0, all the extent
6616                  * recs have already be copied to the new xh with the
6617                  * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6618                  * increase the refount count int the refcount tree.
6619                  *
6620                  * For the xattr which has l_tree_depth > 0, we need
6621                  * to initialize it to the empty default value root,
6622                  * and then insert the extents one by one.
6623                  */
6624                 if (xv->xr_list.l_tree_depth) {
6625                         memcpy(new_xv, &def_xv, sizeof(def_xv));
6626                         vb->vb_xv = new_xv;
6627                         vb->vb_bh = value_bh;
6628                         ocfs2_init_xattr_value_extent_tree(&data_et,
6629                                         INODE_CACHE(args->new_inode), vb);
6630                 }
6631
6632                 clusters = le32_to_cpu(xv->xr_clusters);
6633                 cpos = 0;
6634                 while (cpos < clusters) {
6635                         ret = ocfs2_xattr_get_clusters(args->old_inode,
6636                                                        cpos,
6637                                                        &p_cluster,
6638                                                        &num_clusters,
6639                                                        &xv->xr_list,
6640                                                        &ext_flags);
6641                         if (ret) {
6642                                 mlog_errno(ret);
6643                                 goto out;
6644                         }
6645
6646                         BUG_ON(!p_cluster);
6647
6648                         if (xv->xr_list.l_tree_depth) {
6649                                 ret = ocfs2_insert_extent(handle,
6650                                                 &data_et, cpos,
6651                                                 ocfs2_clusters_to_blocks(
6652                                                         args->old_inode->i_sb,
6653                                                         p_cluster),
6654                                                 num_clusters, ext_flags,
6655                                                 meta_ac);
6656                                 if (ret) {
6657                                         mlog_errno(ret);
6658                                         goto out;
6659                                 }
6660                         }
6661
6662                         ret = ocfs2_increase_refcount(handle, args->ref_ci,
6663                                                       args->ref_root_bh,
6664                                                       p_cluster, num_clusters,
6665                                                       meta_ac, args->dealloc);
6666                         if (ret) {
6667                                 mlog_errno(ret);
6668                                 goto out;
6669                         }
6670
6671                         cpos += num_clusters;
6672                 }
6673         }
6674
6675 out:
6676         return ret;
6677 }
6678
6679 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6680 {
6681         int ret = 0, credits = 0;
6682         handle_t *handle;
6683         struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6684         struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6685         int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6686         int header_off = osb->sb->s_blocksize - inline_size;
6687         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6688                                         (args->old_bh->b_data + header_off);
6689         struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6690                                         (args->new_bh->b_data + header_off);
6691         struct ocfs2_alloc_context *meta_ac = NULL;
6692         struct ocfs2_inode_info *new_oi;
6693         struct ocfs2_dinode *new_di;
6694         struct ocfs2_xattr_value_buf vb = {
6695                 .vb_bh = args->new_bh,
6696                 .vb_access = ocfs2_journal_access_di,
6697         };
6698
6699         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6700                                                   &credits, &meta_ac);
6701         if (ret) {
6702                 mlog_errno(ret);
6703                 goto out;
6704         }
6705
6706         handle = ocfs2_start_trans(osb, credits);
6707         if (IS_ERR(handle)) {
6708                 ret = PTR_ERR(handle);
6709                 mlog_errno(ret);
6710                 goto out;
6711         }
6712
6713         ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6714                                       args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6715         if (ret) {
6716                 mlog_errno(ret);
6717                 goto out_commit;
6718         }
6719
6720         memcpy(args->new_bh->b_data + header_off,
6721                args->old_bh->b_data + header_off, inline_size);
6722
6723         new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6724         new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6725
6726         ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6727                                          args->new_bh, new_xh, &vb, meta_ac,
6728                                          ocfs2_get_xattr_value_root, NULL);
6729         if (ret) {
6730                 mlog_errno(ret);
6731                 goto out_commit;
6732         }
6733
6734         new_oi = OCFS2_I(args->new_inode);
6735         spin_lock(&new_oi->ip_lock);
6736         new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6737         new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6738         spin_unlock(&new_oi->ip_lock);
6739
6740         ocfs2_journal_dirty(handle, args->new_bh);
6741
6742 out_commit:
6743         ocfs2_commit_trans(osb, handle);
6744
6745 out:
6746         if (meta_ac)
6747                 ocfs2_free_alloc_context(meta_ac);
6748         return ret;
6749 }
6750
6751 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6752                                           struct buffer_head *fe_bh,
6753                                           struct buffer_head **ret_bh,
6754                                           int indexed)
6755 {
6756         int ret;
6757         handle_t *handle;
6758         struct ocfs2_alloc_context *meta_ac;
6759         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6760
6761         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6762         if (ret < 0) {
6763                 mlog_errno(ret);
6764                 return ret;
6765         }
6766
6767         handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6768         if (IS_ERR(handle)) {
6769                 ret = PTR_ERR(handle);
6770                 mlog_errno(ret);
6771                 goto out;
6772         }
6773
6774         mlog(0, "create new xattr block for inode %llu, index = %d\n",
6775              (unsigned long long)fe_bh->b_blocknr, indexed);
6776         ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6777                                        meta_ac, ret_bh, indexed);
6778         if (ret)
6779                 mlog_errno(ret);
6780
6781         ocfs2_commit_trans(osb, handle);
6782 out:
6783         ocfs2_free_alloc_context(meta_ac);
6784         return ret;
6785 }
6786
6787 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6788                                      struct buffer_head *blk_bh,
6789                                      struct buffer_head *new_blk_bh)
6790 {
6791         int ret = 0, credits = 0;
6792         handle_t *handle;
6793         struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6794         struct ocfs2_dinode *new_di;
6795         struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6796         int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6797         struct ocfs2_xattr_block *xb =
6798                         (struct ocfs2_xattr_block *)blk_bh->b_data;
6799         struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6800         struct ocfs2_xattr_block *new_xb =
6801                         (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6802         struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6803         struct ocfs2_alloc_context *meta_ac;
6804         struct ocfs2_xattr_value_buf vb = {
6805                 .vb_bh = new_blk_bh,
6806                 .vb_access = ocfs2_journal_access_xb,
6807         };
6808
6809         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6810                                                   &credits, &meta_ac);
6811         if (ret) {
6812                 mlog_errno(ret);
6813                 return ret;
6814         }
6815
6816         /* One more credits in case we need to add xattr flags in new inode. */
6817         handle = ocfs2_start_trans(osb, credits + 1);
6818         if (IS_ERR(handle)) {
6819                 ret = PTR_ERR(handle);
6820                 mlog_errno(ret);
6821                 goto out;
6822         }
6823
6824         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6825                 ret = ocfs2_journal_access_di(handle,
6826                                               INODE_CACHE(args->new_inode),
6827                                               args->new_bh,
6828                                               OCFS2_JOURNAL_ACCESS_WRITE);
6829                 if (ret) {
6830                         mlog_errno(ret);
6831                         goto out_commit;
6832                 }
6833         }
6834
6835         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6836                                       new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6837         if (ret) {
6838                 mlog_errno(ret);
6839                 goto out_commit;
6840         }
6841
6842         memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6843                osb->sb->s_blocksize - header_off);
6844
6845         ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6846                                          new_blk_bh, new_xh, &vb, meta_ac,
6847                                          ocfs2_get_xattr_value_root, NULL);
6848         if (ret) {
6849                 mlog_errno(ret);
6850                 goto out_commit;
6851         }
6852
6853         ocfs2_journal_dirty(handle, new_blk_bh);
6854
6855         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6856                 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6857                 spin_lock(&new_oi->ip_lock);
6858                 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6859                 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6860                 spin_unlock(&new_oi->ip_lock);
6861
6862                 ocfs2_journal_dirty(handle, args->new_bh);
6863         }
6864
6865 out_commit:
6866         ocfs2_commit_trans(osb, handle);
6867
6868 out:
6869         ocfs2_free_alloc_context(meta_ac);
6870         return ret;
6871 }
6872
/*
 * State shared by the callbacks used while reflinking an indexed xattr
 * tree: the overall reflink request, the old and new xattr block buffers,
 * and one scratch bucket for each side.
 */
struct ocfs2_reflink_xattr_tree_args {
        struct ocfs2_xattr_reflink *reflink;
        struct buffer_head *old_blk_bh, *new_blk_bh;
        struct ocfs2_xattr_bucket *old_bucket, *new_bucket;
};
6880
6881 /*
6882  * NOTE:
6883  * We have to handle the case that both old bucket and new bucket
6884  * will call this function to get the right ret_bh.
6885  * So The caller must give us the right bh.
6886  */
6887 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6888                                         struct buffer_head *bh,
6889                                         struct ocfs2_xattr_header *xh,
6890                                         int offset,
6891                                         struct ocfs2_xattr_value_root **xv,
6892                                         struct buffer_head **ret_bh,
6893                                         void *para)
6894 {
6895         struct ocfs2_reflink_xattr_tree_args *args =
6896                         (struct ocfs2_reflink_xattr_tree_args *)para;
6897         struct ocfs2_xattr_bucket *bucket;
6898
6899         if (bh == args->old_bucket->bu_bhs[0])
6900                 bucket = args->old_bucket;
6901         else
6902                 bucket = args->new_bucket;
6903
6904         return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6905                                                xv, ret_bh);
6906 }
6907
/*
 * Running totals gathered while walking xattr buckets before a reflink.
 */
struct ocfs2_value_tree_metas {
        int num_metas;  /* metadata blocks that will have to be reserved */
        int credits;    /* journal credits accumulated so far */
        int num_recs;   /* records to add/modify in the refcount tree */
};
6913
/*
 * Value-root lookup callback used while counting metadata: @para is the
 * bucket being examined, and the lookup is forwarded to
 * ocfs2_get_xattr_tree_value_root().  @bh and @xh are not used here.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        struct ocfs2_xattr_bucket *cur_bucket = para;

        return ocfs2_get_xattr_tree_value_root(sb, cur_bucket, offset,
                                               xv, ret_bh);
}
6928
6929 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6930                                       struct ocfs2_xattr_bucket *bucket,
6931                                       void *para)
6932 {
6933         struct ocfs2_value_tree_metas *metas =
6934                         (struct ocfs2_value_tree_metas *)para;
6935         struct ocfs2_xattr_header *xh =
6936                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6937
6938         /* Add the credits for this bucket first. */
6939         metas->credits += bucket->bu_blocks;
6940         return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6941                                         xh, &metas->num_metas,
6942                                         &metas->credits, &metas->num_recs,
6943                                         ocfs2_value_tree_metas_in_bucket,
6944                                         bucket);
6945 }
6946
6947 /*
6948  * Given a xattr extent rec starting from blkno and having len clusters,
6949  * iterate all the buckets calculate how much metadata we need for reflinking
6950  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6951  */
6952 static int ocfs2_lock_reflink_xattr_rec_allocators(
6953                                 struct ocfs2_reflink_xattr_tree_args *args,
6954                                 struct ocfs2_extent_tree *xt_et,
6955                                 u64 blkno, u32 len, int *credits,
6956                                 struct ocfs2_alloc_context **meta_ac,
6957                                 struct ocfs2_alloc_context **data_ac)
6958 {
6959         int ret, num_free_extents;
6960         struct ocfs2_value_tree_metas metas;
6961         struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6962         struct ocfs2_refcount_block *rb;
6963
6964         memset(&metas, 0, sizeof(metas));
6965
6966         ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6967                                           ocfs2_calc_value_tree_metas, &metas);
6968         if (ret) {
6969                 mlog_errno(ret);
6970                 goto out;
6971         }
6972
6973         *credits = metas.credits;
6974
6975         /*
6976          * Calculate we need for refcount tree change.
6977          *
6978          * We need to add/modify num_recs in refcount tree, so just calculate
6979          * an approximate number we need for refcount tree change.
6980          * Sometimes we need to split the tree, and after split,  half recs
6981          * will be moved to the new block, and a new block can only provide
6982          * half number of recs. So we multiple new blocks by 2.
6983          * In the end, we have to add credits for modifying the already
6984          * existed refcount block.
6985          */
6986         rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6987         metas.num_recs =
6988                 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6989                  ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6990         metas.num_metas += metas.num_recs;
6991         *credits += metas.num_recs +
6992                     metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6993         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6994                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6995                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6996         else
6997                 *credits += 1;
6998
6999         /* count in the xattr tree change. */
7000         num_free_extents = ocfs2_num_free_extents(osb, xt_et);
7001         if (num_free_extents < 0) {
7002                 ret = num_free_extents;
7003                 mlog_errno(ret);
7004                 goto out;
7005         }
7006
7007         if (num_free_extents < len)
7008                 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
7009
7010         *credits += ocfs2_calc_extend_credits(osb->sb,
7011                                               xt_et->et_root_el, len);
7012
7013         if (metas.num_metas) {
7014                 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
7015                                                         meta_ac);
7016                 if (ret) {
7017                         mlog_errno(ret);
7018                         goto out;
7019                 }
7020         }
7021
7022         if (len) {
7023                 ret = ocfs2_reserve_clusters(osb, len, data_ac);
7024                 if (ret)
7025                         mlog_errno(ret);
7026         }
7027 out:
7028         if (ret) {
7029                 if (*meta_ac) {
7030                         ocfs2_free_alloc_context(*meta_ac);
7031                         meta_ac = NULL;
7032                 }
7033         }
7034
7035         return ret;
7036 }
7037
7038 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
7039                                 u64 blkno, u64 new_blkno, u32 clusters,
7040                                 struct ocfs2_alloc_context *meta_ac,
7041                                 struct ocfs2_alloc_context *data_ac,
7042                                 struct ocfs2_reflink_xattr_tree_args *args)
7043 {
7044         int i, j, ret = 0;
7045         struct super_block *sb = args->reflink->old_inode->i_sb;
7046         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
7047         u32 num_buckets = clusters * bpc;
7048         int bpb = args->old_bucket->bu_blocks;
7049         struct ocfs2_xattr_value_buf vb = {
7050                 .vb_access = ocfs2_journal_access,
7051         };
7052
7053         for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
7054                 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
7055                 if (ret) {
7056                         mlog_errno(ret);
7057                         break;
7058                 }
7059
7060                 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
7061                 if (ret) {
7062                         mlog_errno(ret);
7063                         break;
7064                 }
7065
7066                 /*
7067                  * The real bucket num in this series of blocks is stored
7068                  * in the 1st bucket.
7069                  */
7070                 if (i == 0)
7071                         num_buckets = le16_to_cpu(
7072                                 bucket_xh(args->old_bucket)->xh_num_buckets);
7073
7074                 ret = ocfs2_xattr_bucket_journal_access(handle,
7075                                                 args->new_bucket,
7076                                                 OCFS2_JOURNAL_ACCESS_CREATE);
7077                 if (ret) {
7078                         mlog_errno(ret);
7079                         break;
7080                 }
7081
7082                 for (j = 0; j < bpb; j++)
7083                         memcpy(bucket_block(args->new_bucket, j),
7084                                bucket_block(args->old_bucket, j),
7085                                sb->s_blocksize);
7086
7087                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
7088
7089                 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
7090                                         args->old_bucket->bu_bhs[0],
7091                                         bucket_xh(args->old_bucket),
7092                                         args->new_bucket->bu_bhs[0],
7093                                         bucket_xh(args->new_bucket),
7094                                         &vb, meta_ac,
7095                                         ocfs2_get_reflink_xattr_value_root,
7096                                         args);
7097                 if (ret) {
7098                         mlog_errno(ret);
7099                         break;
7100                 }
7101
7102                 /*
7103                  * Re-access and dirty the bucket to calculate metaecc.
7104                  * Because we may extend the transaction in reflink_xattr_header
7105                  * which will let the already accessed block gone.
7106                  */
7107                 ret = ocfs2_xattr_bucket_journal_access(handle,
7108                                                 args->new_bucket,
7109                                                 OCFS2_JOURNAL_ACCESS_WRITE);
7110                 if (ret) {
7111                         mlog_errno(ret);
7112                         break;
7113                 }
7114
7115                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
7116                 ocfs2_xattr_bucket_relse(args->old_bucket);
7117                 ocfs2_xattr_bucket_relse(args->new_bucket);
7118         }
7119
7120         ocfs2_xattr_bucket_relse(args->old_bucket);
7121         ocfs2_xattr_bucket_relse(args->new_bucket);
7122         return ret;
7123 }
7124 /*
7125  * Create the same xattr extent record in the new inode's xattr tree.
7126  */
7127 static int ocfs2_reflink_xattr_rec(struct inode *inode,
7128                                    struct buffer_head *root_bh,
7129                                    u64 blkno,
7130                                    u32 cpos,
7131                                    u32 len,
7132                                    void *para)
7133 {
7134         int ret, credits = 0;
7135         u32 p_cluster, num_clusters;
7136         u64 new_blkno;
7137         handle_t *handle;
7138         struct ocfs2_reflink_xattr_tree_args *args =
7139                         (struct ocfs2_reflink_xattr_tree_args *)para;
7140         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7141         struct ocfs2_alloc_context *meta_ac = NULL;
7142         struct ocfs2_alloc_context *data_ac = NULL;
7143         struct ocfs2_extent_tree et;
7144
7145         ocfs2_init_xattr_tree_extent_tree(&et,
7146                                           INODE_CACHE(args->reflink->new_inode),
7147                                           args->new_blk_bh);
7148
7149         ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7150                                                       len, &credits,
7151                                                       &meta_ac, &data_ac);
7152         if (ret) {
7153                 mlog_errno(ret);
7154                 goto out;
7155         }
7156
7157         handle = ocfs2_start_trans(osb, credits);
7158         if (IS_ERR(handle)) {
7159                 ret = PTR_ERR(handle);
7160                 mlog_errno(ret);
7161                 goto out;
7162         }
7163
7164         ret = ocfs2_claim_clusters(osb, handle, data_ac,
7165                                    len, &p_cluster, &num_clusters);
7166         if (ret) {
7167                 mlog_errno(ret);
7168                 goto out_commit;
7169         }
7170
7171         new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
7172
7173         mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
7174              (unsigned long long)blkno, (unsigned long long)new_blkno, len);
7175         ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
7176                                           meta_ac, data_ac, args);
7177         if (ret) {
7178                 mlog_errno(ret);
7179                 goto out_commit;
7180         }
7181
7182         mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
7183              (unsigned long long)new_blkno, len, cpos);
7184         ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
7185                                   len, 0, meta_ac);
7186         if (ret)
7187                 mlog_errno(ret);
7188
7189 out_commit:
7190         ocfs2_commit_trans(osb, handle);
7191
7192 out:
7193         if (meta_ac)
7194                 ocfs2_free_alloc_context(meta_ac);
7195         if (data_ac)
7196                 ocfs2_free_alloc_context(data_ac);
7197         return ret;
7198 }
7199
7200 /*
7201  * Create reflinked xattr buckets.
7202  * We will add bucket one by one, and refcount all the xattrs in the bucket
7203  * if they are stored outside.
7204  */
7205 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7206                                     struct buffer_head *blk_bh,
7207                                     struct buffer_head *new_blk_bh)
7208 {
7209         int ret;
7210         struct ocfs2_reflink_xattr_tree_args para;
7211
7212         memset(&para, 0, sizeof(para));
7213         para.reflink = args;
7214         para.old_blk_bh = blk_bh;
7215         para.new_blk_bh = new_blk_bh;
7216
7217         para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7218         if (!para.old_bucket) {
7219                 mlog_errno(-ENOMEM);
7220                 return -ENOMEM;
7221         }
7222
7223         para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7224         if (!para.new_bucket) {
7225                 ret = -ENOMEM;
7226                 mlog_errno(ret);
7227                 goto out;
7228         }
7229
7230         ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7231                                               ocfs2_reflink_xattr_rec,
7232                                               &para);
7233         if (ret)
7234                 mlog_errno(ret);
7235
7236 out:
7237         ocfs2_xattr_bucket_free(para.old_bucket);
7238         ocfs2_xattr_bucket_free(para.new_bucket);
7239         return ret;
7240 }
7241
7242 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7243                                         struct buffer_head *blk_bh)
7244 {
7245         int ret, indexed = 0;
7246         struct buffer_head *new_blk_bh = NULL;
7247         struct ocfs2_xattr_block *xb =
7248                         (struct ocfs2_xattr_block *)blk_bh->b_data;
7249
7250
7251         if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7252                 indexed = 1;
7253
7254         ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7255                                              &new_blk_bh, indexed);
7256         if (ret) {
7257                 mlog_errno(ret);
7258                 goto out;
7259         }
7260
7261         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
7262                 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7263         else
7264                 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7265         if (ret)
7266                 mlog_errno(ret);
7267
7268 out:
7269         brelse(new_blk_bh);
7270         return ret;
7271 }
7272
7273 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7274 {
7275         int type = ocfs2_xattr_get_type(xe);
7276
7277         return type != OCFS2_XATTR_INDEX_SECURITY &&
7278                type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7279                type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7280 }
7281
7282 int ocfs2_reflink_xattrs(struct inode *old_inode,
7283                          struct buffer_head *old_bh,
7284                          struct inode *new_inode,
7285                          struct buffer_head *new_bh,
7286                          bool preserve_security)
7287 {
7288         int ret;
7289         struct ocfs2_xattr_reflink args;
7290         struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7291         struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7292         struct buffer_head *blk_bh = NULL;
7293         struct ocfs2_cached_dealloc_ctxt dealloc;
7294         struct ocfs2_refcount_tree *ref_tree;
7295         struct buffer_head *ref_root_bh = NULL;
7296
7297         ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7298                                        le64_to_cpu(di->i_refcount_loc),
7299                                        1, &ref_tree, &ref_root_bh);
7300         if (ret) {
7301                 mlog_errno(ret);
7302                 goto out;
7303         }
7304
7305         ocfs2_init_dealloc_ctxt(&dealloc);
7306
7307         args.old_inode = old_inode;
7308         args.new_inode = new_inode;
7309         args.old_bh = old_bh;
7310         args.new_bh = new_bh;
7311         args.ref_ci = &ref_tree->rf_ci;
7312         args.ref_root_bh = ref_root_bh;
7313         args.dealloc = &dealloc;
7314         if (preserve_security)
7315                 args.xattr_reflinked = NULL;
7316         else
7317                 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7318
7319         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7320                 ret = ocfs2_reflink_xattr_inline(&args);
7321                 if (ret) {
7322                         mlog_errno(ret);
7323                         goto out_unlock;
7324                 }
7325         }
7326
7327         if (!di->i_xattr_loc)
7328                 goto out_unlock;
7329
7330         ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7331                                      &blk_bh);
7332         if (ret < 0) {
7333                 mlog_errno(ret);
7334                 goto out_unlock;
7335         }
7336
7337         ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7338         if (ret)
7339                 mlog_errno(ret);
7340
7341         brelse(blk_bh);
7342
7343 out_unlock:
7344         ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7345                                    ref_tree, 1);
7346         brelse(ref_root_bh);
7347
7348         if (ocfs2_dealloc_has_cluster(&dealloc)) {
7349                 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7350                 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7351         }
7352
7353 out:
7354         return ret;
7355 }
7356
7357 /*
7358  * Initialize security and acl for a already created inode.
7359  * Used for reflink a non-preserve-security file.
7360  *
7361  * It uses common api like ocfs2_xattr_set, so the caller
7362  * must not hold any lock expect i_mutex.
7363  */
7364 int ocfs2_init_security_and_acl(struct inode *dir,
7365                                 struct inode *inode)
7366 {
7367         int ret = 0;
7368         struct buffer_head *dir_bh = NULL;
7369         struct ocfs2_security_xattr_info si = {
7370                 .enable = 1,
7371         };
7372
7373         ret = ocfs2_init_security_get(inode, dir, &si);
7374         if (!ret) {
7375                 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7376                                       si.name, si.value, si.value_len,
7377                                       XATTR_CREATE);
7378                 if (ret) {
7379                         mlog_errno(ret);
7380                         goto leave;
7381                 }
7382         } else if (ret != -EOPNOTSUPP) {
7383                 mlog_errno(ret);
7384                 goto leave;
7385         }
7386
7387         ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7388         if (ret) {
7389                 mlog_errno(ret);
7390                 goto leave;
7391         }
7392
7393         ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7394         if (ret)
7395                 mlog_errno(ret);
7396
7397         ocfs2_inode_unlock(dir, 0);
7398         brelse(dir_bh);
7399 leave:
7400         return ret;
7401 }
7402 /*
7403  * 'security' attributes support
7404  */
7405 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7406                                         size_t list_size, const char *name,
7407                                         size_t name_len, int type)
7408 {
7409         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7410         const size_t total_len = prefix_len + name_len + 1;
7411
7412         if (list && total_len <= list_size) {
7413                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7414                 memcpy(list + prefix_len, name, name_len);
7415                 list[prefix_len + name_len] = '\0';
7416         }
7417         return total_len;
7418 }
7419
7420 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7421                                     void *buffer, size_t size, int type)
7422 {
7423         if (strcmp(name, "") == 0)
7424                 return -EINVAL;
7425         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7426                                name, buffer, size);
7427 }
7428
7429 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7430                 const void *value, size_t size, int flags, int type)
7431 {
7432         if (strcmp(name, "") == 0)
7433                 return -EINVAL;
7434
7435         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7436                                name, value, size, flags);
7437 }
7438
7439 int ocfs2_init_security_get(struct inode *inode,
7440                             struct inode *dir,
7441                             struct ocfs2_security_xattr_info *si)
7442 {
7443         /* check whether ocfs2 support feature xattr */
7444         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7445                 return -EOPNOTSUPP;
7446         return security_inode_init_security(inode, dir, &si->name, &si->value,
7447                                             &si->value_len);
7448 }
7449
7450 int ocfs2_init_security_set(handle_t *handle,
7451                             struct inode *inode,
7452                             struct buffer_head *di_bh,
7453                             struct ocfs2_security_xattr_info *si,
7454                             struct ocfs2_alloc_context *xattr_ac,
7455                             struct ocfs2_alloc_context *data_ac)
7456 {
7457         return ocfs2_xattr_set_handle(handle, inode, di_bh,
7458                                      OCFS2_XATTR_INDEX_SECURITY,
7459                                      si->name, si->value, si->value_len, 0,
7460                                      xattr_ac, data_ac);
7461 }
7462
7463 struct xattr_handler ocfs2_xattr_security_handler = {
7464         .prefix = XATTR_SECURITY_PREFIX,
7465         .list   = ocfs2_xattr_security_list,
7466         .get    = ocfs2_xattr_security_get,
7467         .set    = ocfs2_xattr_security_set,
7468 };
7469
7470 /*
7471  * 'trusted' attributes support
7472  */
7473 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7474                                        size_t list_size, const char *name,
7475                                        size_t name_len, int type)
7476 {
7477         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7478         const size_t total_len = prefix_len + name_len + 1;
7479
7480         if (list && total_len <= list_size) {
7481                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7482                 memcpy(list + prefix_len, name, name_len);
7483                 list[prefix_len + name_len] = '\0';
7484         }
7485         return total_len;
7486 }
7487
7488 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7489                 void *buffer, size_t size, int type)
7490 {
7491         if (strcmp(name, "") == 0)
7492                 return -EINVAL;
7493         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7494                                name, buffer, size);
7495 }
7496
7497 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7498                 const void *value, size_t size, int flags, int type)
7499 {
7500         if (strcmp(name, "") == 0)
7501                 return -EINVAL;
7502
7503         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7504                                name, value, size, flags);
7505 }
7506
7507 struct xattr_handler ocfs2_xattr_trusted_handler = {
7508         .prefix = XATTR_TRUSTED_PREFIX,
7509         .list   = ocfs2_xattr_trusted_list,
7510         .get    = ocfs2_xattr_trusted_get,
7511         .set    = ocfs2_xattr_trusted_set,
7512 };
7513
7514 /*
7515  * 'user' attributes support
7516  */
7517 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7518                                     size_t list_size, const char *name,
7519                                     size_t name_len, int type)
7520 {
7521         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7522         const size_t total_len = prefix_len + name_len + 1;
7523         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7524
7525         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7526                 return 0;
7527
7528         if (list && total_len <= list_size) {
7529                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7530                 memcpy(list + prefix_len, name, name_len);
7531                 list[prefix_len + name_len] = '\0';
7532         }
7533         return total_len;
7534 }
7535
7536 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7537                 void *buffer, size_t size, int type)
7538 {
7539         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7540
7541         if (strcmp(name, "") == 0)
7542                 return -EINVAL;
7543         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7544                 return -EOPNOTSUPP;
7545         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7546                                buffer, size);
7547 }
7548
7549 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7550                 const void *value, size_t size, int flags, int type)
7551 {
7552         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7553
7554         if (strcmp(name, "") == 0)
7555                 return -EINVAL;
7556         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7557                 return -EOPNOTSUPP;
7558
7559         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7560                                name, value, size, flags);
7561 }
7562
7563 struct xattr_handler ocfs2_xattr_user_handler = {
7564         .prefix = XATTR_USER_PREFIX,
7565         .list   = ocfs2_xattr_user_list,
7566         .get    = ocfs2_xattr_user_get,
7567         .set    = ocfs2_xattr_user_set,
7568 };