ocfs2: add ocfs2_init_acl in mknod
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "dlmglue.h"
46 #include "file.h"
47 #include "symlink.h"
48 #include "sysfile.h"
49 #include "inode.h"
50 #include "journal.h"
51 #include "ocfs2_fs.h"
52 #include "suballoc.h"
53 #include "uptodate.h"
54 #include "buffer_head_io.h"
55 #include "super.h"
56 #include "xattr.h"
57
58
/*
 * A value root immediately followed by one extent record.
 * OCFS2_XATTR_ROOT_SIZE is the size of this structure, and def_xv is an
 * instance of it whose extent list count is set to 1.
 */
struct ocfs2_xattr_def_value_root {
        struct ocfs2_xattr_value_root   xv;
        struct ocfs2_extent_rec         er;
};
63
/*
 * In-memory view of one xattr bucket: the inode it belongs to plus the
 * buffer_heads of the bu_blocks blocks that make it up.
 */
struct ocfs2_xattr_bucket {
        /* The inode these xattrs are associated with */
        struct inode *bu_inode;

        /* The actual buffers that make up the bucket */
        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

        /* How many blocks make up one bucket for this filesystem */
        int bu_blocks;
};
74
/*
 * State carried through the xattr modification helpers in this file:
 * the journal handle used for journal access/dirty calls, the metadata
 * and data allocation contexts handed to the extent tree code, and the
 * dealloc context that removed clusters are cached in.
 */
struct ocfs2_xattr_set_ctxt {
        handle_t *handle;
        struct ocfs2_alloc_context *meta_ac;
        struct ocfs2_alloc_context *data_ac;
        struct ocfs2_cached_dealloc_ctxt dealloc;
};
81
/* Size of a value root that carries one extent record. */
#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Values of at most this many bytes are sized as inline data by
 * ocfs2_xattr_entry_real_size(); larger values are sized as a value root.
 */
#define OCFS2_XATTR_INLINE_SIZE 80
/* OCFS2_MIN_XATTR_INLINE_SIZE less one xattr header and one __u32. */
#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - sizeof(__u32))
/*
 * Block size of (ptr)'s super block less the xattr block structure, one
 * xattr header and one __u32.  (ptr) is dereferenced as an inode.
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
                                         - sizeof(struct ocfs2_xattr_block) \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - sizeof(__u32))
91
/*
 * Template value root: everything zero except the extent list count,
 * which is 1 (stored little-endian).
 */
static struct ocfs2_xattr_def_value_root def_xv = {
        .xv.xr_list.l_count = cpu_to_le16(1),
};
95
/*
 * NULL-terminated list of the xattr handlers ocfs2 provides.  The two
 * POSIX ACL handlers are only present when CONFIG_OCFS2_FS_POSIX_ACL is set.
 */
struct xattr_handler *ocfs2_xattr_handlers[] = {
        &ocfs2_xattr_user_handler,
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
        &ocfs2_xattr_acl_access_handler,
        &ocfs2_xattr_acl_default_handler,
#endif
        &ocfs2_xattr_trusted_handler,
        &ocfs2_xattr_security_handler,
        NULL
};
106
/*
 * Handler lookup table indexed by xattr name index; slots without an
 * initializer stay NULL.  ocfs2_xattr_prefix() uses it to turn a name
 * index into a name prefix.
 */
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
        [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
        [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
                                        = &ocfs2_xattr_acl_access_handler,
        [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
                                        = &ocfs2_xattr_acl_default_handler,
#endif
        [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
        [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
};
118
/*
 * One extended attribute as handed to the set helpers (see
 * ocfs2_xattr_set_entry_index_block()): its name index, its name, and a
 * value buffer together with that buffer's length.
 */
struct ocfs2_xattr_info {
        int name_index;
        const char *name;
        const void *value;
        size_t value_len;
};
125
/*
 * Cursor used while looking up an extended attribute.
 * ocfs2_xattr_find_entry() walks the entries of @header starting at @here,
 * resolves name offsets relative to @base, and leaves @here on the entry
 * where the walk stopped.
 */
struct ocfs2_xattr_search {
        struct buffer_head *inode_bh;
        /*
         * xattr_bh points to the buffer head of the block that holds the
         * extended attribute.  When the extended attribute is stored in
         * the inode, xattr_bh is equal to inode_bh.
         */
        struct buffer_head *xattr_bh;
        struct ocfs2_xattr_header *header;
        struct ocfs2_xattr_bucket *bucket;
        void *base;
        void *end;
        struct ocfs2_xattr_entry *here;
        int not_found;
};
140
/*
 * Forward declarations of static helpers that are used before their
 * definitions appear in this file.
 */
static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
                                             struct ocfs2_xattr_header *xh,
                                             int index,
                                             int *block_off,
                                             int *new_offset);

static int ocfs2_xattr_block_find(struct inode *inode,
                                  int name_index,
                                  const char *name,
                                  struct ocfs2_xattr_search *xs);
static int ocfs2_xattr_index_block_find(struct inode *inode,
                                        struct buffer_head *root_bh,
                                        int name_index,
                                        const char *name,
                                        struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
                                        struct ocfs2_xattr_tree_root *xt,
                                        char *buffer,
                                        size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
                                          struct ocfs2_xattr_search *xs,
                                          struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
                                             struct ocfs2_xattr_info *xi,
                                             struct ocfs2_xattr_search *xs,
                                             struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_delete_xattr_index_block(struct inode *inode,
                                          struct buffer_head *xb_bh);
173
174 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
175 {
176         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
177 }
178
179 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
180 {
181         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
182 }
183
184 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
185 {
186         u16 len = sb->s_blocksize -
187                  offsetof(struct ocfs2_xattr_header, xh_entries);
188
189         return len / sizeof(struct ocfs2_xattr_entry);
190 }
191
/* Block number of the first buffer_head in bucket _b. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data pointer of the _n'th buffer_head in bucket _b. */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The first block of bucket _b, viewed as an xattr header. */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
195
196 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
197 {
198         struct ocfs2_xattr_bucket *bucket;
199         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
200
201         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
202
203         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
204         if (bucket) {
205                 bucket->bu_inode = inode;
206                 bucket->bu_blocks = blks;
207         }
208
209         return bucket;
210 }
211
212 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
213 {
214         int i;
215
216         for (i = 0; i < bucket->bu_blocks; i++) {
217                 brelse(bucket->bu_bhs[i]);
218                 bucket->bu_bhs[i] = NULL;
219         }
220 }
221
222 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
223 {
224         if (bucket) {
225                 ocfs2_xattr_bucket_relse(bucket);
226                 bucket->bu_inode = NULL;
227                 kfree(bucket);
228         }
229 }
230
231 /*
232  * A bucket that has never been written to disk doesn't need to be
233  * read.  We just need the buffer_heads.  Don't call this for
234  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
235  * them fully.
236  */
237 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
238                                    u64 xb_blkno)
239 {
240         int i, rc = 0;
241
242         for (i = 0; i < bucket->bu_blocks; i++) {
243                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
244                                               xb_blkno + i);
245                 if (!bucket->bu_bhs[i]) {
246                         rc = -EIO;
247                         mlog_errno(rc);
248                         break;
249                 }
250
251                 if (!ocfs2_buffer_uptodate(bucket->bu_inode,
252                                            bucket->bu_bhs[i]))
253                         ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
254                                                       bucket->bu_bhs[i]);
255         }
256
257         if (rc)
258                 ocfs2_xattr_bucket_relse(bucket);
259         return rc;
260 }
261
262 /* Read the xattr bucket at xb_blkno */
263 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
264                                    u64 xb_blkno)
265 {
266         int rc;
267
268         rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
269                                bucket->bu_blocks, bucket->bu_bhs, 0);
270         if (rc)
271                 ocfs2_xattr_bucket_relse(bucket);
272         return rc;
273 }
274
275 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
276                                              struct ocfs2_xattr_bucket *bucket,
277                                              int type)
278 {
279         int i, rc = 0;
280
281         for (i = 0; i < bucket->bu_blocks; i++) {
282                 rc = ocfs2_journal_access(handle, bucket->bu_inode,
283                                           bucket->bu_bhs[i], type);
284                 if (rc) {
285                         mlog_errno(rc);
286                         break;
287                 }
288         }
289
290         return rc;
291 }
292
293 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
294                                              struct ocfs2_xattr_bucket *bucket)
295 {
296         int i;
297
298         for (i = 0; i < bucket->bu_blocks; i++)
299                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
300 }
301
302 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
303                                          struct ocfs2_xattr_bucket *src)
304 {
305         int i;
306         int blocksize = src->bu_inode->i_sb->s_blocksize;
307
308         BUG_ON(dest->bu_blocks != src->bu_blocks);
309         BUG_ON(dest->bu_inode != src->bu_inode);
310
311         for (i = 0; i < src->bu_blocks; i++) {
312                 memcpy(bucket_block(dest, i), bucket_block(src, i),
313                        blocksize);
314         }
315 }
316
317 static inline const char *ocfs2_xattr_prefix(int name_index)
318 {
319         struct xattr_handler *handler = NULL;
320
321         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
322                 handler = ocfs2_xattr_handler_map[name_index];
323
324         return handler ? handler->prefix : NULL;
325 }
326
327 static u32 ocfs2_xattr_name_hash(struct inode *inode,
328                                  const char *name,
329                                  int name_len)
330 {
331         /* Get hash value of uuid from super block */
332         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
333         int i;
334
335         /* hash extended attribute name */
336         for (i = 0; i < name_len; i++) {
337                 hash = (hash << OCFS2_HASH_SHIFT) ^
338                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
339                        *name++;
340         }
341
342         return hash;
343 }
344
345 /*
346  * ocfs2_xattr_hash_entry()
347  *
348  * Compute the hash of an extended attribute.
349  */
350 static void ocfs2_xattr_hash_entry(struct inode *inode,
351                                    struct ocfs2_xattr_header *header,
352                                    struct ocfs2_xattr_entry *entry)
353 {
354         u32 hash = 0;
355         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
356
357         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
358         entry->xe_name_hash = cpu_to_le32(hash);
359
360         return;
361 }
362
363 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
364 {
365         int size = 0;
366
367         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
368                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
369         else
370                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
371         size += sizeof(struct ocfs2_xattr_entry);
372
373         return size;
374 }
375
376 int ocfs2_calc_security_init(struct inode *dir,
377                              struct ocfs2_security_xattr_info *si,
378                              int *want_clusters,
379                              int *xattr_credits,
380                              struct ocfs2_alloc_context **xattr_ac)
381 {
382         int ret = 0;
383         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
384         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
385                                                  si->value_len);
386
387         /*
388          * The max space of security xattr taken inline is
389          * 256(name) + 80(value) + 16(entry) = 352 bytes,
390          * So reserve one metadata block for it is ok.
391          */
392         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
393             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
394                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
395                 if (ret) {
396                         mlog_errno(ret);
397                         return ret;
398                 }
399                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
400         }
401
402         /* reserve clusters for xattr value which will be set in B tree*/
403         if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
404                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
405                                                            si->value_len);
406         return ret;
407 }
408
409 int ocfs2_calc_xattr_init(struct inode *dir,
410                           struct buffer_head *dir_bh,
411                           int mode,
412                           struct ocfs2_security_xattr_info *si,
413                           int *want_clusters,
414                           int *xattr_credits,
415                           struct ocfs2_alloc_context **xattr_ac)
416 {
417         int ret = 0;
418         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
419         int s_size = 0;
420         int a_size = 0;
421         int acl_len = 0;
422
423         if (si->enable)
424                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
425                                                      si->value_len);
426
427         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
428                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
429                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
430                                         "", NULL, 0);
431                 if (acl_len > 0) {
432                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
433                         if (S_ISDIR(mode))
434                                 a_size <<= 1;
435                 } else if (acl_len != 0 && acl_len != -ENODATA) {
436                         mlog_errno(ret);
437                         return ret;
438                 }
439         }
440
441         if (!(s_size + a_size))
442                 return ret;
443
444         /*
445          * The max space of security xattr taken inline is
446          * 256(name) + 80(value) + 16(entry) = 352 bytes,
447          * The max space of acl xattr taken inline is
448          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
449          * when blocksize = 512, may reserve one more cluser for
450          * xattr bucket, otherwise reserve one metadata block
451          * for them is ok.
452          */
453         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
454             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
455                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
456                 if (ret) {
457                         mlog_errno(ret);
458                         return ret;
459                 }
460                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
461         }
462
463         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
464             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
465                 *want_clusters += 1;
466                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
467         }
468
469         /* reserve clusters for xattr value which will be set in B tree*/
470         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE)
471                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
472                                                            si->value_len);
473         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
474             acl_len > OCFS2_XATTR_INLINE_SIZE) {
475                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
476                 if (S_ISDIR(mode))
477                         *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
478                                                                    acl_len);
479         }
480
481         return ret;
482 }
483
/*
 * Add @clusters_to_add clusters to the value tree rooted at @xv, which
 * lives in @xattr_bh.  The journal handle and both allocation contexts
 * come from @ctxt.
 *
 * Returns 0 on success or a negative error.  It is a BUG if the btree
 * code asks for a restart or adds fewer clusters than requested.
 */
static int ocfs2_xattr_extend_allocation(struct inode *inode,
                                         u32 clusters_to_add,
                                         struct buffer_head *xattr_bh,
                                         struct ocfs2_xattr_value_root *xv,
                                         struct ocfs2_xattr_set_ctxt *ctxt)
{
        int status = 0;
        handle_t *handle = ctxt->handle;
        enum ocfs2_alloc_restarted why;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        /* New clusters are appended after the ones the value already has. */
        u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
        struct ocfs2_extent_tree et;

        mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);

        ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);

        /* The value root lives in xattr_bh; get write access before changing it. */
        status = ocfs2_journal_access(handle, inode, xattr_bh,
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        /* Remember the old count so the number actually added can be computed. */
        prev_clusters = le32_to_cpu(xv->xr_clusters);
        status = ocfs2_add_clusters_in_btree(osb,
                                             inode,
                                             &logical_start,
                                             clusters_to_add,
                                             0,
                                             &et,
                                             handle,
                                             ctxt->data_ac,
                                             ctxt->meta_ac,
                                             &why);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        status = ocfs2_journal_dirty(handle, xattr_bh);
        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }

        /* What is left over is the part of the request that was not satisfied. */
        clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;

        /*
         * We should have already allocated enough space before the transaction,
         * so no need to restart.
         */
        BUG_ON(why != RESTART_NONE || clusters_to_add);

leave:

        return status;
}
542
/*
 * Remove @len clusters starting at logical offset @cpos from the value
 * tree rooted at @xv (stored in @root_bh), lower xv->xr_clusters to match,
 * and hand the physical clusters starting at @phys_cpos to
 * ocfs2_cache_cluster_dealloc() using the dealloc context in @ctxt.
 *
 * Returns 0 on success or the first error encountered.
 */
static int __ocfs2_remove_xattr_range(struct inode *inode,
                                      struct buffer_head *root_bh,
                                      struct ocfs2_xattr_value_root *xv,
                                      u32 cpos, u32 phys_cpos, u32 len,
                                      struct ocfs2_xattr_set_ctxt *ctxt)
{
        int ret;
        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
        handle_t *handle = ctxt->handle;
        struct ocfs2_extent_tree et;

        ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);

        /* The value root lives in root_bh; get write access before changing it. */
        ret = ocfs2_journal_access(handle, inode, root_bh,
                                   OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
                                  &ctxt->dealloc);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* Adding the negated length subtracts len from the cluster count. */
        le32_add_cpu(&xv->xr_clusters, -len);

        ret = ocfs2_journal_dirty(handle, root_bh);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
        if (ret)
                mlog_errno(ret);

out:
        return ret;
}
585
586 static int ocfs2_xattr_shrink_size(struct inode *inode,
587                                    u32 old_clusters,
588                                    u32 new_clusters,
589                                    struct buffer_head *root_bh,
590                                    struct ocfs2_xattr_value_root *xv,
591                                    struct ocfs2_xattr_set_ctxt *ctxt)
592 {
593         int ret = 0;
594         u32 trunc_len, cpos, phys_cpos, alloc_size;
595         u64 block;
596
597         if (old_clusters <= new_clusters)
598                 return 0;
599
600         cpos = new_clusters;
601         trunc_len = old_clusters - new_clusters;
602         while (trunc_len) {
603                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
604                                                &alloc_size, &xv->xr_list);
605                 if (ret) {
606                         mlog_errno(ret);
607                         goto out;
608                 }
609
610                 if (alloc_size > trunc_len)
611                         alloc_size = trunc_len;
612
613                 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
614                                                  phys_cpos, alloc_size,
615                                                  ctxt);
616                 if (ret) {
617                         mlog_errno(ret);
618                         goto out;
619                 }
620
621                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
622                 ocfs2_remove_xattr_clusters_from_cache(inode, block,
623                                                        alloc_size);
624                 cpos += alloc_size;
625                 trunc_len -= alloc_size;
626         }
627
628 out:
629         return ret;
630 }
631
632 static int ocfs2_xattr_value_truncate(struct inode *inode,
633                                       struct buffer_head *root_bh,
634                                       struct ocfs2_xattr_value_root *xv,
635                                       int len,
636                                       struct ocfs2_xattr_set_ctxt *ctxt)
637 {
638         int ret;
639         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
640         u32 old_clusters = le32_to_cpu(xv->xr_clusters);
641
642         if (new_clusters == old_clusters)
643                 return 0;
644
645         if (new_clusters > old_clusters)
646                 ret = ocfs2_xattr_extend_allocation(inode,
647                                                     new_clusters - old_clusters,
648                                                     root_bh, xv, ctxt);
649         else
650                 ret = ocfs2_xattr_shrink_size(inode,
651                                               old_clusters, new_clusters,
652                                               root_bh, xv, ctxt);
653
654         return ret;
655 }
656
/*
 * Append "<prefix><name>\0" to the listing in @buffer at offset *result.
 *
 * *result is always advanced by the length of the entry, even when
 * nothing is copied.  A @size of 0 means the caller only wants to know
 * how much space is needed.  Returns -ERANGE when the entry would end
 * past @size, 0 otherwise.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
                                  size_t *result, const char *prefix,
                                  const char *name, int name_len)
{
        size_t start = *result;
        int prefix_len = strlen(prefix);
        int entry_len = prefix_len + name_len + 1;
        char *dst;

        *result = start + entry_len;

        /* we are just looking for how big our buffer needs to be */
        if (size == 0)
                return 0;

        if (*result > size)
                return -ERANGE;

        dst = buffer + start;
        memcpy(dst, prefix, prefix_len);
        memcpy(dst + prefix_len, name, name_len);
        dst[prefix_len + name_len] = '\0';

        return 0;
}
680
681 static int ocfs2_xattr_list_entries(struct inode *inode,
682                                     struct ocfs2_xattr_header *header,
683                                     char *buffer, size_t buffer_size)
684 {
685         size_t result = 0;
686         int i, type, ret;
687         const char *prefix, *name;
688
689         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
690                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
691                 type = ocfs2_xattr_get_type(entry);
692                 prefix = ocfs2_xattr_prefix(type);
693
694                 if (prefix) {
695                         name = (const char *)header +
696                                 le16_to_cpu(entry->xe_name_offset);
697
698                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
699                                                      &result, prefix, name,
700                                                      entry->xe_name_len);
701                         if (ret)
702                                 return ret;
703                 }
704         }
705
706         return result;
707 }
708
709 static int ocfs2_xattr_ibody_list(struct inode *inode,
710                                   struct ocfs2_dinode *di,
711                                   char *buffer,
712                                   size_t buffer_size)
713 {
714         struct ocfs2_xattr_header *header = NULL;
715         struct ocfs2_inode_info *oi = OCFS2_I(inode);
716         int ret = 0;
717
718         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
719                 return ret;
720
721         header = (struct ocfs2_xattr_header *)
722                  ((void *)di + inode->i_sb->s_blocksize -
723                  le16_to_cpu(di->i_xattr_inline_size));
724
725         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
726
727         return ret;
728 }
729
730 static int ocfs2_xattr_block_list(struct inode *inode,
731                                   struct ocfs2_dinode *di,
732                                   char *buffer,
733                                   size_t buffer_size)
734 {
735         struct buffer_head *blk_bh = NULL;
736         struct ocfs2_xattr_block *xb;
737         int ret = 0;
738
739         if (!di->i_xattr_loc)
740                 return ret;
741
742         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
743         if (ret < 0) {
744                 mlog_errno(ret);
745                 return ret;
746         }
747
748         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
749         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
750                 ret = -EIO;
751                 goto cleanup;
752         }
753
754         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
755                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
756                 ret = ocfs2_xattr_list_entries(inode, header,
757                                                buffer, buffer_size);
758         } else {
759                 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
760                 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
761                                                    buffer, buffer_size);
762         }
763 cleanup:
764         brelse(blk_bh);
765
766         return ret;
767 }
768
/*
 * ocfs2_listxattr()
 *
 * List the extended attribute names of @dentry's inode into @buffer
 * (@size bytes): the inline xattrs first, then those in the external
 * xattr block.
 *
 * Returns the combined number of bytes of both listings, 0 when the
 * inode has no xattrs, -EOPNOTSUPP when the filesystem does not support
 * xattrs, or a negative error from locking or from either listing.
 */
ssize_t ocfs2_listxattr(struct dentry *dentry,
                        char *buffer,
                        size_t size)
{
        int ret = 0, i_ret = 0, b_ret = 0;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_dinode *di = NULL;
        struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);

        if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
                return -EOPNOTSUPP;

        /* Nothing to list; ret is still 0 here. */
        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
                return ret;

        ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
        if (ret < 0) {
                mlog_errno(ret);
                return ret;
        }

        di = (struct ocfs2_dinode *)di_bh->b_data;

        down_read(&oi->ip_xattr_sem);
        i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
        if (i_ret < 0)
                b_ret = 0;
        else {
                /* Continue the listing right after the inline names. */
                /*
                 * NOTE(review): if the inline names fill the buffer exactly,
                 * size becomes 0 here, which ocfs2_xattr_list_entry() treats
                 * as a size-only query - confirm this is intended.
                 */
                if (buffer) {
                        buffer += i_ret;
                        size -= i_ret;
                }
                b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
                                               buffer, size);
                /* On error return only the error, not error + inline length. */
                if (b_ret < 0)
                        i_ret = 0;
        }
        up_read(&oi->ip_xattr_sem);
        ocfs2_inode_unlock(dentry->d_inode, 0);

        brelse(di_bh);

        return i_ret + b_ret;
}
813
814 static int ocfs2_xattr_find_entry(int name_index,
815                                   const char *name,
816                                   struct ocfs2_xattr_search *xs)
817 {
818         struct ocfs2_xattr_entry *entry;
819         size_t name_len;
820         int i, cmp = 1;
821
822         if (name == NULL)
823                 return -EINVAL;
824
825         name_len = strlen(name);
826         entry = xs->here;
827         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
828                 cmp = name_index - ocfs2_xattr_get_type(entry);
829                 if (!cmp)
830                         cmp = name_len - entry->xe_name_len;
831                 if (!cmp)
832                         cmp = memcmp(name, (xs->base +
833                                      le16_to_cpu(entry->xe_name_offset)),
834                                      name_len);
835                 if (cmp == 0)
836                         break;
837                 entry += 1;
838         }
839         xs->here = entry;
840
841         return cmp ? -ENODATA : 0;
842 }
843
844 static int ocfs2_xattr_get_value_outside(struct inode *inode,
845                                          struct ocfs2_xattr_value_root *xv,
846                                          void *buffer,
847                                          size_t len)
848 {
849         u32 cpos, p_cluster, num_clusters, bpc, clusters;
850         u64 blkno;
851         int i, ret = 0;
852         size_t cplen, blocksize;
853         struct buffer_head *bh = NULL;
854         struct ocfs2_extent_list *el;
855
856         el = &xv->xr_list;
857         clusters = le32_to_cpu(xv->xr_clusters);
858         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
859         blocksize = inode->i_sb->s_blocksize;
860
861         cpos = 0;
862         while (cpos < clusters) {
863                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
864                                                &num_clusters, el);
865                 if (ret) {
866                         mlog_errno(ret);
867                         goto out;
868                 }
869
870                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
871                 /* Copy ocfs2_xattr_value */
872                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
873                         ret = ocfs2_read_block(inode, blkno, &bh);
874                         if (ret) {
875                                 mlog_errno(ret);
876                                 goto out;
877                         }
878
879                         cplen = len >= blocksize ? blocksize : len;
880                         memcpy(buffer, bh->b_data, cplen);
881                         len -= cplen;
882                         buffer += cplen;
883
884                         brelse(bh);
885                         bh = NULL;
886                         if (len == 0)
887                                 break;
888                 }
889                 cpos += num_clusters;
890         }
891 out:
892         return ret;
893 }
894
895 static int ocfs2_xattr_ibody_get(struct inode *inode,
896                                  int name_index,
897                                  const char *name,
898                                  void *buffer,
899                                  size_t buffer_size,
900                                  struct ocfs2_xattr_search *xs)
901 {
902         struct ocfs2_inode_info *oi = OCFS2_I(inode);
903         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
904         struct ocfs2_xattr_value_root *xv;
905         size_t size;
906         int ret = 0;
907
908         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
909                 return -ENODATA;
910
911         xs->end = (void *)di + inode->i_sb->s_blocksize;
912         xs->header = (struct ocfs2_xattr_header *)
913                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
914         xs->base = (void *)xs->header;
915         xs->here = xs->header->xh_entries;
916
917         ret = ocfs2_xattr_find_entry(name_index, name, xs);
918         if (ret)
919                 return ret;
920         size = le64_to_cpu(xs->here->xe_value_size);
921         if (buffer) {
922                 if (size > buffer_size)
923                         return -ERANGE;
924                 if (ocfs2_xattr_is_local(xs->here)) {
925                         memcpy(buffer, (void *)xs->base +
926                                le16_to_cpu(xs->here->xe_name_offset) +
927                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
928                 } else {
929                         xv = (struct ocfs2_xattr_value_root *)
930                                 (xs->base + le16_to_cpu(
931                                  xs->here->xe_name_offset) +
932                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
933                         ret = ocfs2_xattr_get_value_outside(inode, xv,
934                                                             buffer, size);
935                         if (ret < 0) {
936                                 mlog_errno(ret);
937                                 return ret;
938                         }
939                 }
940         }
941
942         return size;
943 }
944
945 static int ocfs2_xattr_block_get(struct inode *inode,
946                                  int name_index,
947                                  const char *name,
948                                  void *buffer,
949                                  size_t buffer_size,
950                                  struct ocfs2_xattr_search *xs)
951 {
952         struct ocfs2_xattr_block *xb;
953         struct ocfs2_xattr_value_root *xv;
954         size_t size;
955         int ret = -ENODATA, name_offset, name_len, block_off, i;
956
957         xs->bucket = ocfs2_xattr_bucket_new(inode);
958         if (!xs->bucket) {
959                 ret = -ENOMEM;
960                 mlog_errno(ret);
961                 goto cleanup;
962         }
963
964         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
965         if (ret) {
966                 mlog_errno(ret);
967                 goto cleanup;
968         }
969
970         if (xs->not_found) {
971                 ret = -ENODATA;
972                 goto cleanup;
973         }
974
975         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
976         size = le64_to_cpu(xs->here->xe_value_size);
977         if (buffer) {
978                 ret = -ERANGE;
979                 if (size > buffer_size)
980                         goto cleanup;
981
982                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
983                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
984                 i = xs->here - xs->header->xh_entries;
985
986                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
987                         ret = ocfs2_xattr_bucket_get_name_value(inode,
988                                                                 bucket_xh(xs->bucket),
989                                                                 i,
990                                                                 &block_off,
991                                                                 &name_offset);
992                         xs->base = bucket_block(xs->bucket, block_off);
993                 }
994                 if (ocfs2_xattr_is_local(xs->here)) {
995                         memcpy(buffer, (void *)xs->base +
996                                name_offset + name_len, size);
997                 } else {
998                         xv = (struct ocfs2_xattr_value_root *)
999                                 (xs->base + name_offset + name_len);
1000                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1001                                                             buffer, size);
1002                         if (ret < 0) {
1003                                 mlog_errno(ret);
1004                                 goto cleanup;
1005                         }
1006                 }
1007         }
1008         ret = size;
1009 cleanup:
1010         ocfs2_xattr_bucket_free(xs->bucket);
1011
1012         brelse(xs->xattr_bh);
1013         xs->xattr_bh = NULL;
1014         return ret;
1015 }
1016
1017 int ocfs2_xattr_get_nolock(struct inode *inode,
1018                            struct buffer_head *di_bh,
1019                            int name_index,
1020                            const char *name,
1021                            void *buffer,
1022                            size_t buffer_size)
1023 {
1024         int ret;
1025         struct ocfs2_dinode *di = NULL;
1026         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1027         struct ocfs2_xattr_search xis = {
1028                 .not_found = -ENODATA,
1029         };
1030         struct ocfs2_xattr_search xbs = {
1031                 .not_found = -ENODATA,
1032         };
1033
1034         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1035                 return -EOPNOTSUPP;
1036
1037         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1038                 ret = -ENODATA;
1039
1040         xis.inode_bh = xbs.inode_bh = di_bh;
1041         di = (struct ocfs2_dinode *)di_bh->b_data;
1042
1043         down_read(&oi->ip_xattr_sem);
1044         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1045                                     buffer_size, &xis);
1046         if (ret == -ENODATA && di->i_xattr_loc)
1047                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1048                                             buffer_size, &xbs);
1049         up_read(&oi->ip_xattr_sem);
1050
1051         return ret;
1052 }
1053
1054 /* ocfs2_xattr_get()
1055  *
1056  * Copy an extended attribute into the buffer provided.
1057  * Buffer is NULL to compute the size of buffer required.
1058  */
1059 static int ocfs2_xattr_get(struct inode *inode,
1060                            int name_index,
1061                            const char *name,
1062                            void *buffer,
1063                            size_t buffer_size)
1064 {
1065         int ret;
1066         struct buffer_head *di_bh = NULL;
1067
1068         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1069         if (ret < 0) {
1070                 mlog_errno(ret);
1071                 return ret;
1072         }
1073         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1074                                      name, buffer, buffer_size);
1075
1076         ocfs2_inode_unlock(inode, 0);
1077
1078         brelse(di_bh);
1079
1080         return ret;
1081 }
1082
1083 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1084                                            handle_t *handle,
1085                                            struct ocfs2_xattr_value_root *xv,
1086                                            const void *value,
1087                                            int value_len)
1088 {
1089         int ret = 0, i, cp_len, credits;
1090         u16 blocksize = inode->i_sb->s_blocksize;
1091         u32 p_cluster, num_clusters;
1092         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1093         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1094         u64 blkno;
1095         struct buffer_head *bh = NULL;
1096
1097         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1098
1099         /*
1100          * In __ocfs2_xattr_set_value_outside has already been dirtied,
1101          * so we don't need to worry about whether ocfs2_extend_trans
1102          * will create a new transactio for us or not.
1103          */
1104         credits = clusters * bpc;
1105         ret = ocfs2_extend_trans(handle, credits);
1106         if (ret) {
1107                 mlog_errno(ret);
1108                 goto out;
1109         }
1110
1111         while (cpos < clusters) {
1112                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1113                                                &num_clusters, &xv->xr_list);
1114                 if (ret) {
1115                         mlog_errno(ret);
1116                         goto out;
1117                 }
1118
1119                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1120
1121                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1122                         ret = ocfs2_read_block(inode, blkno, &bh);
1123                         if (ret) {
1124                                 mlog_errno(ret);
1125                                 goto out;
1126                         }
1127
1128                         ret = ocfs2_journal_access(handle,
1129                                                    inode,
1130                                                    bh,
1131                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1132                         if (ret < 0) {
1133                                 mlog_errno(ret);
1134                                 goto out;
1135                         }
1136
1137                         cp_len = value_len > blocksize ? blocksize : value_len;
1138                         memcpy(bh->b_data, value, cp_len);
1139                         value_len -= cp_len;
1140                         value += cp_len;
1141                         if (cp_len < blocksize)
1142                                 memset(bh->b_data + cp_len, 0,
1143                                        blocksize - cp_len);
1144
1145                         ret = ocfs2_journal_dirty(handle, bh);
1146                         if (ret < 0) {
1147                                 mlog_errno(ret);
1148                                 goto out;
1149                         }
1150                         brelse(bh);
1151                         bh = NULL;
1152
1153                         /*
1154                          * XXX: do we need to empty all the following
1155                          * blocks in this cluster?
1156                          */
1157                         if (!value_len)
1158                                 break;
1159                 }
1160                 cpos += num_clusters;
1161         }
1162 out:
1163         brelse(bh);
1164
1165         return ret;
1166 }
1167
1168 static int ocfs2_xattr_cleanup(struct inode *inode,
1169                                handle_t *handle,
1170                                struct ocfs2_xattr_info *xi,
1171                                struct ocfs2_xattr_search *xs,
1172                                size_t offs)
1173 {
1174         int ret = 0;
1175         size_t name_len = strlen(xi->name);
1176         void *val = xs->base + offs;
1177         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1178
1179         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1180                                    OCFS2_JOURNAL_ACCESS_WRITE);
1181         if (ret) {
1182                 mlog_errno(ret);
1183                 goto out;
1184         }
1185         /* Decrease xattr count */
1186         le16_add_cpu(&xs->header->xh_count, -1);
1187         /* Remove the xattr entry and tree root which has already be set*/
1188         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1189         memset(val, 0, size);
1190
1191         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1192         if (ret < 0)
1193                 mlog_errno(ret);
1194 out:
1195         return ret;
1196 }
1197
1198 static int ocfs2_xattr_update_entry(struct inode *inode,
1199                                     handle_t *handle,
1200                                     struct ocfs2_xattr_info *xi,
1201                                     struct ocfs2_xattr_search *xs,
1202                                     size_t offs)
1203 {
1204         int ret;
1205
1206         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1207                                    OCFS2_JOURNAL_ACCESS_WRITE);
1208         if (ret) {
1209                 mlog_errno(ret);
1210                 goto out;
1211         }
1212
1213         xs->here->xe_name_offset = cpu_to_le16(offs);
1214         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1215         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1216                 ocfs2_xattr_set_local(xs->here, 1);
1217         else
1218                 ocfs2_xattr_set_local(xs->here, 0);
1219         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1220
1221         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1222         if (ret < 0)
1223                 mlog_errno(ret);
1224 out:
1225         return ret;
1226 }
1227
1228 /*
1229  * ocfs2_xattr_set_value_outside()
1230  *
1231  * Set large size value in B tree.
1232  */
1233 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1234                                          struct ocfs2_xattr_info *xi,
1235                                          struct ocfs2_xattr_search *xs,
1236                                          struct ocfs2_xattr_set_ctxt *ctxt,
1237                                          size_t offs)
1238 {
1239         size_t name_len = strlen(xi->name);
1240         void *val = xs->base + offs;
1241         struct ocfs2_xattr_value_root *xv = NULL;
1242         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1243         int ret = 0;
1244
1245         memset(val, 0, size);
1246         memcpy(val, xi->name, name_len);
1247         xv = (struct ocfs2_xattr_value_root *)
1248                 (val + OCFS2_XATTR_SIZE(name_len));
1249         xv->xr_clusters = 0;
1250         xv->xr_last_eb_blk = 0;
1251         xv->xr_list.l_tree_depth = 0;
1252         xv->xr_list.l_count = cpu_to_le16(1);
1253         xv->xr_list.l_next_free_rec = 0;
1254
1255         ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1256                                          xi->value_len, ctxt);
1257         if (ret < 0) {
1258                 mlog_errno(ret);
1259                 return ret;
1260         }
1261         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
1262         if (ret < 0) {
1263                 mlog_errno(ret);
1264                 return ret;
1265         }
1266         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
1267                                               xi->value, xi->value_len);
1268         if (ret < 0)
1269                 mlog_errno(ret);
1270
1271         return ret;
1272 }
1273
1274 /*
1275  * ocfs2_xattr_set_entry_local()
1276  *
1277  * Set, replace or remove extended attribute in local.
      *
      * Operates only on the in-memory entry table and name/value area described
      * by @xs; this function makes no journal calls itself.
      *
      * @xi:       name, name index and value to store; a NULL xi->value takes
      *            the removal path.
      * @xs:       search state; xs->here is the entry that is replaced or
      *            removed, and the entry that receives the new offset, size
      *            and hash.
      * @last:     entry slot that is initialised when a new entry is inserted.
      * @min_offs: offset from xs->base; the new name+value is written directly
      *            below it.
1278  */
1279 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1280                                         struct ocfs2_xattr_info *xi,
1281                                         struct ocfs2_xattr_search *xs,
1282                                         struct ocfs2_xattr_entry *last,
1283                                         size_t min_offs)
1284 {
1285         size_t name_len = strlen(xi->name);
1286         int i;
1287 
1288         if (xi->value && xs->not_found) {
1289                 /* Insert the new xattr entry. */
1290                 le16_add_cpu(&xs->header->xh_count, 1);
1291                 ocfs2_xattr_set_type(last, xi->name_index);
1292                 ocfs2_xattr_set_local(last, 1);
1293                 last->xe_name_len = name_len;
1294         } else {
1295                 void *first_val;
1296                 void *val;
1297                 size_t offs, size;
1298 
1299                 first_val = xs->base + min_offs;
1300                 offs = le16_to_cpu(xs->here->xe_name_offset);
1301                 val = xs->base + offs;
1302 
                     /*
                      * Space currently taken by the old name+value: a value
                      * larger than OCFS2_XATTR_INLINE_SIZE is accounted as a
                      * tree root (OCFS2_XATTR_ROOT_SIZE), anything else as the
                      * padded value itself.
                      */
1303                 if (le64_to_cpu(xs->here->xe_value_size) >
1304                     OCFS2_XATTR_INLINE_SIZE)
1305                         size = OCFS2_XATTR_SIZE(name_len) +
1306                                 OCFS2_XATTR_ROOT_SIZE;
1307                 else
1308                         size = OCFS2_XATTR_SIZE(name_len) +
1309                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1310 
1311                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1312                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1313                         /* The old and the new value have the
1314                            same size. Just replace the value. */
1315                         ocfs2_xattr_set_local(xs->here, 1);
1316                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1317                         /* Clear value bytes. */
1318                         memset(val + OCFS2_XATTR_SIZE(name_len),
1319                                0,
1320                                OCFS2_XATTR_SIZE(xi->value_len));
1321                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1322                                xi->value,
1323                                xi->value_len);
1324                         return;
1325                 }
1326                 /* Remove the old name+value. */
                     /*
                      * Everything stored between first_val and the old
                      * name+value is moved up by `size` bytes, and the gap
                      * this opens at first_val is zeroed.
                      */
1327                 memmove(first_val + size, first_val, val - first_val);
1328                 memset(first_val, 0, size);
1329                 xs->here->xe_name_hash = 0;
1330                 xs->here->xe_name_offset = 0;
1331                 ocfs2_xattr_set_local(xs->here, 1);
1332                 xs->here->xe_value_size = 0;
1333 
1334                 min_offs += size;
1335 
1336                 /* Adjust all value offsets. */
                     /* Only entries whose name offset was below offs are shifted. */
1337                 last = xs->header->xh_entries;
1338                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1339                         size_t o = le16_to_cpu(last->xe_name_offset);
1340 
1341                         if (o < offs)
1342                                 last->xe_name_offset = cpu_to_le16(o + size);
1343                         last += 1;
1344                 }
1345 
1346                 if (!xi->value) {
1347                         /* Remove the old entry. */
                             /*
                              * After the loop above `last` is one past the final
                              * entry; step back to the final entry, close the hole
                              * at xs->here and clear the now unused final slot.
                              */
1348                         last -= 1;
1349                         memmove(xs->here, xs->here + 1,
1350                                 (void *)last - (void *)xs->here);
1351                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1352                         le16_add_cpu(&xs->header->xh_count, -1);
1353                 }
1354         }
             /*
              * NOTE(review): the insert branch above initialises @last, while the
              * code below writes through xs->here; this looks like it relies on
              * xs->here == last when xs->not_found is set - confirm against the
              * callers.
              */
1355         if (xi->value) {
1356                 /* Insert the new name+value. */
1357                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1358                                 OCFS2_XATTR_SIZE(xi->value_len);
1359                 void *val = xs->base + min_offs - size;
1360 
1361                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1362                 memset(val, 0, size);
1363                 memcpy(val, xi->name, name_len);
1364                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1365                        xi->value,
1366                        xi->value_len);
1367                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1368                 ocfs2_xattr_set_local(xs->here, 1);
1369                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1370         }
1371 
1372         return;
1373 }
1374
1375 /*
1376  * ocfs2_xattr_set_entry()
1377  *
1378  * Set extended attribute entry into inode or block.
1379  *
1380  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1381  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1382  * then set value in B tree with set_value_outside().
      *
      * @xi:   name, name index, value and value length to store.
      * @xs:   search state describing the target area (base/end/header), the
      *        entry found (here / not_found) and the buffers to journal
      *        (inode_bh, xattr_bh).
      * @ctxt: set context; ctxt->handle is the journal handle used throughout.
      * @flag: bits OR-ed into ip_dyn_features; when it does not contain
      *        OCFS2_INLINE_XATTR_FL, xs->xattr_bh is journalled as well.
      *
      * Returns -EIO when the computed free space is negative, -ENOSPC when the
      * entry does not fit, otherwise the result of the last journal or value
      * operation performed (negative on failure).
1383  */
1384 static int ocfs2_xattr_set_entry(struct inode *inode,
1385                                  struct ocfs2_xattr_info *xi,
1386                                  struct ocfs2_xattr_search *xs,
1387                                  struct ocfs2_xattr_set_ctxt *ctxt,
1388                                  int flag)
1389 {
1390         struct ocfs2_xattr_entry *last;
1391         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1392         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1393         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1394         size_t size_l = 0;
1395         handle_t *handle = ctxt->handle;
1396         int free, i, ret;
             /* Local copy of @xi; its value may be redirected to a tree root below. */
1397         struct ocfs2_xattr_info xi_l = {
1398                 .name_index = xi->name_index,
1399                 .name = xi->name,
1400                 .value = xi->value,
1401                 .value_len = xi->value_len,
1402         };
1403 
1404         /* Compute min_offs, last and free space. */
1405         last = xs->header->xh_entries;
1406 
             /*
              * Track the smallest name offset in use; when the loop ends `last`
              * points one past the final entry.
              */
1407         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1408                 size_t offs = le16_to_cpu(last->xe_name_offset);
1409                 if (offs < min_offs)
1410                         min_offs = offs;
1411                 last += 1;
1412         }
1413 
             /*
              * Gap between the end of the entry table and the lowest name/value,
              * less sizeof(__u32).  A negative gap is reported as -EIO.
              */
1414         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1415         if (free < 0)
1416                 return -EIO;
1417 
             /*
              * When an entry with this name already exists, the space it holds
              * (entry plus name+value or name+root) counts as available.
              */
1418         if (!xs->not_found) {
1419                 size_t size = 0;
1420                 if (ocfs2_xattr_is_local(xs->here))
1421                         size = OCFS2_XATTR_SIZE(name_len) +
1422                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1423                 else
1424                         size = OCFS2_XATTR_SIZE(name_len) +
1425                                 OCFS2_XATTR_ROOT_SIZE;
1426                 free += (size + sizeof(struct ocfs2_xattr_entry));
1427         }
1428         /* Check free space in inode or block */
1429         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1430                 if (free < sizeof(struct ocfs2_xattr_entry) +
1431                            OCFS2_XATTR_SIZE(name_len) +
1432                            OCFS2_XATTR_ROOT_SIZE) {
1433                         ret = -ENOSPC;
1434                         goto out;
1435                 }
                     /*
                      * Large value: only entry + name + tree root are stored
                      * locally.  xi_l is pointed at def_xv (defined elsewhere in
                      * this file) with a root-sized length, so that
                      * ocfs2_xattr_set_entry_local() writes a root-sized value.
                      */
1436                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1437                 xi_l.value = (void *)&def_xv;
1438                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1439         } else if (xi->value) {
1440                 if (free < sizeof(struct ocfs2_xattr_entry) +
1441                            OCFS2_XATTR_SIZE(name_len) +
1442                            OCFS2_XATTR_SIZE(xi->value_len)) {
1443                         ret = -ENOSPC;
1444                         goto out;
1445                 }
1446         }
1447 
1448         if (!xs->not_found) {
1449                 /* For existing extended attribute */
1450                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1451                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1452                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1453                 void *val = xs->base + offs;
1454 
1455                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1456                         /* Replace existing local xattr with tree root */
1457                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1458                                                             ctxt, offs);
1459                         if (ret < 0)
1460                                 mlog_errno(ret);
1461                         goto out;
1462                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1463                         /* For existing xattr which has value outside */
1464                         struct ocfs2_xattr_value_root *xv = NULL;
1465                         xv = (struct ocfs2_xattr_value_root *)(val +
1466                                 OCFS2_XATTR_SIZE(name_len));
1467 
1468                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1469                                 /*
1470                                  * If new value need set outside also,
1471                                  * first truncate old value to new value,
1472                                  * then set new value with set_value_outside().
1473                                  */
1474                                 ret = ocfs2_xattr_value_truncate(inode,
1475                                                                  xs->xattr_bh,
1476                                                                  xv,
1477                                                                  xi->value_len,
1478                                                                  ctxt);
1479                                 if (ret < 0) {
1480                                         mlog_errno(ret);
1481                                         goto out;
1482                                 }
1483 
1484                                 ret = ocfs2_xattr_update_entry(inode,
1485                                                                handle,
1486                                                                xi,
1487                                                                xs,
1488                                                                offs);
1489                                 if (ret < 0) {
1490                                         mlog_errno(ret);
1491                                         goto out;
1492                                 }
1493 
1494                                 ret = __ocfs2_xattr_set_value_outside(inode,
1495                                                                 handle,
1496                                                                 xv,
1497                                                                 xi->value,
1498                                                                 xi->value_len);
1499                                 if (ret < 0)
1500                                         mlog_errno(ret);
1501                                 goto out;
1502                         } else {
1503                                 /*
1504                                  * If new value need set in local,
1505                                  * just truncate old value to zero.
                                      *
                                      * NOTE(review): a failure here is only
                                      * logged; execution continues and ret is
                                      * overwritten by the ocfs2_journal_access()
                                      * call below - confirm this is intended.
1506                                  */
1507                                  ret = ocfs2_xattr_value_truncate(inode,
1508                                                                   xs->xattr_bh,
1509                                                                   xv,
1510                                                                   0,
1511                                                                   ctxt);
1512                                 if (ret < 0)
1513                                         mlog_errno(ret);
1514                         }
1515                 }
1516         }
1517 
             /*
              * The inode buffer is always journalled: its dyn-features and ctime
              * fields are updated further down.
              */
1518         ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1519                                    OCFS2_JOURNAL_ACCESS_WRITE);
1520         if (ret) {
1521                 mlog_errno(ret);
1522                 goto out;
1523         }
1524 
             /* Without OCFS2_INLINE_XATTR_FL in @flag, xs->xattr_bh is journalled too. */
1525         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1526                 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1527                                            OCFS2_JOURNAL_ACCESS_WRITE);
1528                 if (ret) {
1529                         mlog_errno(ret);
1530                         goto out;
1531                 }
1532         }
1533 
1534         /*
1535          * Set value in local, include set tree root in local.
1536          * This is the first step for value size >INLINE_SIZE.
1537          */
1538         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1539 
1540         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1541                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1542                 if (ret < 0) {
1543                         mlog_errno(ret);
1544                         goto out;
1545                 }
1546         }
1547 
             /*
              * The inode does not have OCFS2_INLINE_XATTR_FL yet but @flag asks
              * for it: carve the inline xattr area out of the inode block.
              */
1548         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1549             (flag & OCFS2_INLINE_XATTR_FL)) {
1550                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1551                 unsigned int xattrsize = osb->s_xattr_inline_size;
1552 
1553                 /*
1554                  * Adjust extent record count or inline data size
1555                  * to reserve space for extended attribute.
1556                  */
1557                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1558                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1559                         le16_add_cpu(&idata->id_count, -xattrsize);
1560                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1561                         struct ocfs2_extent_list *el = &di->id2.i_list;
1562                         le16_add_cpu(&el->l_count, -(xattrsize /
1563                                         sizeof(struct ocfs2_extent_rec)));
1564                 }
1565                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1566         }
1567         /* Update xattr flag */
             /* In-memory and on-disk feature bits are updated together under ip_lock. */
1568         spin_lock(&oi->ip_lock);
1569         oi->ip_dyn_features |= flag;
1570         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1571         spin_unlock(&oi->ip_lock);
1572         /* Update inode ctime */
1573         inode->i_ctime = CURRENT_TIME;
1574         di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1575         di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1576 
1577         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1578         if (ret < 0)
1579                 mlog_errno(ret);
1580 
1581         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1582                 /*
1583                  * Set value outside in B tree.
1584                  * This is the second step for value size > INLINE_SIZE.
1585                  */
1586                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1587                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
1588                 if (ret < 0) {
1589                         int ret2;
1590 
1591                         mlog_errno(ret);
1592                         /*
1593                          * If set value outside failed, we have to clean
1594                          * the junk tree root we have already set in local.
                              * The original error in ret is what gets returned;
                              * a cleanup failure (ret2) is only logged.
1595                          */
1596                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1597                                                    xi, xs, offs);
1598                         if (ret2 < 0)
1599                                 mlog_errno(ret2);
1600                 }
1601         }
1602 out:
1603         return ret;
1604 }
1605
1606 static int ocfs2_remove_value_outside(struct inode*inode,
1607                                       struct buffer_head *bh,
1608                                       struct ocfs2_xattr_header *header)
1609 {
1610         int ret = 0, i;
1611         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1612         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1613
1614         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1615
1616         ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1617         if (IS_ERR(ctxt.handle)) {
1618                 ret = PTR_ERR(ctxt.handle);
1619                 mlog_errno(ret);
1620                 goto out;
1621         }
1622
1623         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1624                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1625
1626                 if (!ocfs2_xattr_is_local(entry)) {
1627                         struct ocfs2_xattr_value_root *xv;
1628                         void *val;
1629
1630                         val = (void *)header +
1631                                 le16_to_cpu(entry->xe_name_offset);
1632                         xv = (struct ocfs2_xattr_value_root *)
1633                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1634                         ret = ocfs2_xattr_value_truncate(inode, bh, xv,
1635                                                          0, &ctxt);
1636                         if (ret < 0) {
1637                                 mlog_errno(ret);
1638                                 break;
1639                         }
1640                 }
1641         }
1642
1643         ocfs2_commit_trans(osb, ctxt.handle);
1644         ocfs2_schedule_truncate_log_flush(osb, 1);
1645         ocfs2_run_deallocs(osb, &ctxt.dealloc);
1646 out:
1647         return ret;
1648 }
1649
1650 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1651                                     struct buffer_head *di_bh)
1652 {
1653
1654         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1655         struct ocfs2_xattr_header *header;
1656         int ret;
1657
1658         header = (struct ocfs2_xattr_header *)
1659                  ((void *)di + inode->i_sb->s_blocksize -
1660                  le16_to_cpu(di->i_xattr_inline_size));
1661
1662         ret = ocfs2_remove_value_outside(inode, di_bh, header);
1663
1664         return ret;
1665 }
1666
1667 static int ocfs2_xattr_block_remove(struct inode *inode,
1668                                     struct buffer_head *blk_bh)
1669 {
1670         struct ocfs2_xattr_block *xb;
1671         int ret = 0;
1672
1673         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1674         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1675                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1676                 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1677         } else
1678                 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1679
1680         return ret;
1681 }
1682
1683 static int ocfs2_xattr_free_block(struct inode *inode,
1684                                   u64 block)
1685 {
1686         struct inode *xb_alloc_inode;
1687         struct buffer_head *xb_alloc_bh = NULL;
1688         struct buffer_head *blk_bh = NULL;
1689         struct ocfs2_xattr_block *xb;
1690         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1691         handle_t *handle;
1692         int ret = 0;
1693         u64 blk, bg_blkno;
1694         u16 bit;
1695
1696         ret = ocfs2_read_block(inode, block, &blk_bh);
1697         if (ret < 0) {
1698                 mlog_errno(ret);
1699                 goto out;
1700         }
1701
1702         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1703         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1704                 ret = -EIO;
1705                 goto out;
1706         }
1707
1708         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1709         if (ret < 0) {
1710                 mlog_errno(ret);
1711                 goto out;
1712         }
1713
1714         blk = le64_to_cpu(xb->xb_blkno);
1715         bit = le16_to_cpu(xb->xb_suballoc_bit);
1716         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1717
1718         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1719                                 EXTENT_ALLOC_SYSTEM_INODE,
1720                                 le16_to_cpu(xb->xb_suballoc_slot));
1721         if (!xb_alloc_inode) {
1722                 ret = -ENOMEM;
1723                 mlog_errno(ret);
1724                 goto out;
1725         }
1726         mutex_lock(&xb_alloc_inode->i_mutex);
1727
1728         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1729         if (ret < 0) {
1730                 mlog_errno(ret);
1731                 goto out_mutex;
1732         }
1733
1734         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1735         if (IS_ERR(handle)) {
1736                 ret = PTR_ERR(handle);
1737                 mlog_errno(ret);
1738                 goto out_unlock;
1739         }
1740
1741         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1742                                        bit, bg_blkno, 1);
1743         if (ret < 0)
1744                 mlog_errno(ret);
1745
1746         ocfs2_commit_trans(osb, handle);
1747 out_unlock:
1748         ocfs2_inode_unlock(xb_alloc_inode, 1);
1749         brelse(xb_alloc_bh);
1750 out_mutex:
1751         mutex_unlock(&xb_alloc_inode->i_mutex);
1752         iput(xb_alloc_inode);
1753 out:
1754         brelse(blk_bh);
1755         return ret;
1756 }
1757
1758 /*
1759  * ocfs2_xattr_remove()
1760  *
1761  * Free extended attribute resources associated with this inode.
1762  */
1763 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1764 {
1765         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1766         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1767         handle_t *handle;
1768         int ret;
1769
1770         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1771                 return 0;
1772
1773         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1774                 return 0;
1775
1776         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1777                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1778                 if (ret < 0) {
1779                         mlog_errno(ret);
1780                         goto out;
1781                 }
1782         }
1783
1784         if (di->i_xattr_loc) {
1785                 ret = ocfs2_xattr_free_block(inode,
1786                                              le64_to_cpu(di->i_xattr_loc));
1787                 if (ret < 0) {
1788                         mlog_errno(ret);
1789                         goto out;
1790                 }
1791         }
1792
1793         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1794                                    OCFS2_INODE_UPDATE_CREDITS);
1795         if (IS_ERR(handle)) {
1796                 ret = PTR_ERR(handle);
1797                 mlog_errno(ret);
1798                 goto out;
1799         }
1800         ret = ocfs2_journal_access(handle, inode, di_bh,
1801                                    OCFS2_JOURNAL_ACCESS_WRITE);
1802         if (ret) {
1803                 mlog_errno(ret);
1804                 goto out_commit;
1805         }
1806
1807         di->i_xattr_loc = 0;
1808
1809         spin_lock(&oi->ip_lock);
1810         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1811         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1812         spin_unlock(&oi->ip_lock);
1813
1814         ret = ocfs2_journal_dirty(handle, di_bh);
1815         if (ret < 0)
1816                 mlog_errno(ret);
1817 out_commit:
1818         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1819 out:
1820         return ret;
1821 }
1822
1823 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1824                                         struct ocfs2_dinode *di)
1825 {
1826         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1827         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1828         int free;
1829
1830         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1831                 return 0;
1832
1833         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1834                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1835                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1836         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1837                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1838                         le64_to_cpu(di->i_size);
1839         } else {
1840                 struct ocfs2_extent_list *el = &di->id2.i_list;
1841                 free = (le16_to_cpu(el->l_count) -
1842                         le16_to_cpu(el->l_next_free_rec)) *
1843                         sizeof(struct ocfs2_extent_rec);
1844         }
1845         if (free >= xattrsize)
1846                 return 1;
1847
1848         return 0;
1849 }
1850
1851 /*
1852  * ocfs2_xattr_ibody_find()
1853  *
1854  * Find extended attribute in inode block and
1855  * fill search info into struct ocfs2_xattr_search.
1856  */
1857 static int ocfs2_xattr_ibody_find(struct inode *inode,
1858                                   int name_index,
1859                                   const char *name,
1860                                   struct ocfs2_xattr_search *xs)
1861 {
1862         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1863         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1864         int ret;
1865         int has_space = 0;
1866
1867         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1868                 return 0;
1869
1870         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1871                 down_read(&oi->ip_alloc_sem);
1872                 has_space = ocfs2_xattr_has_space_inline(inode, di);
1873                 up_read(&oi->ip_alloc_sem);
1874                 if (!has_space)
1875                         return 0;
1876         }
1877
1878         xs->xattr_bh = xs->inode_bh;
1879         xs->end = (void *)di + inode->i_sb->s_blocksize;
1880         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1881                 xs->header = (struct ocfs2_xattr_header *)
1882                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1883         else
1884                 xs->header = (struct ocfs2_xattr_header *)
1885                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1886         xs->base = (void *)xs->header;
1887         xs->here = xs->header->xh_entries;
1888
1889         /* Find the named attribute. */
1890         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1891                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1892                 if (ret && ret != -ENODATA)
1893                         return ret;
1894                 xs->not_found = ret;
1895         }
1896
1897         return 0;
1898 }
1899
1900 /*
1901  * ocfs2_xattr_ibody_set()
1902  *
1903  * Set, replace or remove an extended attribute into inode block.
1904  *
1905  */
1906 static int ocfs2_xattr_ibody_set(struct inode *inode,
1907                                  struct ocfs2_xattr_info *xi,
1908                                  struct ocfs2_xattr_search *xs,
1909                                  struct ocfs2_xattr_set_ctxt *ctxt)
1910 {
1911         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1912         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1913         int ret;
1914
1915         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1916                 return -ENOSPC;
1917
1918         down_write(&oi->ip_alloc_sem);
1919         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1920                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1921                         ret = -ENOSPC;
1922                         goto out;
1923                 }
1924         }
1925
1926         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1927                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1928 out:
1929         up_write(&oi->ip_alloc_sem);
1930
1931         return ret;
1932 }
1933
1934 /*
1935  * ocfs2_xattr_block_find()
1936  *
1937  * Find extended attribute in external block and
1938  * fill search info into struct ocfs2_xattr_search.
1939  */
1940 static int ocfs2_xattr_block_find(struct inode *inode,
1941                                   int name_index,
1942                                   const char *name,
1943                                   struct ocfs2_xattr_search *xs)
1944 {
1945         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1946         struct buffer_head *blk_bh = NULL;
1947         struct ocfs2_xattr_block *xb;
1948         int ret = 0;
1949
1950         if (!di->i_xattr_loc)
1951                 return ret;
1952
1953         ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
1954         if (ret < 0) {
1955                 mlog_errno(ret);
1956                 return ret;
1957         }
1958
1959         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1960         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1961                 ret = -EIO;
1962                 goto cleanup;
1963         }
1964
1965         xs->xattr_bh = blk_bh;
1966
1967         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1968                 xs->header = &xb->xb_attrs.xb_header;
1969                 xs->base = (void *)xs->header;
1970                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1971                 xs->here = xs->header->xh_entries;
1972
1973                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1974         } else
1975                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1976                                                    name_index,
1977                                                    name, xs);
1978
1979         if (ret && ret != -ENODATA) {
1980                 xs->xattr_bh = NULL;
1981                 goto cleanup;
1982         }
1983         xs->not_found = ret;
1984         return 0;
1985 cleanup:
1986         brelse(blk_bh);
1987
1988         return ret;
1989 }
1990
1991 /*
1992  * ocfs2_xattr_block_set()
1993  *
1994  * Set, replace or remove an extended attribute into external block.
1995  *
1996  */
1997 static int ocfs2_xattr_block_set(struct inode *inode,
1998                                  struct ocfs2_xattr_info *xi,
1999                                  struct ocfs2_xattr_search *xs,
2000                                  struct ocfs2_xattr_set_ctxt *ctxt)
2001 {
2002         struct buffer_head *new_bh = NULL;
2003         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2004         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
2005         handle_t *handle = ctxt->handle;
2006         struct ocfs2_xattr_block *xblk = NULL;
2007         u16 suballoc_bit_start;
2008         u32 num_got;
2009         u64 first_blkno;
2010         int ret;
2011
2012         if (!xs->xattr_bh) {
2013                 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
2014                                            OCFS2_JOURNAL_ACCESS_CREATE);
2015                 if (ret < 0) {
2016                         mlog_errno(ret);
2017                         goto end;
2018                 }
2019
2020                 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
2021                                            &suballoc_bit_start, &num_got,
2022                                            &first_blkno);
2023                 if (ret < 0) {
2024                         mlog_errno(ret);
2025                         goto end;
2026                 }
2027
2028                 new_bh = sb_getblk(inode->i_sb, first_blkno);
2029                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
2030
2031                 ret = ocfs2_journal_access(handle, inode, new_bh,
2032                                            OCFS2_JOURNAL_ACCESS_CREATE);
2033                 if (ret < 0) {
2034                         mlog_errno(ret);
2035                         goto end;
2036                 }
2037
2038                 /* Initialize ocfs2_xattr_block */
2039                 xs->xattr_bh = new_bh;
2040                 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2041                 memset(xblk, 0, inode->i_sb->s_blocksize);
2042                 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2043                 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2044                 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2045                 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2046                 xblk->xb_blkno = cpu_to_le64(first_blkno);
2047
2048                 xs->header = &xblk->xb_attrs.xb_header;
2049                 xs->base = (void *)xs->header;
2050                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2051                 xs->here = xs->header->xh_entries;
2052
2053                 ret = ocfs2_journal_dirty(handle, new_bh);
2054                 if (ret < 0) {
2055                         mlog_errno(ret);
2056                         goto end;
2057                 }
2058                 di->i_xattr_loc = cpu_to_le64(first_blkno);
2059                 ocfs2_journal_dirty(handle, xs->inode_bh);
2060         } else
2061                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2062
2063         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2064                 /* Set extended attribute into external block */
2065                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2066                                             OCFS2_HAS_XATTR_FL);
2067                 if (!ret || ret != -ENOSPC)
2068                         goto end;
2069
2070                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2071                 if (ret)
2072                         goto end;
2073         }
2074
2075         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2076
2077 end:
2078
2079         return ret;
2080 }
2081
2082 /* Check whether the new xattr can be inserted into the inode. */
2083 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2084                                        struct ocfs2_xattr_info *xi,
2085                                        struct ocfs2_xattr_search *xs)
2086 {
2087         u64 value_size;
2088         struct ocfs2_xattr_entry *last;
2089         int free, i;
2090         size_t min_offs = xs->end - xs->base;
2091
2092         if (!xs->header)
2093                 return 0;
2094
2095         last = xs->header->xh_entries;
2096
2097         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2098                 size_t offs = le16_to_cpu(last->xe_name_offset);
2099                 if (offs < min_offs)
2100                         min_offs = offs;
2101                 last += 1;
2102         }
2103
2104         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
2105         if (free < 0)
2106                 return 0;
2107
2108         BUG_ON(!xs->not_found);
2109
2110         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2111                 value_size = OCFS2_XATTR_ROOT_SIZE;
2112         else
2113                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2114
2115         if (free >= sizeof(struct ocfs2_xattr_entry) +
2116                    OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2117                 return 1;
2118
2119         return 0;
2120 }
2121
2122 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2123                                      struct ocfs2_dinode *di,
2124                                      struct ocfs2_xattr_info *xi,
2125                                      struct ocfs2_xattr_search *xis,
2126                                      struct ocfs2_xattr_search *xbs,
2127                                      int *clusters_need,
2128                                      int *meta_need,
2129                                      int *credits_need)
2130 {
2131         int ret = 0, old_in_xb = 0;
2132         int clusters_add = 0, meta_add = 0, credits = 0;
2133         struct buffer_head *bh = NULL;
2134         struct ocfs2_xattr_block *xb = NULL;
2135         struct ocfs2_xattr_entry *xe = NULL;
2136         struct ocfs2_xattr_value_root *xv = NULL;
2137         char *base = NULL;
2138         int name_offset, name_len = 0;
2139         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2140                                                     xi->value_len);
2141         u64 value_size;
2142
2143         if (xis->not_found && xbs->not_found) {
2144                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2145
2146                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2147                         clusters_add += new_clusters;
2148                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2149                                                         &def_xv.xv.xr_list,
2150                                                         new_clusters);
2151                 }
2152
2153                 goto meta_guess;
2154         }
2155
2156         if (!xis->not_found) {
2157                 xe = xis->here;
2158                 name_offset = le16_to_cpu(xe->xe_name_offset);
2159                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2160                 base = xis->base;
2161                 credits += OCFS2_INODE_UPDATE_CREDITS;
2162         } else {
2163                 int i, block_off;
2164                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2165                 xe = xbs->here;
2166                 name_offset = le16_to_cpu(xe->xe_name_offset);
2167                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2168                 i = xbs->here - xbs->header->xh_entries;
2169                 old_in_xb = 1;
2170
2171                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2172                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2173                                                         bucket_xh(xbs->bucket),
2174                                                         i, &block_off,
2175                                                         &name_offset);
2176                         base = bucket_block(xbs->bucket, block_off);
2177                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2178                 } else {
2179                         base = xbs->base;
2180                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2181                 }
2182         }
2183
2184         /*
2185          * delete a xattr doesn't need metadata and cluster allocation.
2186          * so just calculate the credits and return.
2187          *
2188          * The credits for removing the value tree will be extended
2189          * by ocfs2_remove_extent itself.
2190          */
2191         if (!xi->value) {
2192                 if (!ocfs2_xattr_is_local(xe))
2193                         credits += OCFS2_REMOVE_EXTENT_CREDITS;
2194
2195                 goto out;
2196         }
2197
2198         /* do cluster allocation guess first. */
2199         value_size = le64_to_cpu(xe->xe_value_size);
2200
2201         if (old_in_xb) {
2202                 /*
2203                  * In xattr set, we always try to set the xe in inode first,
2204                  * so if it can be inserted into inode successfully, the old
2205                  * one will be removed from the xattr block, and this xattr
2206                  * will be inserted into inode as a new xattr in inode.
2207                  */
2208                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2209                         clusters_add += new_clusters;
2210                         credits += OCFS2_REMOVE_EXTENT_CREDITS +
2211                                     OCFS2_INODE_UPDATE_CREDITS;
2212                         if (!ocfs2_xattr_is_local(xe))
2213                                 credits += ocfs2_calc_extend_credits(
2214                                                         inode->i_sb,
2215                                                         &def_xv.xv.xr_list,
2216                                                         new_clusters);
2217                         goto out;
2218                 }
2219         }
2220
2221         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2222                 /* the new values will be stored outside. */
2223                 u32 old_clusters = 0;
2224
2225                 if (!ocfs2_xattr_is_local(xe)) {
2226                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2227                                                                  value_size);
2228                         xv = (struct ocfs2_xattr_value_root *)
2229                              (base + name_offset + name_len);
2230                 } else
2231                         xv = &def_xv.xv;
2232
2233                 if (old_clusters >= new_clusters) {
2234                         credits += OCFS2_REMOVE_EXTENT_CREDITS;
2235                         goto out;
2236                 } else {
2237                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2238                         clusters_add += new_clusters - old_clusters;
2239                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2240                                                              &xv->xr_list,
2241                                                              new_clusters -
2242                                                              old_clusters);
2243                         goto out;
2244                 }
2245         } else {
2246                 /*
2247                  * Now the new value will be stored inside. So if the new
2248                  * value is smaller than the size of value root or the old
2249                  * value, we don't need any allocation, otherwise we have
2250                  * to guess metadata allocation.
2251                  */
2252                 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2253                     (!ocfs2_xattr_is_local(xe) &&
2254                      OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2255                         goto out;
2256         }
2257
2258 meta_guess:
2259         /* calculate metadata allocation. */
2260         if (di->i_xattr_loc) {
2261                 if (!xbs->xattr_bh) {
2262                         ret = ocfs2_read_block(inode,
2263                                                le64_to_cpu(di->i_xattr_loc),
2264                                                &bh);
2265                         if (ret) {
2266                                 mlog_errno(ret);
2267                                 goto out;
2268                         }
2269
2270                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2271                 } else
2272                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2273
2274                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2275                         struct ocfs2_extent_list *el =
2276                                  &xb->xb_attrs.xb_root.xt_list;
2277                         meta_add += ocfs2_extend_meta_needed(el);
2278                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2279                                                              el, 1);
2280                 }
2281
2282                 /*
2283                  * This cluster will be used either for new bucket or for
2284                  * new xattr block.
2285                  * If the cluster size is the same as the bucket size, one
2286                  * more is needed since we may need to extend the bucket
2287                  * also.
2288                  */
2289                 clusters_add += 1;
2290                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2291                 if (OCFS2_XATTR_BUCKET_SIZE ==
2292                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2293                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2294                         clusters_add += 1;
2295                 }
2296         } else {
2297                 meta_add += 1;
2298                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2299         }
2300 out:
2301         if (clusters_need)
2302                 *clusters_need = clusters_add;
2303         if (meta_need)
2304                 *meta_need = meta_add;
2305         if (credits_need)
2306                 *credits_need = credits;
2307         brelse(bh);
2308         return ret;
2309 }
2310
2311 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2312                                      struct ocfs2_dinode *di,
2313                                      struct ocfs2_xattr_info *xi,
2314                                      struct ocfs2_xattr_search *xis,
2315                                      struct ocfs2_xattr_search *xbs,
2316                                      struct ocfs2_xattr_set_ctxt *ctxt,
2317                                      int *credits)
2318 {
2319         int clusters_add, meta_add, ret;
2320         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2321
2322         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2323
2324         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2325
2326         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2327                                         &clusters_add, &meta_add, credits);
2328         if (ret) {
2329                 mlog_errno(ret);
2330                 return ret;
2331         }
2332
2333         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2334              "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2335
2336         if (meta_add) {
2337                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2338                                                         &ctxt->meta_ac);
2339                 if (ret) {
2340                         mlog_errno(ret);
2341                         goto out;
2342                 }
2343         }
2344
2345         if (clusters_add) {
2346                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2347                 if (ret)
2348                         mlog_errno(ret);
2349         }
2350 out:
2351         if (ret) {
2352                 if (ctxt->meta_ac) {
2353                         ocfs2_free_alloc_context(ctxt->meta_ac);
2354                         ctxt->meta_ac = NULL;
2355                 }
2356
2357                 /*
2358                  * We cannot have an error and a non null ctxt->data_ac.
2359                  */
2360         }
2361
2362         return ret;
2363 }
2364
2365 static int __ocfs2_xattr_set_handle(struct inode *inode,
2366                                     struct ocfs2_dinode *di,
2367                                     struct ocfs2_xattr_info *xi,
2368                                     struct ocfs2_xattr_search *xis,
2369                                     struct ocfs2_xattr_search *xbs,
2370                                     struct ocfs2_xattr_set_ctxt *ctxt)
2371 {
2372         int ret = 0, credits;
2373
2374         if (!xi->value) {
2375                 /* Remove existing extended attribute */
2376                 if (!xis->not_found)
2377                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2378                 else if (!xbs->not_found)
2379                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2380         } else {
2381                 /* We always try to set extended attribute into inode first*/
2382                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2383                 if (!ret && !xbs->not_found) {
2384                         /*
2385                          * If succeed and that extended attribute existing in
2386                          * external block, then we will remove it.
2387                          */
2388                         xi->value = NULL;
2389                         xi->value_len = 0;
2390
2391                         xis->not_found = -ENODATA;
2392                         ret = ocfs2_calc_xattr_set_need(inode,
2393                                                         di,
2394                                                         xi,
2395                                                         xis,
2396                                                         xbs,
2397                                                         NULL,
2398                                                         NULL,
2399                                                         &credits);
2400                         if (ret) {
2401                                 mlog_errno(ret);
2402                                 goto out;
2403                         }
2404
2405                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2406                                         ctxt->handle->h_buffer_credits);
2407                         if (ret) {
2408                                 mlog_errno(ret);
2409                                 goto out;
2410                         }
2411                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2412                 } else if (ret == -ENOSPC) {
2413                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2414                                 ret = ocfs2_xattr_block_find(inode,
2415                                                              xi->name_index,
2416                                                              xi->name, xbs);
2417                                 if (ret)
2418                                         goto out;
2419
2420                                 xis->not_found = -ENODATA;
2421                                 ret = ocfs2_calc_xattr_set_need(inode,
2422                                                                 di,
2423                                                                 xi,
2424                                                                 xis,
2425                                                                 xbs,
2426                                                                 NULL,
2427                                                                 NULL,
2428                                                                 &credits);
2429                                 if (ret) {
2430                                         mlog_errno(ret);
2431                                         goto out;
2432                                 }
2433
2434                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2435                                         ctxt->handle->h_buffer_credits);
2436                                 if (ret) {
2437                                         mlog_errno(ret);
2438                                         goto out;
2439                                 }
2440                         }
2441                         /*
2442                          * If no space in inode, we will set extended attribute
2443                          * into external block.
2444                          */
2445                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2446                         if (ret)
2447                                 goto out;
2448                         if (!xis->not_found) {
2449                                 /*
2450                                  * If succeed and that extended attribute
2451                                  * existing in inode, we will remove it.
2452                                  */
2453                                 xi->value = NULL;
2454                                 xi->value_len = 0;
2455                                 xbs->not_found = -ENODATA;
2456                                 ret = ocfs2_calc_xattr_set_need(inode,
2457                                                                 di,
2458                                                                 xi,
2459                                                                 xis,
2460                                                                 xbs,
2461                                                                 NULL,
2462                                                                 NULL,
2463                                                                 &credits);
2464                                 if (ret) {
2465                                         mlog_errno(ret);
2466                                         goto out;
2467                                 }
2468
2469                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2470                                                 ctxt->handle->h_buffer_credits);
2471                                 if (ret) {
2472                                         mlog_errno(ret);
2473                                         goto out;
2474                                 }
2475                                 ret = ocfs2_xattr_ibody_set(inode, xi,
2476                                                             xis, ctxt);
2477                         }
2478                 }
2479         }
2480
2481 out:
2482         return ret;
2483 }
2484
2485 /*
2486  * This function only called duing creating inode
2487  * for init security/acl xattrs of the new inode.
2488  * The xattrs could be put into ibody or extent block,
2489  * xattr bucket would not be use in this case.
2490  * transanction credits also be reserved in here.
2491  */
2492 int ocfs2_xattr_set_handle(handle_t *handle,
2493                            struct inode *inode,
2494                            struct buffer_head *di_bh,
2495                            int name_index,
2496                            const char *name,
2497                            const void *value,
2498                            size_t value_len,
2499                            int flags,
2500                            struct ocfs2_alloc_context *meta_ac,
2501                            struct ocfs2_alloc_context *data_ac)
2502 {
2503         struct ocfs2_dinode *di;
2504         int ret;
2505
2506         struct ocfs2_xattr_info xi = {
2507                 .name_index = name_index,
2508                 .name = name,
2509                 .value = value,
2510                 .value_len = value_len,
2511         };
2512
2513         struct ocfs2_xattr_search xis = {
2514                 .not_found = -ENODATA,
2515         };
2516
2517         struct ocfs2_xattr_search xbs = {
2518                 .not_found = -ENODATA,
2519         };
2520
2521         struct ocfs2_xattr_set_ctxt ctxt = {
2522                 .handle = handle,
2523                 .meta_ac = meta_ac,
2524                 .data_ac = data_ac,
2525         };
2526
2527         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2528                 return -EOPNOTSUPP;
2529
2530         xis.inode_bh = xbs.inode_bh = di_bh;
2531         di = (struct ocfs2_dinode *)di_bh->b_data;
2532
2533         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2534
2535         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2536         if (ret)
2537                 goto cleanup;
2538         if (xis.not_found) {
2539                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2540                 if (ret)
2541                         goto cleanup;
2542         }
2543
2544         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2545
2546 cleanup:
2547         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2548         brelse(xbs.xattr_bh);
2549
2550         return ret;
2551 }
2552
2553 /*
2554  * ocfs2_xattr_set()
2555  *
2556  * Set, replace or remove an extended attribute for this inode.
2557  * value is NULL to remove an existing extended attribute, else either
2558  * create or replace an extended attribute.
2559  */
2560 int ocfs2_xattr_set(struct inode *inode,
2561                     int name_index,
2562                     const char *name,
2563                     const void *value,
2564                     size_t value_len,
2565                     int flags)
2566 {
2567         struct buffer_head *di_bh = NULL;
2568         struct ocfs2_dinode *di;
2569         int ret, credits;
2570         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2571         struct inode *tl_inode = osb->osb_tl_inode;
2572         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2573
2574         struct ocfs2_xattr_info xi = {
2575                 .name_index = name_index,
2576                 .name = name,
2577                 .value = value,
2578                 .value_len = value_len,
2579         };
2580
2581         struct ocfs2_xattr_search xis = {
2582                 .not_found = -ENODATA,
2583         };
2584
2585         struct ocfs2_xattr_search xbs = {
2586                 .not_found = -ENODATA,
2587         };
2588
2589         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2590                 return -EOPNOTSUPP;
2591
2592         /*
2593          * Only xbs will be used on indexed trees.  xis doesn't need a
2594          * bucket.
2595          */
2596         xbs.bucket = ocfs2_xattr_bucket_new(inode);
2597         if (!xbs.bucket) {
2598                 mlog_errno(-ENOMEM);
2599                 return -ENOMEM;
2600         }
2601
2602         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2603         if (ret < 0) {
2604                 mlog_errno(ret);
2605                 goto cleanup_nolock;
2606         }
2607         xis.inode_bh = xbs.inode_bh = di_bh;
2608         di = (struct ocfs2_dinode *)di_bh->b_data;
2609
2610         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2611         /*
2612          * Scan inode and external block to find the same name
2613          * extended attribute and collect search infomation.
2614          */
2615         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2616         if (ret)
2617                 goto cleanup;
2618         if (xis.not_found) {
2619                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2620                 if (ret)
2621                         goto cleanup;
2622         }
2623
2624         if (xis.not_found && xbs.not_found) {
2625                 ret = -ENODATA;
2626                 if (flags & XATTR_REPLACE)
2627                         goto cleanup;
2628                 ret = 0;
2629                 if (!value)
2630                         goto cleanup;
2631         } else {
2632                 ret = -EEXIST;
2633                 if (flags & XATTR_CREATE)
2634                         goto cleanup;
2635         }
2636
2637
2638         mutex_lock(&tl_inode->i_mutex);
2639
2640         if (ocfs2_truncate_log_needs_flush(osb)) {
2641                 ret = __ocfs2_flush_truncate_log(osb);
2642                 if (ret < 0) {
2643                         mutex_unlock(&tl_inode->i_mutex);
2644                         mlog_errno(ret);
2645                         goto cleanup;
2646                 }
2647         }
2648         mutex_unlock(&tl_inode->i_mutex);
2649
2650         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2651                                         &xbs, &ctxt, &credits);
2652         if (ret) {
2653                 mlog_errno(ret);
2654                 goto cleanup;
2655         }
2656
2657         ctxt.handle = ocfs2_start_trans(osb, credits);
2658         if (IS_ERR(ctxt.handle)) {
2659                 ret = PTR_ERR(ctxt.handle);
2660                 mlog_errno(ret);
2661                 goto cleanup;
2662         }
2663
2664         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2665
2666         ocfs2_commit_trans(osb, ctxt.handle);
2667
2668         if (ctxt.data_ac)
2669                 ocfs2_free_alloc_context(ctxt.data_ac);
2670         if (ctxt.meta_ac)
2671                 ocfs2_free_alloc_context(ctxt.meta_ac);
2672         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2673                 ocfs2_schedule_truncate_log_flush(osb, 1);
2674         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2675 cleanup:
2676         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2677         ocfs2_inode_unlock(inode, 1);
2678 cleanup_nolock:
2679         brelse(di_bh);
2680         brelse(xbs.xattr_bh);
2681         ocfs2_xattr_bucket_free(xbs.bucket);
2682
2683         return ret;
2684 }
2685
2686 /*
2687  * Find the xattr extent rec which may contains name_hash.
2688  * e_cpos will be the first name hash of the xattr rec.
2689  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2690  */
2691 static int ocfs2_xattr_get_rec(struct inode *inode,
2692                                u32 name_hash,
2693                                u64 *p_blkno,
2694                                u32 *e_cpos,
2695                                u32 *num_clusters,
2696                                struct ocfs2_extent_list *el)
2697 {
2698         int ret = 0, i;
2699         struct buffer_head *eb_bh = NULL;
2700         struct ocfs2_extent_block *eb;
2701         struct ocfs2_extent_rec *rec = NULL;
2702         u64 e_blkno = 0;
2703
2704         if (el->l_tree_depth) {
2705                 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2706                 if (ret) {
2707                         mlog_errno(ret);
2708                         goto out;
2709                 }
2710
2711                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2712                 el = &eb->h_list;
2713
2714                 if (el->l_tree_depth) {
2715                         ocfs2_error(inode->i_sb,
2716                                     "Inode %lu has non zero tree depth in "
2717                                     "xattr tree block %llu\n", inode->i_ino,
2718                                     (unsigned long long)eb_bh->b_blocknr);
2719                         ret = -EROFS;
2720                         goto out;
2721                 }
2722         }
2723
2724         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2725                 rec = &el->l_recs[i];
2726
2727                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2728                         e_blkno = le64_to_cpu(rec->e_blkno);
2729                         break;
2730                 }
2731         }
2732
2733         if (!e_blkno) {
2734                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2735                             "record (%u, %u, 0) in xattr", inode->i_ino,
2736                             le32_to_cpu(rec->e_cpos),
2737                             ocfs2_rec_clusters(el, rec));
2738                 ret = -EROFS;
2739                 goto out;
2740         }
2741
2742         *p_blkno = le64_to_cpu(rec->e_blkno);
2743         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2744         if (e_cpos)
2745                 *e_cpos = le32_to_cpu(rec->e_cpos);
2746 out:
2747         brelse(eb_bh);
2748         return ret;
2749 }
2750
/*
 * Callback invoked by ocfs2_iterate_xattr_buckets() for every bucket
 * it reads.  @para is the opaque pointer given to the iterator; a
 * non-zero return value stops the iteration and is returned to the
 * iterator's caller.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
2754
2755 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2756                                    struct ocfs2_xattr_bucket *bucket,
2757                                    int name_index,
2758                                    const char *name,
2759                                    u32 name_hash,
2760                                    u16 *xe_index,
2761                                    int *found)
2762 {
2763         int i, ret = 0, cmp = 1, block_off, new_offset;
2764         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2765         size_t name_len = strlen(name);
2766         struct ocfs2_xattr_entry *xe = NULL;
2767         char *xe_name;
2768
2769         /*
2770          * We don't use binary search in the bucket because there
2771          * may be multiple entries with the same name hash.
2772          */
2773         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2774                 xe = &xh->xh_entries[i];
2775
2776                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2777                         continue;
2778                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2779                         break;
2780
2781                 cmp = name_index - ocfs2_xattr_get_type(xe);
2782                 if (!cmp)
2783                         cmp = name_len - xe->xe_name_len;
2784                 if (cmp)
2785                         continue;
2786
2787                 ret = ocfs2_xattr_bucket_get_name_value(inode,
2788                                                         xh,
2789                                                         i,
2790                                                         &block_off,
2791                                                         &new_offset);
2792                 if (ret) {
2793                         mlog_errno(ret);
2794                         break;
2795                 }
2796
2797                 xe_name = bucket_block(bucket, block_off) + new_offset;
2798                 if (!memcmp(name, xe_name, name_len)) {
2799                         *xe_index = i;
2800                         *found = 1;
2801                         ret = 0;
2802                         break;
2803                 }
2804         }
2805
2806         return ret;
2807 }
2808
2809 /*
2810  * Find the specified xattr entry in a series of buckets.
2811  * This series start from p_blkno and last for num_clusters.
2812  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2813  * the num of the valid buckets.
2814  *
2815  * Return the buffer_head this xattr should reside in. And if the xattr's
2816  * hash is in the gap of 2 buckets, return the lower bucket.
2817  */
2818 static int ocfs2_xattr_bucket_find(struct inode *inode,
2819                                    int name_index,
2820                                    const char *name,
2821                                    u32 name_hash,
2822                                    u64 p_blkno,
2823                                    u32 first_hash,
2824                                    u32 num_clusters,
2825                                    struct ocfs2_xattr_search *xs)
2826 {
2827         int ret, found = 0;
2828         struct ocfs2_xattr_header *xh = NULL;
2829         struct ocfs2_xattr_entry *xe = NULL;
2830         u16 index = 0;
2831         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2832         int low_bucket = 0, bucket, high_bucket;
2833         struct ocfs2_xattr_bucket *search;
2834         u32 last_hash;
2835         u64 blkno, lower_blkno = 0;
2836
2837         search = ocfs2_xattr_bucket_new(inode);
2838         if (!search) {
2839                 ret = -ENOMEM;
2840                 mlog_errno(ret);
2841                 goto out;
2842         }
2843
2844         ret = ocfs2_read_xattr_bucket(search, p_blkno);
2845         if (ret) {
2846                 mlog_errno(ret);
2847                 goto out;
2848         }
2849
2850         xh = bucket_xh(search);
2851         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2852         while (low_bucket <= high_bucket) {
2853                 ocfs2_xattr_bucket_relse(search);
2854
2855                 bucket = (low_bucket + high_bucket) / 2;
2856                 blkno = p_blkno + bucket * blk_per_bucket;
2857                 ret = ocfs2_read_xattr_bucket(search, blkno);
2858                 if (ret) {
2859                         mlog_errno(ret);
2860                         goto out;
2861                 }
2862
2863                 xh = bucket_xh(search);
2864                 xe = &xh->xh_entries[0];
2865                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2866                         high_bucket = bucket - 1;
2867                         continue;
2868                 }
2869
2870                 /*
2871                  * Check whether the hash of the last entry in our
2872                  * bucket is larger than the search one. for an empty
2873                  * bucket, the last one is also the first one.
2874                  */
2875                 if (xh->xh_count)
2876                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2877
2878                 last_hash = le32_to_cpu(xe->xe_name_hash);
2879
2880                 /* record lower_blkno which may be the insert place. */
2881                 lower_blkno = blkno;
2882
2883                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2884                         low_bucket = bucket + 1;
2885                         continue;
2886                 }
2887
2888                 /* the searched xattr should reside in this bucket if exists. */
2889                 ret = ocfs2_find_xe_in_bucket(inode, search,
2890                                               name_index, name, name_hash,
2891                                               &index, &found);
2892                 if (ret) {
2893                         mlog_errno(ret);
2894                         goto out;
2895                 }
2896                 break;
2897         }
2898
2899         /*
2900          * Record the bucket we have found.
2901          * When the xattr's hash value is in the gap of 2 buckets, we will
2902          * always set it to the previous bucket.
2903          */
2904         if (!lower_blkno)
2905                 lower_blkno = p_blkno;
2906
2907         /* This should be in cache - we just read it during the search */
2908         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
2909         if (ret) {
2910                 mlog_errno(ret);
2911                 goto out;
2912         }
2913
2914         xs->header = bucket_xh(xs->bucket);
2915         xs->base = bucket_block(xs->bucket, 0);
2916         xs->end = xs->base + inode->i_sb->s_blocksize;
2917
2918         if (found) {
2919                 xs->here = &xs->header->xh_entries[index];
2920                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2921                      (unsigned long long)bucket_blkno(xs->bucket), index);
2922         } else
2923                 ret = -ENODATA;
2924
2925 out:
2926         ocfs2_xattr_bucket_free(search);
2927         return ret;
2928 }
2929
2930 static int ocfs2_xattr_index_block_find(struct inode *inode,
2931                                         struct buffer_head *root_bh,
2932                                         int name_index,
2933                                         const char *name,
2934                                         struct ocfs2_xattr_search *xs)
2935 {
2936         int ret;
2937         struct ocfs2_xattr_block *xb =
2938                         (struct ocfs2_xattr_block *)root_bh->b_data;
2939         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2940         struct ocfs2_extent_list *el = &xb_root->xt_list;
2941         u64 p_blkno = 0;
2942         u32 first_hash, num_clusters = 0;
2943         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
2944
2945         if (le16_to_cpu(el->l_next_free_rec) == 0)
2946                 return -ENODATA;
2947
2948         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2949              name, name_hash, name_index);
2950
2951         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2952                                   &num_clusters, el);
2953         if (ret) {
2954                 mlog_errno(ret);
2955                 goto out;
2956         }
2957
2958         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2959
2960         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2961              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
2962              first_hash);
2963
2964         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2965                                       p_blkno, first_hash, num_clusters, xs);
2966
2967 out:
2968         return ret;
2969 }
2970
2971 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2972                                        u64 blkno,
2973                                        u32 clusters,
2974                                        xattr_bucket_func *func,
2975                                        void *para)
2976 {
2977         int i, ret = 0;
2978         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2979         u32 num_buckets = clusters * bpc;
2980         struct ocfs2_xattr_bucket *bucket;
2981
2982         bucket = ocfs2_xattr_bucket_new(inode);
2983         if (!bucket) {
2984                 mlog_errno(-ENOMEM);
2985                 return -ENOMEM;
2986         }
2987
2988         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2989              clusters, (unsigned long long)blkno);
2990
2991         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
2992                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
2993                 if (ret) {
2994                         mlog_errno(ret);
2995                         break;
2996                 }
2997
2998                 /*
2999                  * The real bucket num in this series of blocks is stored
3000                  * in the 1st bucket.
3001                  */
3002                 if (i == 0)
3003                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3004
3005                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3006                      (unsigned long long)blkno,
3007                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3008                 if (func) {
3009                         ret = func(inode, bucket, para);
3010                         if (ret)
3011                                 mlog_errno(ret);
3012                         /* Fall through to bucket_relse() */
3013                 }
3014
3015                 ocfs2_xattr_bucket_relse(bucket);
3016                 if (ret)
3017                         break;
3018         }
3019
3020         ocfs2_xattr_bucket_free(bucket);
3021         return ret;
3022 }
3023
3024 struct ocfs2_xattr_tree_list {
3025         char *buffer;
3026         size_t buffer_size;
3027         size_t result;
3028 };
3029
3030 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
3031                                              struct ocfs2_xattr_header *xh,
3032                                              int index,
3033                                              int *block_off,
3034                                              int *new_offset)
3035 {
3036         u16 name_offset;
3037
3038         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3039                 return -EINVAL;
3040
3041         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3042
3043         *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
3044         *new_offset = name_offset % inode->i_sb->s_blocksize;
3045
3046         return 0;
3047 }
3048
3049 static int ocfs2_list_xattr_bucket(struct inode *inode,
3050                                    struct ocfs2_xattr_bucket *bucket,
3051                                    void *para)
3052 {
3053         int ret = 0, type;
3054         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3055         int i, block_off, new_offset;
3056         const char *prefix, *name;
3057
3058         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3059                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3060                 type = ocfs2_xattr_get_type(entry);
3061                 prefix = ocfs2_xattr_prefix(type);
3062
3063                 if (prefix) {
3064                         ret = ocfs2_xattr_bucket_get_name_value(inode,
3065                                                                 bucket_xh(bucket),
3066                                                                 i,
3067                                                                 &block_off,
3068                                                                 &new_offset);
3069                         if (ret)
3070                                 break;
3071
3072                         name = (const char *)bucket_block(bucket, block_off) +
3073                                 new_offset;
3074                         ret = ocfs2_xattr_list_entry(xl->buffer,
3075                                                      xl->buffer_size,
3076                                                      &xl->result,
3077                                                      prefix, name,
3078                                                      entry->xe_name_len);
3079                         if (ret)
3080                                 break;
3081                 }
3082         }
3083
3084         return ret;
3085 }
3086
3087 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3088                                              struct ocfs2_xattr_tree_root *xt,
3089                                              char *buffer,
3090                                              size_t buffer_size)
3091 {
3092         struct ocfs2_extent_list *el = &xt->xt_list;
3093         int ret = 0;
3094         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3095         u64 p_blkno = 0;
3096         struct ocfs2_xattr_tree_list xl = {
3097                 .buffer = buffer,
3098                 .buffer_size = buffer_size,
3099                 .result = 0,
3100         };
3101
3102         if (le16_to_cpu(el->l_next_free_rec) == 0)
3103                 return 0;
3104
3105         while (name_hash > 0) {
3106                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3107                                           &e_cpos, &num_clusters, el);
3108                 if (ret) {
3109                         mlog_errno(ret);
3110                         goto out;
3111                 }
3112
3113                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
3114                                                   ocfs2_list_xattr_bucket,
3115                                                   &xl);
3116                 if (ret) {
3117                         mlog_errno(ret);
3118                         goto out;
3119                 }
3120
3121                 if (e_cpos == 0)
3122                         break;
3123
3124                 name_hash = e_cpos - 1;
3125         }
3126
3127         ret = xl.result;
3128 out:
3129         return ret;
3130 }
3131
3132 static int cmp_xe(const void *a, const void *b)
3133 {
3134         const struct ocfs2_xattr_entry *l = a, *r = b;
3135         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3136         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3137
3138         if (l_hash > r_hash)
3139                 return 1;
3140         if (l_hash < r_hash)
3141                 return -1;
3142         return 0;
3143 }
3144
3145 static void swap_xe(void *a, void *b, int size)
3146 {
3147         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3148
3149         tmp = *l;
3150         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3151         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3152 }
3153
3154 /*
3155  * When the ocfs2_xattr_block is filled up, new bucket will be created
3156  * and all the xattr entries will be moved to the new bucket.
3157  * The header goes at the start of the bucket, and the names+values are
3158  * filled from the end.  This is why *target starts as the last buffer.
3159  * Note: we need to sort the entries since they are not saved in order
3160  * in the ocfs2_xattr_block.
3161  */
3162 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3163                                            struct buffer_head *xb_bh,
3164                                            struct ocfs2_xattr_bucket *bucket)
3165 {
3166         int i, blocksize = inode->i_sb->s_blocksize;
3167         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3168         u16 offset, size, off_change;
3169         struct ocfs2_xattr_entry *xe;
3170         struct ocfs2_xattr_block *xb =
3171                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
3172         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3173         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3174         u16 count = le16_to_cpu(xb_xh->xh_count);
3175         char *src = xb_bh->b_data;
3176         char *target = bucket_block(bucket, blks - 1);
3177
3178         mlog(0, "cp xattr from block %llu to bucket %llu\n",
3179              (unsigned long long)xb_bh->b_blocknr,
3180              (unsigned long long)bucket_blkno(bucket));
3181
3182         for (i = 0; i < blks; i++)
3183                 memset(bucket_block(bucket, i), 0, blocksize);
3184
3185         /*
3186          * Since the xe_name_offset is based on ocfs2_xattr_header,
3187          * there is a offset change corresponding to the change of
3188          * ocfs2_xattr_header's position.
3189          */
3190         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3191         xe = &xb_xh->xh_entries[count - 1];
3192         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3193         size = blocksize - offset;
3194
3195         /* copy all the names and values. */
3196         memcpy(target + offset, src + offset, size);
3197
3198         /* Init new header now. */
3199         xh->xh_count = xb_xh->xh_count;
3200         xh->xh_num_buckets = cpu_to_le16(1);
3201         xh->xh_name_value_len = cpu_to_le16(size);
3202         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3203
3204         /* copy all the entries. */
3205         target = bucket_block(bucket, 0);
3206         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3207         size = count * sizeof(struct ocfs2_xattr_entry);
3208         memcpy(target + offset, (char *)xb_xh + offset, size);
3209
3210         /* Change the xe offset for all the xe because of the move. */
3211         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3212                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3213         for (i = 0; i < count; i++)
3214                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3215
3216         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3217              offset, size, off_change);
3218
3219         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3220              cmp_xe, swap_xe);
3221 }
3222
3223 /*
3224  * After we move xattr from block to index btree, we have to
3225  * update ocfs2_xattr_search to the new xe and base.
3226  *
3227  * When the entry is in xattr block, xattr_bh indicates the storage place.
3228  * While if the entry is in index b-tree, "bucket" indicates the
3229  * real place of the xattr.
3230  */
3231 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3232                                             struct ocfs2_xattr_search *xs,
3233                                             struct buffer_head *old_bh)
3234 {
3235         char *buf = old_bh->b_data;
3236         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3237         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3238         int i;
3239
3240         xs->header = bucket_xh(xs->bucket);
3241         xs->base = bucket_block(xs->bucket, 0);
3242         xs->end = xs->base + inode->i_sb->s_blocksize;
3243
3244         if (xs->not_found)
3245                 return;
3246
3247         i = xs->here - old_xh->xh_entries;
3248         xs->here = &xs->header->xh_entries[i];
3249 }
3250
3251 static int ocfs2_xattr_create_index_block(struct inode *inode,
3252                                           struct ocfs2_xattr_search *xs,
3253                                           struct ocfs2_xattr_set_ctxt *ctxt)
3254 {
3255         int ret;
3256         u32 bit_off, len;
3257         u64 blkno;
3258         handle_t *handle = ctxt->handle;
3259         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3260         struct ocfs2_inode_info *oi = OCFS2_I(inode);
3261         struct buffer_head *xb_bh = xs->xattr_bh;
3262         struct ocfs2_xattr_block *xb =
3263                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3264         struct ocfs2_xattr_tree_root *xr;
3265         u16 xb_flags = le16_to_cpu(xb->xb_flags);
3266
3267         mlog(0, "create xattr index block for %llu\n",
3268              (unsigned long long)xb_bh->b_blocknr);
3269
3270         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3271         BUG_ON(!xs->bucket);
3272
3273         /*
3274          * XXX:
3275          * We can use this lock for now, and maybe move to a dedicated mutex
3276          * if performance becomes a problem later.
3277          */
3278         down_write(&oi->ip_alloc_sem);
3279
3280         ret = ocfs2_journal_access(handle, inode, xb_bh,
3281                                    OCFS2_JOURNAL_ACCESS_WRITE);
3282         if (ret) {
3283                 mlog_errno(ret);
3284                 goto out;
3285         }
3286
3287         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3288                                      1, 1, &bit_off, &len);
3289         if (ret) {
3290                 mlog_errno(ret);
3291                 goto out;
3292         }
3293
3294         /*
3295          * The bucket may spread in many blocks, and
3296          * we will only touch the 1st block and the last block
3297          * in the whole bucket(one for entry and one for data).
3298          */
3299         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3300
3301         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3302              (unsigned long long)blkno);
3303
3304         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3305         if (ret) {
3306                 mlog_errno(ret);
3307                 goto out;
3308         }
3309
3310         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3311                                                 OCFS2_JOURNAL_ACCESS_CREATE);
3312         if (ret) {
3313                 mlog_errno(ret);
3314                 goto out;
3315         }
3316
3317         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3318         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3319
3320         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3321
3322         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3323         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3324                offsetof(struct ocfs2_xattr_block, xb_attrs));
3325
3326         xr = &xb->xb_attrs.xb_root;
3327         xr->xt_clusters = cpu_to_le32(1);
3328         xr->xt_last_eb_blk = 0;
3329         xr->xt_list.l_tree_depth = 0;
3330         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3331         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3332
3333         xr->xt_list.l_recs[0].e_cpos = 0;
3334         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3335         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3336
3337         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3338
3339         ocfs2_journal_dirty(handle, xb_bh);
3340
3341 out:
3342         up_write(&oi->ip_alloc_sem);
3343
3344         return ret;
3345 }
3346
3347 static int cmp_xe_offset(const void *a, const void *b)
3348 {
3349         const struct ocfs2_xattr_entry *l = a, *r = b;
3350         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3351         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3352
3353         if (l_name_offset < r_name_offset)
3354                 return 1;
3355         if (l_name_offset > r_name_offset)
3356                 return -1;
3357         return 0;
3358 }
3359
3360 /*
3361  * defrag a xattr bucket if we find that the bucket has some
3362  * holes beteen name/value pairs.
3363  * We will move all the name/value pairs to the end of the bucket
3364  * so that we can spare some space for insertion.
3365  */
3366 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3367                                      handle_t *handle,
3368                                      struct ocfs2_xattr_bucket *bucket)
3369 {
3370         int ret, i;
3371         size_t end, offset, len, value_len;
3372         struct ocfs2_xattr_header *xh;
3373         char *entries, *buf, *bucket_buf = NULL;
3374         u64 blkno = bucket_blkno(bucket);
3375         u16 xh_free_start;
3376         size_t blocksize = inode->i_sb->s_blocksize;
3377         struct ocfs2_xattr_entry *xe;
3378
3379         /*
3380          * In order to make the operation more efficient and generic,
3381          * we copy all the blocks into a contiguous memory and do the
3382          * defragment there, so if anything is error, we will not touch
3383          * the real block.
3384          */
3385         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3386         if (!bucket_buf) {
3387                 ret = -EIO;
3388                 goto out;
3389         }
3390
3391         buf = bucket_buf;
3392         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3393                 memcpy(buf, bucket_block(bucket, i), blocksize);
3394
3395         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3396                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3397         if (ret < 0) {
3398                 mlog_errno(ret);
3399                 goto out;
3400         }
3401
3402         xh = (struct ocfs2_xattr_header *)bucket_buf;
3403         entries = (char *)xh->xh_entries;
3404         xh_free_start = le16_to_cpu(xh->xh_free_start);
3405
3406         mlog(0, "adjust xattr bucket in %llu, count = %u, "
3407              "xh_free_start = %u, xh_name_value_len = %u.\n",
3408              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3409              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3410
3411         /*
3412          * sort all the entries by their offset.
3413          * the largest will be the first, so that we can
3414          * move them to the end one by one.
3415          */
3416         sort(entries, le16_to_cpu(xh->xh_count),
3417              sizeof(struct ocfs2_xattr_entry),
3418              cmp_xe_offset, swap_xe);
3419
3420         /* Move all name/values to the end of the bucket. */
3421         xe = xh->xh_entries;
3422         end = OCFS2_XATTR_BUCKET_SIZE;
3423         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3424                 offset = le16_to_cpu(xe->xe_name_offset);
3425                 if (ocfs2_xattr_is_local(xe))
3426                         value_len = OCFS2_XATTR_SIZE(
3427                                         le64_to_cpu(xe->xe_value_size));
3428                 else
3429                         value_len = OCFS2_XATTR_ROOT_SIZE;
3430                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3431
3432                 /*
3433                  * We must make sure that the name/value pair
3434                  * exist in the same block. So adjust end to
3435                  * the previous block end if needed.
3436                  */
3437                 if (((end - len) / blocksize !=
3438                         (end - 1) / blocksize))
3439                         end = end - end % blocksize;
3440
3441                 if (end > offset + len) {
3442                         memmove(bucket_buf + end - len,
3443                                 bucket_buf + offset, len);
3444                         xe->xe_name_offset = cpu_to_le16(end - len);
3445                 }
3446
3447                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3448                                 "bucket %llu\n", (unsigned long long)blkno);
3449
3450                 end -= len;
3451         }
3452
3453         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3454                         "bucket %llu\n", (unsigned long long)blkno);
3455
3456         if (xh_free_start == end)
3457                 goto out;
3458
3459         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3460         xh->xh_free_start = cpu_to_le16(end);
3461
3462         /* sort the entries by their name_hash. */
3463         sort(entries, le16_to_cpu(xh->xh_count),
3464              sizeof(struct ocfs2_xattr_entry),
3465              cmp_xe, swap_xe);
3466
3467         buf = bucket_buf;
3468         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3469                 memcpy(bucket_block(bucket, i), buf, blocksize);
3470         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3471
3472 out:
3473         kfree(bucket_buf);
3474         return ret;
3475 }
3476
/*
 * Move half of the xattr buckets in the previous cluster to this new
 * cluster. We only touch the last cluster of the previous extent record.
 *
 * first_bh is the first buffer_head of a series of buckets in the same
 * extent rec and header_bh is the header of one bucket in this cluster.
 * Both are updated (and their references swapped) if the data header_bh
 * contains is among the blocks moved to the new cluster.
 *
 * new_blkno is the first block of the new cluster, prev_blkno the first
 * block of the previous extent record and num_clusters the length of that
 * record.  If first_hash is not NULL it is set to the name hash of the
 * first xattr entry found in the first block copied to the new cluster.
 *
 * On failure the error of the failing step is logged and returned.
 */
static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
                                               handle_t *handle,
                                               struct buffer_head **first_bh,
                                               struct buffer_head **header_bh,
                                               u64 new_blkno,
                                               u64 prev_blkno,
                                               u32 num_clusters,
                                               u32 *first_hash)
{
        int i, ret, credits;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
        int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
        int blocksize = inode->i_sb->s_blocksize;
        struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
        struct ocfs2_xattr_header *new_xh;
        struct ocfs2_xattr_header *xh =
                        (struct ocfs2_xattr_header *)((*first_bh)->b_data);

        BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
        /* Presumably a cluster holding one bucket cannot be halved. */
        BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);

        /*
         * Keep our own reference on the original first buffer: *first_bh
         * may be replaced inside the loop, but xh is still needed for the
         * bucket count update at the end.
         */
        prev_bh = *first_bh;
        get_bh(prev_bh);
        xh = (struct ocfs2_xattr_header *)prev_bh->b_data;

        /* Advance to the middle of the last cluster of the extent record. */
        prev_blkno += (num_clusters - 1) * bpc + bpc / 2;

        mlog(0, "move half of xattrs in cluster %llu to %llu\n",
             (unsigned long long)prev_blkno, (unsigned long long)new_blkno);

        /*
         * We need to update the 1st half of the new cluster and
         * 1 more for the update of the 1st bucket of the previous
         * extent record.
         */
        credits = bpc / 2 + 1 + handle->h_buffer_credits;
        ret = ocfs2_extend_trans(handle, credits);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_journal_access(handle, inode, prev_bh,
                                   OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        /* Copy the second half of the old cluster block by block. */
        for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
                old_bh = new_bh = NULL;
                new_bh = sb_getblk(inode->i_sb, new_blkno);
                if (!new_bh) {
                        ret = -EIO;
                        mlog_errno(ret);
                        goto out;
                }

                ocfs2_set_new_buffer_uptodate(inode, new_bh);

                ret = ocfs2_journal_access(handle, inode, new_bh,
                                           OCFS2_JOURNAL_ACCESS_CREATE);
                if (ret < 0) {
                        mlog_errno(ret);
                        brelse(new_bh);
                        goto out;
                }

                ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
                if (ret < 0) {
                        mlog_errno(ret);
                        brelse(new_bh);
                        goto out;
                }

                memcpy(new_bh->b_data, old_bh->b_data, blocksize);

                if (i == 0) {
                        /*
                         * The first copied block becomes the head of the
                         * new cluster and accounts for the moved buckets.
                         */
                        new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
                        new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);

                        if (first_hash)
                                *first_hash = le32_to_cpu(
                                        new_xh->xh_entries[0].xe_name_hash);
                        /* Extra reference, dropped at the out label. */
                        new_first_bh = new_bh;
                        get_bh(new_first_bh);
                }

                ocfs2_journal_dirty(handle, new_bh);

                /*
                 * The caller's bucket header was just moved: hand the
                 * caller the new copies, taking a reference on each new
                 * buffer and dropping the caller's reference on the old.
                 */
                if (*header_bh == old_bh) {
                        brelse(*header_bh);
                        *header_bh = new_bh;
                        get_bh(*header_bh);

                        brelse(*first_bh);
                        *first_bh = new_first_bh;
                        get_bh(*first_bh);
                }
                /* Drop the references taken in this iteration. */
                brelse(new_bh);
                brelse(old_bh);
        }

        /* The old extent record has given away half a cluster of buckets. */
        le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));

        ocfs2_journal_dirty(handle, prev_bh);
out:
        brelse(prev_bh);
        brelse(new_first_bh);
        return ret;
}
3598
3599 /*
3600  * Find the suitable pos when we divide a bucket into 2.
3601  * We have to make sure the xattrs with the same hash value exist
3602  * in the same bucket.
3603  *
3604  * If this ocfs2_xattr_header covers more than one hash value, find a
3605  * place where the hash value changes.  Try to find the most even split.
3606  * The most common case is that all entries have different hash values,
3607  * and the first check we make will find a place to split.
3608  */
3609 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3610 {
3611         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3612         int count = le16_to_cpu(xh->xh_count);
3613         int delta, middle = count / 2;
3614
3615         /*
3616          * We start at the middle.  Each step gets farther away in both
3617          * directions.  We therefore hit the change in hash value
3618          * nearest to the middle.  Note that this loop does not execute for
3619          * count < 2.
3620          */
3621         for (delta = 0; delta < middle; delta++) {
3622                 /* Let's check delta earlier than middle */
3623                 if (cmp_xe(&entries[middle - delta - 1],
3624                            &entries[middle - delta]))
3625                         return middle - delta;
3626
3627                 /* For even counts, don't walk off the end */
3628                 if ((middle + delta + 1) == count)
3629                         continue;
3630
3631                 /* Now try delta past middle */
3632                 if (cmp_xe(&entries[middle + delta],
3633                            &entries[middle + delta + 1]))
3634                         return middle + delta + 1;
3635         }
3636
3637         /* Every entry had the same hash */
3638         return count;
3639 }
3640
3641 /*
3642  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3643  * first_hash will record the 1st hash of the new bucket.
3644  *
3645  * Normally half of the xattrs will be moved.  But we have to make
3646  * sure that the xattrs with the same hash value are stored in the
3647  * same bucket. If all the xattrs in this bucket have the same hash
3648  * value, the new bucket will be initialized as an empty one and the
3649  * first_hash will be initialized as (hash_value+1).
3650  */
3651 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3652                                     handle_t *handle,
3653                                     u64 blk,
3654                                     u64 new_blk,
3655                                     u32 *first_hash,
3656                                     int new_bucket_head)
3657 {
3658         int ret, i;
3659         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3660         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3661         struct ocfs2_xattr_header *xh;
3662         struct ocfs2_xattr_entry *xe;
3663         int blocksize = inode->i_sb->s_blocksize;
3664
3665         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3666              (unsigned long long)blk, (unsigned long long)new_blk);
3667
3668         s_bucket = ocfs2_xattr_bucket_new(inode);
3669         t_bucket = ocfs2_xattr_bucket_new(inode);
3670         if (!s_bucket || !t_bucket) {
3671                 ret = -ENOMEM;
3672                 mlog_errno(ret);
3673                 goto out;
3674         }
3675
3676         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3677         if (ret) {
3678                 mlog_errno(ret);
3679                 goto out;
3680         }
3681
3682         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3683                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3684         if (ret) {
3685                 mlog_errno(ret);
3686                 goto out;
3687         }
3688
3689         /*
3690          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3691          * there's no need to read it.
3692          */
3693         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3694         if (ret) {
3695                 mlog_errno(ret);
3696                 goto out;
3697         }
3698
3699         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3700                                                 new_bucket_head ?
3701                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3702                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3703         if (ret) {
3704                 mlog_errno(ret);
3705                 goto out;
3706         }
3707
3708         xh = bucket_xh(s_bucket);
3709         count = le16_to_cpu(xh->xh_count);
3710         start = ocfs2_xattr_find_divide_pos(xh);
3711
3712         if (start == count) {
3713                 xe = &xh->xh_entries[start-1];
3714
3715                 /*
3716                  * initialized a new empty bucket here.
3717                  * The hash value is set as one larger than
3718                  * that of the last entry in the previous bucket.
3719                  */
3720                 for (i = 0; i < t_bucket->bu_blocks; i++)
3721                         memset(bucket_block(t_bucket, i), 0, blocksize);
3722
3723                 xh = bucket_xh(t_bucket);
3724                 xh->xh_free_start = cpu_to_le16(blocksize);
3725                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3726                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3727
3728                 goto set_num_buckets;
3729         }
3730
3731         /* copy the whole bucket to the new first. */
3732         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3733
3734         /* update the new bucket. */
3735         xh = bucket_xh(t_bucket);
3736
3737         /*
3738          * Calculate the total name/value len and xh_free_start for
3739          * the old bucket first.
3740          */
3741         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3742         name_value_len = 0;
3743         for (i = 0; i < start; i++) {
3744                 xe = &xh->xh_entries[i];
3745                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3746                 if (ocfs2_xattr_is_local(xe))
3747                         xe_len +=
3748                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3749                 else
3750                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3751                 name_value_len += xe_len;
3752                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3753                         name_offset = le16_to_cpu(xe->xe_name_offset);
3754         }
3755
3756         /*
3757          * Now begin the modification to the new bucket.
3758          *
3759          * In the new bucket, We just move the xattr entry to the beginning
3760          * and don't touch the name/value. So there will be some holes in the
3761          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3762          * called.
3763          */
3764         xe = &xh->xh_entries[start];
3765         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3766         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3767              (int)((char *)xe - (char *)xh),
3768              (int)((char *)xh->xh_entries - (char *)xh));
3769         memmove((char *)xh->xh_entries, (char *)xe, len);
3770         xe = &xh->xh_entries[count - start];
3771         len = sizeof(struct ocfs2_xattr_entry) * start;
3772         memset((char *)xe, 0, len);
3773
3774         le16_add_cpu(&xh->xh_count, -start);
3775         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3776
3777         /* Calculate xh_free_start for the new bucket. */
3778         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3779         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3780                 xe = &xh->xh_entries[i];
3781                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3782                 if (ocfs2_xattr_is_local(xe))
3783                         xe_len +=
3784                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3785                 else
3786                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3787                 if (le16_to_cpu(xe->xe_name_offset) <
3788                     le16_to_cpu(xh->xh_free_start))
3789                         xh->xh_free_start = xe->xe_name_offset;
3790         }
3791
3792 set_num_buckets:
3793         /* set xh->xh_num_buckets for the new xh. */
3794         if (new_bucket_head)
3795                 xh->xh_num_buckets = cpu_to_le16(1);
3796         else
3797                 xh->xh_num_buckets = 0;
3798
3799         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3800
3801         /* store the first_hash of the new bucket. */
3802         if (first_hash)
3803                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3804
3805         /*
3806          * Now only update the 1st block of the old bucket.  If we
3807          * just added a new empty bucket, there is no need to modify
3808          * it.
3809          */
3810         if (start == count)
3811                 goto out;
3812
3813         xh = bucket_xh(s_bucket);
3814         memset(&xh->xh_entries[start], 0,
3815                sizeof(struct ocfs2_xattr_entry) * (count - start));
3816         xh->xh_count = cpu_to_le16(start);
3817         xh->xh_free_start = cpu_to_le16(name_offset);
3818         xh->xh_name_value_len = cpu_to_le16(name_value_len);
3819
3820         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3821
3822 out:
3823         ocfs2_xattr_bucket_free(s_bucket);
3824         ocfs2_xattr_bucket_free(t_bucket);
3825
3826         return ret;
3827 }
3828
3829 /*
3830  * Copy xattr from one bucket to another bucket.
3831  *
3832  * The caller must make sure that the journal transaction
3833  * has enough space for journaling.
3834  */
3835 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3836                                  handle_t *handle,
3837                                  u64 s_blkno,
3838                                  u64 t_blkno,
3839                                  int t_is_new)
3840 {
3841         int ret;
3842         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3843
3844         BUG_ON(s_blkno == t_blkno);
3845
3846         mlog(0, "cp bucket %llu to %llu, target is %d\n",
3847              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3848              t_is_new);
3849
3850         s_bucket = ocfs2_xattr_bucket_new(inode);
3851         t_bucket = ocfs2_xattr_bucket_new(inode);
3852         if (!s_bucket || !t_bucket) {
3853                 ret = -ENOMEM;
3854                 mlog_errno(ret);
3855                 goto out;
3856         }
3857   
3858         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3859         if (ret)
3860                 goto out;
3861
3862         /*
3863          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
3864          * there's no need to read it.
3865          */
3866         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3867         if (ret)
3868                 goto out;
3869
3870         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3871                                                 t_is_new ?
3872                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3873                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3874         if (ret)
3875                 goto out;
3876
3877         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3878         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3879
3880 out:
3881         ocfs2_xattr_bucket_free(t_bucket);
3882         ocfs2_xattr_bucket_free(s_bucket);
3883
3884         return ret;
3885 }
3886
3887 /*
3888  * Copy one xattr cluster from src_blk to to_blk.
3889  * The to_blk will become the first bucket header of the cluster, so its
3890  * xh_num_buckets will be initialized as the bucket num in the cluster.
3891  */
3892 static int ocfs2_cp_xattr_cluster(struct inode *inode,
3893                                   handle_t *handle,
3894                                   struct buffer_head *first_bh,
3895                                   u64 src_blk,
3896                                   u64 to_blk,
3897                                   u32 *first_hash)
3898 {
3899         int i, ret, credits;
3900         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3901         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3902         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3903         struct buffer_head *bh = NULL;
3904         struct ocfs2_xattr_header *xh;
3905         u64 to_blk_start = to_blk;
3906
3907         mlog(0, "cp xattrs from cluster %llu to %llu\n",
3908              (unsigned long long)src_blk, (unsigned long long)to_blk);
3909
3910         /*
3911          * We need to update the new cluster and 1 more for the update of
3912          * the 1st bucket of the previous extent rec.
3913          */
3914         credits = bpc + 1 + handle->h_buffer_credits;
3915         ret = ocfs2_extend_trans(handle, credits);
3916         if (ret) {
3917                 mlog_errno(ret);
3918                 goto out;
3919         }
3920
3921         ret = ocfs2_journal_access(handle, inode, first_bh,
3922                                    OCFS2_JOURNAL_ACCESS_WRITE);
3923         if (ret) {
3924                 mlog_errno(ret);
3925                 goto out;
3926         }
3927
3928         for (i = 0; i < num_buckets; i++) {
3929                 ret = ocfs2_cp_xattr_bucket(inode, handle,
3930                                             src_blk, to_blk, 1);
3931                 if (ret) {
3932                         mlog_errno(ret);
3933                         goto out;
3934                 }
3935
3936                 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3937                 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3938         }
3939
3940         /* update the old bucket header. */
3941         xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3942         le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3943
3944         ocfs2_journal_dirty(handle, first_bh);
3945
3946         /* update the new bucket header. */
3947         ret = ocfs2_read_block(inode, to_blk_start, &bh);
3948         if (ret < 0) {
3949                 mlog_errno(ret);
3950                 goto out;
3951         }
3952
3953         ret = ocfs2_journal_access(handle, inode, bh,
3954                                    OCFS2_JOURNAL_ACCESS_WRITE);
3955         if (ret) {
3956                 mlog_errno(ret);
3957                 goto out;
3958         }
3959
3960         xh = (struct ocfs2_xattr_header *)bh->b_data;
3961         xh->xh_num_buckets = cpu_to_le16(num_buckets);
3962
3963         ocfs2_journal_dirty(handle, bh);
3964
3965         if (first_hash)
3966                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3967 out:
3968         brelse(bh);
3969         return ret;
3970 }
3971
3972 /*
3973  * Move some xattrs in this cluster to the new cluster.
3974  * This function should only be called when bucket size == cluster size.
3975  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3976  */
3977 static int ocfs2_divide_xattr_cluster(struct inode *inode,
3978                                       handle_t *handle,
3979                                       u64 prev_blk,
3980                                       u64 new_blk,
3981                                       u32 *first_hash)
3982 {
3983         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3984         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
3985
3986         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3987
3988         ret = ocfs2_extend_trans(handle, credits);
3989         if (ret) {
3990                 mlog_errno(ret);
3991                 return ret;
3992         }
3993
3994         /* Move half of the xattr in start_blk to the next bucket. */
3995         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3996                                           new_blk, first_hash, 1);
3997 }
3998
3999 /*
4000  * Move some xattrs from the old cluster to the new one since they are not
4001  * contiguous in ocfs2 xattr tree.
4002  *
4003  * new_blk starts a new separate cluster, and we will move some xattrs from
4004  * prev_blk to it. v_start will be set as the first name hash value in this
4005  * new cluster so that it can be used as e_cpos during tree insertion and
4006  * don't collide with our original b-tree operations. first_bh and header_bh
4007  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4008  * to extend the insert bucket.
4009  *
4010  * The problem is how much xattr should we move to the new one and when should
4011  * we update first_bh and header_bh?
4012  * 1. If cluster size > bucket size, that means the previous cluster has more
4013  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4014  *    update the first_bh and header_bh if the insert bucket has been moved
4015  *    to the new cluster.
4016  * 2. If cluster_size == bucket_size:
4017  *    a) If the previous extent rec has more than one cluster and the insert
4018  *       place isn't in the last cluster, copy the entire last cluster to the
4019  *       new one. This time, we don't need to upate the first_bh and header_bh
4020  *       since they will not be moved into the new cluster.
4021  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4022  *       the new one. And we set the extend flag to zero if the insert place is
4023  *       moved into the new allocated cluster since no extend is needed.
4024  */
4025 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4026                                             handle_t *handle,
4027                                             struct buffer_head **first_bh,
4028                                             struct buffer_head **header_bh,
4029                                             u64 new_blk,
4030                                             u64 prev_blk,
4031                                             u32 prev_clusters,
4032                                             u32 *v_start,
4033                                             int *extend)
4034 {
4035         int ret = 0;
4036         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4037
4038         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4039              (unsigned long long)prev_blk, prev_clusters,
4040              (unsigned long long)new_blk);
4041
4042         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
4043                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4044                                                           handle,
4045                                                           first_bh,
4046                                                           header_bh,
4047                                                           new_blk,
4048                                                           prev_blk,
4049                                                           prev_clusters,
4050                                                           v_start);
4051         else {
4052                 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
4053
4054                 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
4055                         ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
4056                                                      last_blk, new_blk,
4057                                                      v_start);
4058                 else {
4059                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4060                                                          last_blk, new_blk,
4061                                                          v_start);
4062
4063                         if ((*header_bh)->b_blocknr == last_blk && extend)
4064                                 *extend = 0;
4065                 }
4066         }
4067
4068         return ret;
4069 }
4070
4071 /*
4072  * Add a new cluster for xattr storage.
4073  *
4074  * If the new cluster is contiguous with the previous one, it will be
4075  * appended to the same extent record, and num_clusters will be updated.
4076  * If not, we will insert a new extent for it and move some xattrs in
4077  * the last cluster into the new allocated one.
4078  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4079  * lose the benefits of hashing because we'll have to search large leaves.
4080  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4081  * if it's bigger).
4082  *
4083  * first_bh is the first block of the previous extent rec and header_bh
4084  * indicates the bucket we will insert the new xattrs. They will be updated
4085  * when the header_bh is moved into the new cluster.
4086  */
4087 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4088                                        struct buffer_head *root_bh,
4089                                        struct buffer_head **first_bh,
4090                                        struct buffer_head **header_bh,
4091                                        u32 *num_clusters,
4092                                        u32 prev_cpos,
4093                                        u64 prev_blkno,
4094                                        int *extend,
4095                                        struct ocfs2_xattr_set_ctxt *ctxt)
4096 {
4097         int ret;
4098         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4099         u32 prev_clusters = *num_clusters;
4100         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4101         u64 block;
4102         handle_t *handle = ctxt->handle;
4103         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4104         struct ocfs2_extent_tree et;
4105
4106         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4107              "previous xattr blkno = %llu\n",
4108              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4109              prev_cpos, (unsigned long long)prev_blkno);
4110
4111         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4112
4113         ret = ocfs2_journal_access(handle, inode, root_bh,
4114                                    OCFS2_JOURNAL_ACCESS_WRITE);
4115         if (ret < 0) {
4116                 mlog_errno(ret);
4117                 goto leave;
4118         }
4119
4120         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4121                                      clusters_to_add, &bit_off, &num_bits);
4122         if (ret < 0) {
4123                 if (ret != -ENOSPC)
4124                         mlog_errno(ret);
4125                 goto leave;
4126         }
4127
4128         BUG_ON(num_bits > clusters_to_add);
4129
4130         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4131         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4132              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4133
4134         if (prev_blkno + prev_clusters * bpc == block &&
4135             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4136              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4137                 /*
4138                  * If this cluster is contiguous with the old one and
4139                  * adding this new cluster, we don't surpass the limit of
4140                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4141                  * initialized and used like other buckets in the previous
4142                  * cluster.
4143                  * So add it as a contiguous one. The caller will handle
4144                  * its init process.
4145                  */
4146                 v_start = prev_cpos + prev_clusters;
4147                 *num_clusters = prev_clusters + num_bits;
4148                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4149                      num_bits);
4150         } else {
4151                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4152                                                        handle,
4153                                                        first_bh,
4154                                                        header_bh,
4155                                                        block,
4156                                                        prev_blkno,
4157                                                        prev_clusters,
4158                                                        &v_start,
4159                                                        extend);
4160                 if (ret) {
4161                         mlog_errno(ret);
4162                         goto leave;
4163                 }
4164         }
4165
4166         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4167              num_bits, (unsigned long long)block, v_start);
4168         ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
4169                                   num_bits, 0, ctxt->meta_ac);
4170         if (ret < 0) {
4171                 mlog_errno(ret);
4172                 goto leave;
4173         }
4174
4175         ret = ocfs2_journal_dirty(handle, root_bh);
4176         if (ret < 0)
4177                 mlog_errno(ret);
4178
4179 leave:
4180         return ret;
4181 }
4182
4183 /*
4184  * Extend a new xattr bucket and move xattrs to the end one by one until
4185  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
4186  */
4187 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4188                                      handle_t *handle,
4189                                      struct buffer_head *first_bh,
4190                                      struct buffer_head *start_bh,
4191                                      u32 num_clusters)
4192 {
4193         int ret, credits;
4194         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4195         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4196         u64 start_blk = start_bh->b_blocknr, end_blk;
4197         u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
4198         struct ocfs2_xattr_header *first_xh =
4199                                 (struct ocfs2_xattr_header *)first_bh->b_data;
4200         u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
4201
4202         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4203              "from %llu, len = %u\n", (unsigned long long)start_blk,
4204              (unsigned long long)first_bh->b_blocknr, num_clusters);
4205
4206         BUG_ON(bucket >= num_buckets);
4207
4208         end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
4209
4210         /*
4211          * We will touch all the buckets after the start_bh(include it).
4212          * Then we add one more bucket.
4213          */
4214         credits = end_blk - start_blk + 3 * blk_per_bucket + 1 +
4215                   handle->h_buffer_credits;
4216         ret = ocfs2_extend_trans(handle, credits);
4217         if (ret) {
4218                 mlog_errno(ret);
4219                 goto out;
4220         }
4221
4222         ret = ocfs2_journal_access(handle, inode, first_bh,
4223                                    OCFS2_JOURNAL_ACCESS_WRITE);
4224         if (ret) {
4225                 mlog_errno(ret);
4226                 goto out;
4227         }
4228
4229         while (end_blk != start_blk) {
4230                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4231                                             end_blk + blk_per_bucket, 0);
4232                 if (ret)
4233                         goto out;
4234                 end_blk -= blk_per_bucket;
4235         }
4236
4237         /* Move half of the xattr in start_blk to the next bucket. */
4238         ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
4239                                         start_blk + blk_per_bucket, NULL, 0);
4240
4241         le16_add_cpu(&first_xh->xh_num_buckets, 1);
4242         ocfs2_journal_dirty(handle, first_bh);
4243
4244 out:
4245         return ret;
4246 }
4247
4248 /*
4249  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
4250  * xb_bh is the ocfs2_xattr_block.
4251  * We will move all the buckets starting from header_bh to the next place. As
4252  * for this one, half num of its xattrs will be moved to the next one.
4253  *
4254  * We will allocate a new cluster if current cluster is full and adjust
4255  * header_bh and first_bh if the insert place is moved to the new cluster.
4256  */
4257 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4258                                       struct buffer_head *xb_bh,
4259                                       struct buffer_head *header_bh,
4260                                       struct ocfs2_xattr_set_ctxt *ctxt)
4261 {
4262         struct ocfs2_xattr_header *first_xh = NULL;
4263         struct buffer_head *first_bh = NULL;
4264         struct ocfs2_xattr_block *xb =
4265                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4266         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4267         struct ocfs2_extent_list *el = &xb_root->xt_list;
4268         struct ocfs2_xattr_header *xh =
4269                         (struct ocfs2_xattr_header *)header_bh->b_data;
4270         u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4271         struct super_block *sb = inode->i_sb;
4272         struct ocfs2_super *osb = OCFS2_SB(sb);
4273         int ret, num_buckets, extend = 1;
4274         u64 p_blkno;
4275         u32 e_cpos, num_clusters;
4276
4277         mlog(0, "Add new xattr bucket starting form %llu\n",
4278              (unsigned long long)header_bh->b_blocknr);
4279
4280         /*
4281          * Add refrence for header_bh here because it may be
4282          * changed in ocfs2_add_new_xattr_cluster and we need
4283          * to free it in the end.
4284          */
4285         get_bh(header_bh);
4286
4287         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4288                                   &num_clusters, el);
4289         if (ret) {
4290                 mlog_errno(ret);
4291                 goto out;
4292         }
4293
4294         ret = ocfs2_read_block(inode, p_blkno, &first_bh);
4295         if (ret) {
4296                 mlog_errno(ret);
4297                 goto out;
4298         }
4299
4300         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4301         first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4302
4303         if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
4304                 ret = ocfs2_add_new_xattr_cluster(inode,
4305                                                   xb_bh,
4306                                                   &first_bh,
4307                                                   &header_bh,
4308                                                   &num_clusters,
4309                                                   e_cpos,
4310                                                   p_blkno,
4311                                                   &extend,
4312                                                   ctxt);
4313                 if (ret) {
4314                         mlog_errno(ret);
4315                         goto out;
4316                 }
4317         }
4318
4319         if (extend)
4320                 ret = ocfs2_extend_xattr_bucket(inode,
4321                                                 ctxt->handle,
4322                                                 first_bh,
4323                                                 header_bh,
4324                                                 num_clusters);
4325         if (ret)
4326                 mlog_errno(ret);
4327 out:
4328         brelse(first_bh);
4329         brelse(header_bh);
4330         return ret;
4331 }
4332
4333 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4334                                         struct ocfs2_xattr_bucket *bucket,
4335                                         int offs)
4336 {
4337         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4338
4339         offs = offs % inode->i_sb->s_blocksize;
4340         return bucket_block(bucket, block_off) + offs;
4341 }
4342
4343 /*
4344  * Handle the normal xattr set, including replace, delete and new.
4345  *
4346  * Note: "local" indicates the real data's locality. So we can't
4347  * just its bucket locality by its length.
4348  */
4349 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4350                                          struct ocfs2_xattr_info *xi,
4351                                          struct ocfs2_xattr_search *xs,
4352                                          u32 name_hash,
4353                                          int local)
4354 {
4355         struct ocfs2_xattr_entry *last, *xe;
4356         int name_len = strlen(xi->name);
4357         struct ocfs2_xattr_header *xh = xs->header;
4358         u16 count = le16_to_cpu(xh->xh_count), start;
4359         size_t blocksize = inode->i_sb->s_blocksize;
4360         char *val;
4361         size_t offs, size, new_size;
4362
4363         last = &xh->xh_entries[count];
4364         if (!xs->not_found) {
4365                 xe = xs->here;
4366                 offs = le16_to_cpu(xe->xe_name_offset);
4367                 if (ocfs2_xattr_is_local(xe))
4368                         size = OCFS2_XATTR_SIZE(name_len) +
4369                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4370                 else
4371                         size = OCFS2_XATTR_SIZE(name_len) +
4372                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4373
4374                 /*
4375                  * If the new value will be stored outside, xi->value has been
4376                  * initalized as an empty ocfs2_xattr_value_root, and the same
4377                  * goes with xi->value_len, so we can set new_size safely here.
4378                  * See ocfs2_xattr_set_in_bucket.
4379                  */
4380                 new_size = OCFS2_XATTR_SIZE(name_len) +
4381                            OCFS2_XATTR_SIZE(xi->value_len);
4382
4383                 le16_add_cpu(&xh->xh_name_value_len, -size);
4384                 if (xi->value) {
4385                         if (new_size > size)
4386                                 goto set_new_name_value;
4387
4388                         /* Now replace the old value with new one. */
4389                         if (local)
4390                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
4391                         else
4392                                 xe->xe_value_size = 0;
4393
4394                         val = ocfs2_xattr_bucket_get_val(inode,
4395                                                          xs->bucket, offs);
4396                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4397                                size - OCFS2_XATTR_SIZE(name_len));
4398                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4399                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4400                                        xi->value, xi->value_len);
4401
4402                         le16_add_cpu(&xh->xh_name_value_len, new_size);
4403                         ocfs2_xattr_set_local(xe, local);
4404                         return;
4405                 } else {
4406                         /*
4407                          * Remove the old entry if there is more than one.
4408                          * We don't remove the last entry so that we can
4409                          * use it to indicate the hash value of the empty
4410                          * bucket.
4411                          */
4412                         last -= 1;
4413                         le16_add_cpu(&xh->xh_count, -1);
4414                         if (xh->xh_count) {
4415                                 memmove(xe, xe + 1,
4416                                         (void *)last - (void *)xe);
4417                                 memset(last, 0,
4418                                        sizeof(struct ocfs2_xattr_entry));
4419                         } else
4420                                 xh->xh_free_start =
4421                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4422
4423                         return;
4424                 }
4425         } else {
4426                 /* find a new entry for insert. */
4427                 int low = 0, high = count - 1, tmp;
4428                 struct ocfs2_xattr_entry *tmp_xe;
4429
4430                 while (low <= high && count) {
4431                         tmp = (low + high) / 2;
4432                         tmp_xe = &xh->xh_entries[tmp];
4433
4434                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4435                                 low = tmp + 1;
4436                         else if (name_hash <
4437                                  le32_to_cpu(tmp_xe->xe_name_hash))
4438                                 high = tmp - 1;
4439                         else {
4440                                 low = tmp;
4441                                 break;
4442                         }
4443                 }
4444
4445                 xe = &xh->xh_entries[low];
4446                 if (low != count)
4447                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4448
4449                 le16_add_cpu(&xh->xh_count, 1);
4450                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4451                 xe->xe_name_hash = cpu_to_le32(name_hash);
4452                 xe->xe_name_len = name_len;
4453                 ocfs2_xattr_set_type(xe, xi->name_index);
4454         }
4455
4456 set_new_name_value:
4457         /* Insert the new name+value. */
4458         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4459
4460         /*
4461          * We must make sure that the name/value pair
4462          * exists in the same block.
4463          */
4464         offs = le16_to_cpu(xh->xh_free_start);
4465         start = offs - size;
4466
4467         if (start >> inode->i_sb->s_blocksize_bits !=
4468             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4469                 offs = offs - offs % blocksize;
4470                 xh->xh_free_start = cpu_to_le16(offs);
4471         }
4472
4473         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4474         xe->xe_name_offset = cpu_to_le16(offs - size);
4475
4476         memset(val, 0, size);
4477         memcpy(val, xi->name, name_len);
4478         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4479
4480         xe->xe_value_size = cpu_to_le64(xi->value_len);
4481         ocfs2_xattr_set_local(xe, local);
4482         xs->here = xe;
4483         le16_add_cpu(&xh->xh_free_start, -size);
4484         le16_add_cpu(&xh->xh_name_value_len, size);
4485
4486         return;
4487 }
4488
4489 /*
4490  * Set the xattr entry in the specified bucket.
4491  * The bucket is indicated by xs->bucket and it should have the enough
4492  * space for the xattr insertion.
4493  */
4494 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4495                                            handle_t *handle,
4496                                            struct ocfs2_xattr_info *xi,
4497                                            struct ocfs2_xattr_search *xs,
4498                                            u32 name_hash,
4499                                            int local)
4500 {
4501         int ret;
4502         u64 blkno;
4503
4504         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4505              (unsigned long)xi->value_len, xi->name_index,
4506              (unsigned long long)bucket_blkno(xs->bucket));
4507
4508         if (!xs->bucket->bu_bhs[1]) {
4509                 blkno = bucket_blkno(xs->bucket);
4510                 ocfs2_xattr_bucket_relse(xs->bucket);
4511                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4512                 if (ret) {
4513                         mlog_errno(ret);
4514                         goto out;
4515                 }
4516         }
4517
4518         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4519                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4520         if (ret < 0) {
4521                 mlog_errno(ret);
4522                 goto out;
4523         }
4524
4525         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4526         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4527
4528 out:
4529         return ret;
4530 }
4531
4532 static int ocfs2_xattr_value_update_size(struct inode *inode,
4533                                          handle_t *handle,
4534                                          struct buffer_head *xe_bh,
4535                                          struct ocfs2_xattr_entry *xe,
4536                                          u64 new_size)
4537 {
4538         int ret;
4539
4540         ret = ocfs2_journal_access(handle, inode, xe_bh,
4541                                    OCFS2_JOURNAL_ACCESS_WRITE);
4542         if (ret < 0) {
4543                 mlog_errno(ret);
4544                 goto out;
4545         }
4546
4547         xe->xe_value_size = cpu_to_le64(new_size);
4548
4549         ret = ocfs2_journal_dirty(handle, xe_bh);
4550         if (ret < 0)
4551                 mlog_errno(ret);
4552
4553 out:
4554         return ret;
4555 }
4556
4557 /*
4558  * Truncate the specified xe_off entry in xattr bucket.
4559  * bucket is indicated by header_bh and len is the new length.
4560  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4561  *
4562  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4563  */
4564 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4565                                              struct buffer_head *header_bh,
4566                                              int xe_off,
4567                                              int len,
4568                                              struct ocfs2_xattr_set_ctxt *ctxt)
4569 {
4570         int ret, offset;
4571         u64 value_blk;
4572         struct buffer_head *value_bh = NULL;
4573         struct ocfs2_xattr_value_root *xv;
4574         struct ocfs2_xattr_entry *xe;
4575         struct ocfs2_xattr_header *xh =
4576                         (struct ocfs2_xattr_header *)header_bh->b_data;
4577         size_t blocksize = inode->i_sb->s_blocksize;
4578
4579         xe = &xh->xh_entries[xe_off];
4580
4581         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4582
4583         offset = le16_to_cpu(xe->xe_name_offset) +
4584                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4585
4586         value_blk = offset / blocksize;
4587
4588         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4589         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4590         value_blk += header_bh->b_blocknr;
4591
4592         ret = ocfs2_read_block(inode, value_blk, &value_bh);
4593         if (ret) {
4594                 mlog_errno(ret);
4595                 goto out;
4596         }
4597
4598         xv = (struct ocfs2_xattr_value_root *)
4599                 (value_bh->b_data + offset % blocksize);
4600
4601         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4602              xe_off, (unsigned long long)header_bh->b_blocknr, len);
4603         ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
4604         if (ret) {
4605                 mlog_errno(ret);
4606                 goto out;
4607         }
4608
4609         ret = ocfs2_xattr_value_update_size(inode, ctxt->handle,
4610                                             header_bh, xe, len);
4611         if (ret) {
4612                 mlog_errno(ret);
4613                 goto out;
4614         }
4615
4616 out:
4617         brelse(value_bh);
4618         return ret;
4619 }
4620
4621 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4622                                         struct ocfs2_xattr_search *xs,
4623                                         int len,
4624                                         struct ocfs2_xattr_set_ctxt *ctxt)
4625 {
4626         int ret, offset;
4627         struct ocfs2_xattr_entry *xe = xs->here;
4628         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4629
4630         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4631
4632         offset = xe - xh->xh_entries;
4633         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
4634                                                 offset, len, ctxt);
4635         if (ret)
4636                 mlog_errno(ret);
4637
4638         return ret;
4639 }
4640
4641 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4642                                                 handle_t *handle,
4643                                                 struct ocfs2_xattr_search *xs,
4644                                                 char *val,
4645                                                 int value_len)
4646 {
4647         int offset;
4648         struct ocfs2_xattr_value_root *xv;
4649         struct ocfs2_xattr_entry *xe = xs->here;
4650
4651         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4652
4653         offset = le16_to_cpu(xe->xe_name_offset) +
4654                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4655
4656         xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4657
4658         return __ocfs2_xattr_set_value_outside(inode, handle,
4659                                                xv, val, value_len);
4660 }
4661
4662 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4663                                   struct buffer_head *root_bh,
4664                                   u64 blkno,
4665                                   u32 cpos,
4666                                   u32 len)
4667 {
4668         int ret;
4669         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4670         struct inode *tl_inode = osb->osb_tl_inode;
4671         handle_t *handle;
4672         struct ocfs2_xattr_block *xb =
4673                         (struct ocfs2_xattr_block *)root_bh->b_data;
4674         struct ocfs2_alloc_context *meta_ac = NULL;
4675         struct ocfs2_cached_dealloc_ctxt dealloc;
4676         struct ocfs2_extent_tree et;
4677
4678         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4679
4680         ocfs2_init_dealloc_ctxt(&dealloc);
4681
4682         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4683              cpos, len, (unsigned long long)blkno);
4684
4685         ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4686
4687         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4688         if (ret) {
4689                 mlog_errno(ret);
4690                 return ret;
4691         }
4692
4693         mutex_lock(&tl_inode->i_mutex);
4694
4695         if (ocfs2_truncate_log_needs_flush(osb)) {
4696                 ret = __ocfs2_flush_truncate_log(osb);
4697                 if (ret < 0) {
4698                         mlog_errno(ret);
4699                         goto out;
4700                 }
4701         }
4702
4703         handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4704         if (IS_ERR(handle)) {
4705                 ret = -ENOMEM;
4706                 mlog_errno(ret);
4707                 goto out;
4708         }
4709
4710         ret = ocfs2_journal_access(handle, inode, root_bh,
4711                                    OCFS2_JOURNAL_ACCESS_WRITE);
4712         if (ret) {
4713                 mlog_errno(ret);
4714                 goto out_commit;
4715         }
4716
4717         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4718                                   &dealloc);
4719         if (ret) {
4720                 mlog_errno(ret);
4721                 goto out_commit;
4722         }
4723
4724         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4725
4726         ret = ocfs2_journal_dirty(handle, root_bh);
4727         if (ret) {
4728                 mlog_errno(ret);
4729                 goto out_commit;
4730         }
4731
4732         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4733         if (ret)
4734                 mlog_errno(ret);
4735
4736 out_commit:
4737         ocfs2_commit_trans(osb, handle);
4738 out:
4739         ocfs2_schedule_truncate_log_flush(osb, 1);
4740
4741         mutex_unlock(&tl_inode->i_mutex);
4742
4743         if (meta_ac)
4744                 ocfs2_free_alloc_context(meta_ac);
4745
4746         ocfs2_run_deallocs(osb, &dealloc);
4747
4748         return ret;
4749 }
4750
4751 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4752                                          handle_t *handle,
4753                                          struct ocfs2_xattr_search *xs)
4754 {
4755         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4756         struct ocfs2_xattr_entry *last = &xh->xh_entries[
4757                                                 le16_to_cpu(xh->xh_count) - 1];
4758         int ret = 0;
4759
4760         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4761                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4762         if (ret) {
4763                 mlog_errno(ret);
4764                 return;
4765         }
4766
4767         /* Remove the old entry. */
4768         memmove(xs->here, xs->here + 1,
4769                 (void *)last - (void *)xs->here);
4770         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4771         le16_add_cpu(&xh->xh_count, -1);
4772
4773         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4774 }
4775
4776 /*
4777  * Set the xattr name/value in the bucket specified in xs.
4778  *
4779  * As the new value in xi may be stored in the bucket or in an outside cluster,
4780  * we divide the whole process into 3 steps:
4781  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4782  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4783  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4784  * 4. If the clusters for the new outside value can't be allocated, we need
4785  *    to free the xattr we allocated in set.
4786  */
4787 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4788                                      struct ocfs2_xattr_info *xi,
4789                                      struct ocfs2_xattr_search *xs,
4790                                      struct ocfs2_xattr_set_ctxt *ctxt)
4791 {
4792         int ret, local = 1;
4793         size_t value_len;
4794         char *val = (char *)xi->value;
4795         struct ocfs2_xattr_entry *xe = xs->here;
4796         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4797                                               strlen(xi->name));
4798
4799         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4800                 /*
4801                  * We need to truncate the xattr storage first.
4802                  *
4803                  * If both the old and new value are stored to
4804                  * outside block, we only need to truncate
4805                  * the storage and then set the value outside.
4806                  *
4807                  * If the new value should be stored within block,
4808                  * we should free all the outside block first and
4809                  * the modification to the xattr block will be done
4810                  * by following steps.
4811                  */
4812                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4813                         value_len = xi->value_len;
4814                 else
4815                         value_len = 0;
4816
4817                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4818                                                            value_len,
4819                                                            ctxt);
4820                 if (ret)
4821                         goto out;
4822
4823                 if (value_len)
4824                         goto set_value_outside;
4825         }
4826
4827         value_len = xi->value_len;
4828         /* So we have to handle the inside block change now. */
4829         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4830                 /*
4831                  * If the new value will be stored outside of block,
4832                  * initalize a new empty value root and insert it first.
4833                  */
4834                 local = 0;
4835                 xi->value = &def_xv;
4836                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4837         }
4838
4839         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
4840                                               name_hash, local);
4841         if (ret) {
4842                 mlog_errno(ret);
4843                 goto out;
4844         }
4845
4846         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4847                 goto out;
4848
4849         /* allocate the space now for the outside block storage. */
4850         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4851                                                    value_len, ctxt);
4852         if (ret) {
4853                 mlog_errno(ret);
4854
4855                 if (xs->not_found) {
4856                         /*
4857                          * We can't allocate enough clusters for outside
4858                          * storage and we have allocated xattr already,
4859                          * so need to remove it.
4860                          */
4861                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
4862                 }
4863                 goto out;
4864         }
4865
4866 set_value_outside:
4867         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
4868                                                    xs, val, value_len);
4869 out:
4870         return ret;
4871 }
4872
4873 /*
4874  * check whether the xattr bucket is filled up with the same hash value.
4875  * If we want to insert the xattr with the same hash, return -ENOSPC.
4876  * If we want to insert a xattr with different hash value, go ahead
4877  * and ocfs2_divide_xattr_bucket will handle this.
4878  */
4879 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4880                                               struct ocfs2_xattr_bucket *bucket,
4881                                               const char *name)
4882 {
4883         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4884         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4885
4886         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4887                 return 0;
4888
4889         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4890             xh->xh_entries[0].xe_name_hash) {
4891                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4892                      "hash = %u\n",
4893                      (unsigned long long)bucket_blkno(bucket),
4894                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4895                 return -ENOSPC;
4896         }
4897
4898         return 0;
4899 }
4900
4901 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4902                                              struct ocfs2_xattr_info *xi,
4903                                              struct ocfs2_xattr_search *xs,
4904                                              struct ocfs2_xattr_set_ctxt *ctxt)
4905 {
4906         struct ocfs2_xattr_header *xh;
4907         struct ocfs2_xattr_entry *xe;
4908         u16 count, header_size, xh_free_start;
4909         int free, max_free, need, old;
4910         size_t value_size = 0, name_len = strlen(xi->name);
4911         size_t blocksize = inode->i_sb->s_blocksize;
4912         int ret, allocation = 0;
4913
4914         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4915
4916 try_again:
4917         xh = xs->header;
4918         count = le16_to_cpu(xh->xh_count);
4919         xh_free_start = le16_to_cpu(xh->xh_free_start);
4920         header_size = sizeof(struct ocfs2_xattr_header) +
4921                         count * sizeof(struct ocfs2_xattr_entry);
4922         max_free = OCFS2_XATTR_BUCKET_SIZE -
4923                 le16_to_cpu(xh->xh_name_value_len) - header_size;
4924
4925         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4926                         "of %u which exceed block size\n",
4927                         (unsigned long long)bucket_blkno(xs->bucket),
4928                         header_size);
4929
4930         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4931                 value_size = OCFS2_XATTR_ROOT_SIZE;
4932         else if (xi->value)
4933                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4934
4935         if (xs->not_found)
4936                 need = sizeof(struct ocfs2_xattr_entry) +
4937                         OCFS2_XATTR_SIZE(name_len) + value_size;
4938         else {
4939                 need = value_size + OCFS2_XATTR_SIZE(name_len);
4940
4941                 /*
4942                  * We only replace the old value if the new length is smaller
4943                  * than the old one. Otherwise we will allocate new space in the
4944                  * bucket to store it.
4945                  */
4946                 xe = xs->here;
4947                 if (ocfs2_xattr_is_local(xe))
4948                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4949                 else
4950                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4951
4952                 if (old >= value_size)
4953                         need = 0;
4954         }
4955
4956         free = xh_free_start - header_size;
4957         /*
4958          * We need to make sure the new name/value pair
4959          * can exist in the same block.
4960          */
4961         if (xh_free_start % blocksize < need)
4962                 free -= xh_free_start % blocksize;
4963
4964         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4965              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4966              " %u\n", xs->not_found,
4967              (unsigned long long)bucket_blkno(xs->bucket),
4968              free, need, max_free, le16_to_cpu(xh->xh_free_start),
4969              le16_to_cpu(xh->xh_name_value_len));
4970
4971         if (free < need ||
4972             (xs->not_found &&
4973              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
4974                 if (need <= max_free &&
4975                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4976                         /*
4977                          * We can create the space by defragment. Since only the
4978                          * name/value will be moved, the xe shouldn't be changed
4979                          * in xs.
4980                          */
4981                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
4982                                                         xs->bucket);
4983                         if (ret) {
4984                                 mlog_errno(ret);
4985                                 goto out;
4986                         }
4987
4988                         xh_free_start = le16_to_cpu(xh->xh_free_start);
4989                         free = xh_free_start - header_size;
4990                         if (xh_free_start % blocksize < need)
4991                                 free -= xh_free_start % blocksize;
4992
4993                         if (free >= need)
4994                                 goto xattr_set;
4995
4996                         mlog(0, "Can't get enough space for xattr insert by "
4997                              "defragment. Need %u bytes, but we have %d, so "
4998                              "allocate new bucket for it.\n", need, free);
4999                 }
5000
5001                 /*
5002                  * We have to add new buckets or clusters and one
5003                  * allocation should leave us enough space for insert.
5004                  */
5005                 BUG_ON(allocation);
5006
5007                 /*
5008                  * We do not allow for overlapping ranges between buckets. And
5009                  * the maximum number of collisions we will allow for then is
5010                  * one bucket's worth, so check it here whether we need to
5011                  * add a new bucket for the insert.
5012                  */
5013                 ret = ocfs2_check_xattr_bucket_collision(inode,
5014                                                          xs->bucket,
5015                                                          xi->name);
5016                 if (ret) {
5017                         mlog_errno(ret);
5018                         goto out;
5019                 }
5020
5021                 ret = ocfs2_add_new_xattr_bucket(inode,
5022                                                  xs->xattr_bh,
5023                                                  xs->bucket->bu_bhs[0],
5024                                                  ctxt);
5025                 if (ret) {
5026                         mlog_errno(ret);
5027                         goto out;
5028                 }
5029
5030                 ocfs2_xattr_bucket_relse(xs->bucket);
5031
5032                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5033                                                    xi->name_index,
5034                                                    xi->name, xs);
5035                 if (ret && ret != -ENODATA)
5036                         goto out;
5037                 xs->not_found = ret;
5038                 allocation = 1;
5039                 goto try_again;
5040         }
5041
5042 xattr_set:
5043         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5044 out:
5045         mlog_exit(ret);
5046         return ret;
5047 }
5048
5049 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5050                                         struct ocfs2_xattr_bucket *bucket,
5051                                         void *para)
5052 {
5053         int ret = 0;
5054         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5055         u16 i;
5056         struct ocfs2_xattr_entry *xe;
5057         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5058         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5059
5060         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5061
5062         ctxt.handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
5063         if (IS_ERR(ctxt.handle)) {
5064                 ret = PTR_ERR(ctxt.handle);
5065                 mlog_errno(ret);
5066                 goto out;
5067         }
5068
5069         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5070                 xe = &xh->xh_entries[i];
5071                 if (ocfs2_xattr_is_local(xe))
5072                         continue;
5073
5074                 ret = ocfs2_xattr_bucket_value_truncate(inode,
5075                                                         bucket->bu_bhs[0],
5076                                                         i, 0, &ctxt);
5077                 if (ret) {
5078                         mlog_errno(ret);
5079                         break;
5080                 }
5081         }
5082
5083         ret = ocfs2_commit_trans(osb, ctxt.handle);
5084         ocfs2_schedule_truncate_log_flush(osb, 1);
5085         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5086 out:
5087         return ret;
5088 }
5089
5090 static int ocfs2_delete_xattr_index_block(struct inode *inode,
5091                                           struct buffer_head *xb_bh)
5092 {
5093         struct ocfs2_xattr_block *xb =
5094                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5095         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5096         int ret = 0;
5097         u32 name_hash = UINT_MAX, e_cpos, num_clusters;
5098         u64 p_blkno;
5099
5100         if (le16_to_cpu(el->l_next_free_rec) == 0)
5101                 return 0;
5102
5103         while (name_hash > 0) {
5104                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
5105                                           &e_cpos, &num_clusters, el);
5106                 if (ret) {
5107                         mlog_errno(ret);
5108                         goto out;
5109                 }
5110
5111                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
5112                                                   ocfs2_delete_xattr_in_bucket,
5113                                                   NULL);
5114                 if (ret) {
5115                         mlog_errno(ret);
5116                         goto out;
5117                 }
5118
5119                 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
5120                                              p_blkno, e_cpos, num_clusters);
5121                 if (ret) {
5122                         mlog_errno(ret);
5123                         break;
5124                 }
5125
5126                 if (e_cpos == 0)
5127                         break;
5128
5129                 name_hash = e_cpos - 1;
5130         }
5131
5132 out:
5133         return ret;
5134 }
5135
5136 /*
5137  * 'security' attributes support
5138  */
/*
 * Emit "security.<name>" (NUL terminated) into list, provided list is
 * non-NULL and list_size is large enough.  The number of bytes the full
 * name needs, terminator included, is returned in every case.
 */
static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
					size_t list_size, const char *name,
					size_t name_len)
{
	const size_t needed = XATTR_SECURITY_PREFIX_LEN + name_len + 1;

	if (!list || needed > list_size)
		return needed;

	memcpy(list, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
	memcpy(list + XATTR_SECURITY_PREFIX_LEN, name, name_len);
	list[needed - 1] = '\0';

	return needed;
}
5153
5154 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5155                                     void *buffer, size_t size)
5156 {
5157         if (strcmp(name, "") == 0)
5158                 return -EINVAL;
5159         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5160                                buffer, size);
5161 }
5162
5163 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5164                                     const void *value, size_t size, int flags)
5165 {
5166         if (strcmp(name, "") == 0)
5167                 return -EINVAL;
5168
5169         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5170                                size, flags);
5171 }
5172
5173 int ocfs2_init_security_get(struct inode *inode,
5174                             struct inode *dir,
5175                             struct ocfs2_security_xattr_info *si)
5176 {
5177         return security_inode_init_security(inode, dir, &si->name, &si->value,
5178                                             &si->value_len);
5179 }
5180
5181 int ocfs2_init_security_set(handle_t *handle,
5182                             struct inode *inode,
5183                             struct buffer_head *di_bh,
5184                             struct ocfs2_security_xattr_info *si,
5185                             struct ocfs2_alloc_context *xattr_ac,
5186                             struct ocfs2_alloc_context *data_ac)
5187 {
5188         return ocfs2_xattr_set_handle(handle, inode, di_bh,
5189                                      OCFS2_XATTR_INDEX_SECURITY,
5190                                      si->name, si->value, si->value_len, 0,
5191                                      xattr_ac, data_ac);
5192 }
5193
5194 struct xattr_handler ocfs2_xattr_security_handler = {
5195         .prefix = XATTR_SECURITY_PREFIX,
5196         .list   = ocfs2_xattr_security_list,
5197         .get    = ocfs2_xattr_security_get,
5198         .set    = ocfs2_xattr_security_set,
5199 };
5200
5201 /*
5202  * 'trusted' attributes support
5203  */
/*
 * Emit "trusted.<name>" (NUL terminated) into list, provided list is
 * non-NULL and list_size is large enough.  The number of bytes the full
 * name needs, terminator included, is returned in every case.
 */
static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
				       size_t list_size, const char *name,
				       size_t name_len)
{
	const size_t needed = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;

	if (!list || needed > list_size)
		return needed;

	memcpy(list, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
	memcpy(list + XATTR_TRUSTED_PREFIX_LEN, name, name_len);
	list[needed - 1] = '\0';

	return needed;
}
5218
5219 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
5220                                    void *buffer, size_t size)
5221 {
5222         if (strcmp(name, "") == 0)
5223                 return -EINVAL;
5224         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
5225                                buffer, size);
5226 }
5227
5228 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
5229                                    const void *value, size_t size, int flags)
5230 {
5231         if (strcmp(name, "") == 0)
5232                 return -EINVAL;
5233
5234         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
5235                                size, flags);
5236 }
5237
5238 struct xattr_handler ocfs2_xattr_trusted_handler = {
5239         .prefix = XATTR_TRUSTED_PREFIX,
5240         .list   = ocfs2_xattr_trusted_list,
5241         .get    = ocfs2_xattr_trusted_get,
5242         .set    = ocfs2_xattr_trusted_set,
5243 };
5244
5245 /*
5246  * 'user' attributes support
5247  */
5248 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
5249                                     size_t list_size, const char *name,
5250                                     size_t name_len)
5251 {
5252         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
5253         const size_t total_len = prefix_len + name_len + 1;
5254         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5255
5256         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5257                 return 0;
5258
5259         if (list && total_len <= list_size) {
5260                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
5261                 memcpy(list + prefix_len, name, name_len);
5262                 list[prefix_len + name_len] = '\0';
5263         }
5264         return total_len;
5265 }
5266
5267 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
5268                                 void *buffer, size_t size)
5269 {
5270         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5271
5272         if (strcmp(name, "") == 0)
5273                 return -EINVAL;
5274         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5275                 return -EOPNOTSUPP;
5276         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
5277                                buffer, size);
5278 }
5279
5280 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
5281                                 const void *value, size_t size, int flags)
5282 {
5283         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5284
5285         if (strcmp(name, "") == 0)
5286                 return -EINVAL;
5287         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5288                 return -EOPNOTSUPP;
5289
5290         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
5291                                size, flags);
5292 }
5293
5294 struct xattr_handler ocfs2_xattr_user_handler = {
5295         .prefix = XATTR_USER_PREFIX,
5296         .list   = ocfs2_xattr_user_list,
5297         .get    = ocfs2_xattr_user_get,
5298         .set    = ocfs2_xattr_user_set,
5299 };