ocfs2: Rename ocfs2_cp_xattr_cluster() to ocfs2_mv_xattr_buckets().
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "dlmglue.h"
46 #include "file.h"
47 #include "symlink.h"
48 #include "sysfile.h"
49 #include "inode.h"
50 #include "journal.h"
51 #include "ocfs2_fs.h"
52 #include "suballoc.h"
53 #include "uptodate.h"
54 #include "buffer_head_io.h"
55 #include "super.h"
56 #include "xattr.h"
57
58
/*
 * An xattr value root (xv) laid out together with one extent record (er).
 * sizeof() of this struct is what OCFS2_XATTR_ROOT_SIZE evaluates to.
 * NOTE(review): er presumably provides the storage for the first record
 * of xv's extent list - confirm against ocfs2_fs.h.
 */
59 struct ocfs2_xattr_def_value_root {
60         struct ocfs2_xattr_value_root   xv;
61         struct ocfs2_extent_rec         er;
62 };
63
/*
 * In-memory handle for one xattr bucket: the owning inode plus the
 * buffer_heads of the blocks that make up the bucket.  Allocated by
 * ocfs2_xattr_bucket_new() and torn down by ocfs2_xattr_bucket_free().
 */
64 struct ocfs2_xattr_bucket {
65         /* The inode these xattrs are associated with */
66         struct inode *bu_inode;
67
68         /* The actual buffers that make up the bucket */
69         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
70
71         /* How many blocks make up one bucket for this filesystem */
72         int bu_blocks;
73 };
74
/*
 * State threaded through the xattr modification helpers: the journal
 * handle in use, the metadata and data allocation contexts, and a
 * cached-dealloc context that collects clusters freed along the way.
 */
75 struct ocfs2_xattr_set_ctxt {
76         handle_t *handle;
77         struct ocfs2_alloc_context *meta_ac;
78         struct ocfs2_alloc_context *data_ac;
79         struct ocfs2_cached_dealloc_ctxt dealloc;
80 };
81
/* Bytes taken by a value root together with its single extent record. */
82 #define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Threshold used by ocfs2_xattr_entry_real_size(): values up to this many
 * bytes are sized as inline data, larger ones as a value root.
 */
83 #define OCFS2_XATTR_INLINE_SIZE 80
/*
 * Minimum inline xattr area minus the xattr header and one __u32.
 * NOTE(review): the purpose of the trailing __u32 is not visible here.
 */
84 #define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
85                                          - sizeof(struct ocfs2_xattr_header) \
86                                          - sizeof(__u32))
/*
 * One filesystem block minus the xattr block and header structures and
 * one __u32; ptr is an inode pointer (its i_sb supplies the block size).
 */
87 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
88                                          - sizeof(struct ocfs2_xattr_block) \
89                                          - sizeof(struct ocfs2_xattr_header) \
90                                          - sizeof(__u32))
91
/*
 * Default value root: everything zero except the extent list's l_count,
 * which is set to 1 (stored little-endian).
 */
92 static struct ocfs2_xattr_def_value_root def_xv = {
93         .xv.xr_list.l_count = cpu_to_le16(1),
94 };
95
/*
 * NULL-terminated list of the xattr handlers ocfs2 provides.  The two
 * ACL handlers are only present when CONFIG_OCFS2_FS_POSIX_ACL is set.
 */
96 struct xattr_handler *ocfs2_xattr_handlers[] = {
97         &ocfs2_xattr_user_handler,
98 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
99         &ocfs2_xattr_acl_access_handler,
100         &ocfs2_xattr_acl_default_handler,
101 #endif
102         &ocfs2_xattr_trusted_handler,
103         &ocfs2_xattr_security_handler,
104         NULL
105 };
106
/*
 * Handler lookup table indexed by xattr name index; slots without an
 * initializer stay NULL.  ocfs2_xattr_prefix() reads it to turn a name
 * index into the handler's prefix string.
 */
107 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
108         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
109 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
110         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
111                                         = &ocfs2_xattr_acl_access_handler,
112         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
113                                         = &ocfs2_xattr_acl_default_handler,
114 #endif
115         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
116         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
117 };
118
/*
 * Description of one extended attribute: its name index, name string,
 * and the value bytes with their length.
 */
119 struct ocfs2_xattr_info {
120         int name_index;
121         const char *name;
122         const void *value;
123         size_t value_len;
124 };
125
/*
 * Cursor describing where an xattr lookup is taking place and what it
 * found.  NOTE(review): the exact meaning of base/end/here/not_found is
 * set by the find routines, which are outside this excerpt.
 */
126 struct ocfs2_xattr_search {
127         struct buffer_head *inode_bh;
128         /*
129          * xattr_bh points to the buffer head holding the extended attributes;
130          * when the attributes live in the inode, xattr_bh equals inode_bh.
131          */
132         struct buffer_head *xattr_bh;
133         struct ocfs2_xattr_header *header;
134         struct ocfs2_xattr_bucket *bucket;
135         void *base;
136         void *end;
137         struct ocfs2_xattr_entry *here;
138         int not_found;
139 };
140
/*
 * Forward declarations for static helpers that are defined later in the
 * file but referenced before their definitions.
 */
141 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
142                                              struct ocfs2_xattr_header *xh,
143                                              int index,
144                                              int *block_off,
145                                              int *new_offset);
146
147 static int ocfs2_xattr_block_find(struct inode *inode,
148                                   int name_index,
149                                   const char *name,
150                                   struct ocfs2_xattr_search *xs);
151 static int ocfs2_xattr_index_block_find(struct inode *inode,
152                                         struct buffer_head *root_bh,
153                                         int name_index,
154                                         const char *name,
155                                         struct ocfs2_xattr_search *xs);
156
157 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
158                                         struct ocfs2_xattr_tree_root *xt,
159                                         char *buffer,
160                                         size_t buffer_size);
161
162 static int ocfs2_xattr_create_index_block(struct inode *inode,
163                                           struct ocfs2_xattr_search *xs,
164                                           struct ocfs2_xattr_set_ctxt *ctxt);
165
166 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
167                                              struct ocfs2_xattr_info *xi,
168                                              struct ocfs2_xattr_search *xs,
169                                              struct ocfs2_xattr_set_ctxt *ctxt);
170
171 static int ocfs2_delete_xattr_index_block(struct inode *inode,
172                                           struct buffer_head *xb_bh);
173 static int ocfs2_cp_xattr_bucket(struct inode *inode,
174                                  handle_t *handle,
175                                  u64 s_blkno,
176                                  u64 t_blkno,
177                                  int t_is_new);
178
179 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
180 {
181         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
182 }
183
184 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
185 {
186         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
187 }
188
189 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
190 {
191         u16 len = sb->s_blocksize -
192                  offsetof(struct ocfs2_xattr_header, xh_entries);
193
194         return len / sizeof(struct ocfs2_xattr_entry);
195 }
196
/* Block number of the bucket's first buffer. */
197 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data pointer of the _n-th buffer in the bucket. */
198 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header, which sits at the start of the bucket's first block. */
199 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
200
201 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
202 {
203         struct ocfs2_xattr_bucket *bucket;
204         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
205
206         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
207
208         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
209         if (bucket) {
210                 bucket->bu_inode = inode;
211                 bucket->bu_blocks = blks;
212         }
213
214         return bucket;
215 }
216
217 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
218 {
219         int i;
220
221         for (i = 0; i < bucket->bu_blocks; i++) {
222                 brelse(bucket->bu_bhs[i]);
223                 bucket->bu_bhs[i] = NULL;
224         }
225 }
226
227 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
228 {
229         if (bucket) {
230                 ocfs2_xattr_bucket_relse(bucket);
231                 bucket->bu_inode = NULL;
232                 kfree(bucket);
233         }
234 }
235
236 /*
237  * A bucket that has never been written to disk doesn't need to be
238  * read.  We just need the buffer_heads.  Don't call this for
239  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
240  * them fully.
241  */
242 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
243                                    u64 xb_blkno)
244 {
245         int i, rc = 0;
246
247         for (i = 0; i < bucket->bu_blocks; i++) {
248                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
249                                               xb_blkno + i);
250                 if (!bucket->bu_bhs[i]) {
251                         rc = -EIO;
252                         mlog_errno(rc);
253                         break;
254                 }
255
256                 if (!ocfs2_buffer_uptodate(bucket->bu_inode,
257                                            bucket->bu_bhs[i]))
258                         ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
259                                                       bucket->bu_bhs[i]);
260         }
261
262         if (rc)
263                 ocfs2_xattr_bucket_relse(bucket);
264         return rc;
265 }
266
267 /* Read the xattr bucket at xb_blkno */
268 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
269                                    u64 xb_blkno)
270 {
271         int rc;
272
273         rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
274                                bucket->bu_blocks, bucket->bu_bhs, 0,
275                                NULL);
276         if (rc)
277                 ocfs2_xattr_bucket_relse(bucket);
278         return rc;
279 }
280
281 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
282                                              struct ocfs2_xattr_bucket *bucket,
283                                              int type)
284 {
285         int i, rc = 0;
286
287         for (i = 0; i < bucket->bu_blocks; i++) {
288                 rc = ocfs2_journal_access(handle, bucket->bu_inode,
289                                           bucket->bu_bhs[i], type);
290                 if (rc) {
291                         mlog_errno(rc);
292                         break;
293                 }
294         }
295
296         return rc;
297 }
298
299 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
300                                              struct ocfs2_xattr_bucket *bucket)
301 {
302         int i;
303
304         for (i = 0; i < bucket->bu_blocks; i++)
305                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
306 }
307
308 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
309                                          struct ocfs2_xattr_bucket *src)
310 {
311         int i;
312         int blocksize = src->bu_inode->i_sb->s_blocksize;
313
314         BUG_ON(dest->bu_blocks != src->bu_blocks);
315         BUG_ON(dest->bu_inode != src->bu_inode);
316
317         for (i = 0; i < src->bu_blocks; i++) {
318                 memcpy(bucket_block(dest, i), bucket_block(src, i),
319                        blocksize);
320         }
321 }
322
323 static int ocfs2_validate_xattr_block(struct super_block *sb,
324                                       struct buffer_head *bh)
325 {
326         struct ocfs2_xattr_block *xb =
327                 (struct ocfs2_xattr_block *)bh->b_data;
328
329         mlog(0, "Validating xattr block %llu\n",
330              (unsigned long long)bh->b_blocknr);
331
332         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
333                 ocfs2_error(sb,
334                             "Extended attribute block #%llu has bad "
335                             "signature %.*s",
336                             (unsigned long long)bh->b_blocknr, 7,
337                             xb->xb_signature);
338                 return -EINVAL;
339         }
340
341         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
342                 ocfs2_error(sb,
343                             "Extended attribute block #%llu has an "
344                             "invalid xb_blkno of %llu",
345                             (unsigned long long)bh->b_blocknr,
346                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
347                 return -EINVAL;
348         }
349
350         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
351                 ocfs2_error(sb,
352                             "Extended attribute block #%llu has an invalid "
353                             "xb_fs_generation of #%u",
354                             (unsigned long long)bh->b_blocknr,
355                             le32_to_cpu(xb->xb_fs_generation));
356                 return -EINVAL;
357         }
358
359         return 0;
360 }
361
362 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
363                                   struct buffer_head **bh)
364 {
365         int rc;
366         struct buffer_head *tmp = *bh;
367
368         rc = ocfs2_read_block(inode, xb_blkno, &tmp,
369                               ocfs2_validate_xattr_block);
370
371         /* If ocfs2_read_block() got us a new bh, pass it up. */
372         if (!rc && !*bh)
373                 *bh = tmp;
374
375         return rc;
376 }
377
378 static inline const char *ocfs2_xattr_prefix(int name_index)
379 {
380         struct xattr_handler *handler = NULL;
381
382         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
383                 handler = ocfs2_xattr_handler_map[name_index];
384
385         return handler ? handler->prefix : NULL;
386 }
387
388 static u32 ocfs2_xattr_name_hash(struct inode *inode,
389                                  const char *name,
390                                  int name_len)
391 {
392         /* Get hash value of uuid from super block */
393         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
394         int i;
395
396         /* hash extended attribute name */
397         for (i = 0; i < name_len; i++) {
398                 hash = (hash << OCFS2_HASH_SHIFT) ^
399                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
400                        *name++;
401         }
402
403         return hash;
404 }
405
406 /*
407  * ocfs2_xattr_hash_entry()
408  *
409  * Compute the hash of an extended attribute.
410  */
411 static void ocfs2_xattr_hash_entry(struct inode *inode,
412                                    struct ocfs2_xattr_header *header,
413                                    struct ocfs2_xattr_entry *entry)
414 {
415         u32 hash = 0;
416         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
417
418         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
419         entry->xe_name_hash = cpu_to_le32(hash);
420
421         return;
422 }
423
424 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
425 {
426         int size = 0;
427
428         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
429                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
430         else
431                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
432         size += sizeof(struct ocfs2_xattr_entry);
433
434         return size;
435 }
436
437 int ocfs2_calc_security_init(struct inode *dir,
438                              struct ocfs2_security_xattr_info *si,
439                              int *want_clusters,
440                              int *xattr_credits,
441                              struct ocfs2_alloc_context **xattr_ac)
442 {
443         int ret = 0;
444         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
445         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
446                                                  si->value_len);
447
448         /*
449          * The max space of security xattr taken inline is
450          * 256(name) + 80(value) + 16(entry) = 352 bytes,
451          * So reserve one metadata block for it is ok.
452          */
453         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
454             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
455                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
456                 if (ret) {
457                         mlog_errno(ret);
458                         return ret;
459                 }
460                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
461         }
462
463         /* reserve clusters for xattr value which will be set in B tree*/
464         if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
465                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
466                                                            si->value_len);
467         return ret;
468 }
469
470 int ocfs2_calc_xattr_init(struct inode *dir,
471                           struct buffer_head *dir_bh,
472                           int mode,
473                           struct ocfs2_security_xattr_info *si,
474                           int *want_clusters,
475                           int *xattr_credits,
476                           struct ocfs2_alloc_context **xattr_ac)
477 {
478         int ret = 0;
479         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
480         int s_size = 0;
481         int a_size = 0;
482         int acl_len = 0;
483
484         if (si->enable)
485                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
486                                                      si->value_len);
487
488         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
489                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
490                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
491                                         "", NULL, 0);
492                 if (acl_len > 0) {
493                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
494                         if (S_ISDIR(mode))
495                                 a_size <<= 1;
496                 } else if (acl_len != 0 && acl_len != -ENODATA) {
497                         mlog_errno(ret);
498                         return ret;
499                 }
500         }
501
502         if (!(s_size + a_size))
503                 return ret;
504
505         /*
506          * The max space of security xattr taken inline is
507          * 256(name) + 80(value) + 16(entry) = 352 bytes,
508          * The max space of acl xattr taken inline is
509          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
510          * when blocksize = 512, may reserve one more cluser for
511          * xattr bucket, otherwise reserve one metadata block
512          * for them is ok.
513          */
514         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
515             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
516                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
517                 if (ret) {
518                         mlog_errno(ret);
519                         return ret;
520                 }
521                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
522         }
523
524         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
525             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
526                 *want_clusters += 1;
527                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
528         }
529
530         /* reserve clusters for xattr value which will be set in B tree*/
531         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE)
532                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
533                                                            si->value_len);
534         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
535             acl_len > OCFS2_XATTR_INLINE_SIZE) {
536                 *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
537                 if (S_ISDIR(mode))
538                         *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
539                                                                    acl_len);
540         }
541
542         return ret;
543 }
544
545 static int ocfs2_xattr_extend_allocation(struct inode *inode,
546                                          u32 clusters_to_add,
547                                          struct buffer_head *xattr_bh,
548                                          struct ocfs2_xattr_value_root *xv,
549                                          struct ocfs2_xattr_set_ctxt *ctxt)
550 {
551         int status = 0;
552         handle_t *handle = ctxt->handle;
553         enum ocfs2_alloc_restarted why;
554         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
555         u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
556         struct ocfs2_extent_tree et;
557
558         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
559
560         ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
561
562         status = ocfs2_journal_access(handle, inode, xattr_bh,
563                                       OCFS2_JOURNAL_ACCESS_WRITE);
564         if (status < 0) {
565                 mlog_errno(status);
566                 goto leave;
567         }
568
569         prev_clusters = le32_to_cpu(xv->xr_clusters);
570         status = ocfs2_add_clusters_in_btree(osb,
571                                              inode,
572                                              &logical_start,
573                                              clusters_to_add,
574                                              0,
575                                              &et,
576                                              handle,
577                                              ctxt->data_ac,
578                                              ctxt->meta_ac,
579                                              &why);
580         if (status < 0) {
581                 mlog_errno(status);
582                 goto leave;
583         }
584
585         status = ocfs2_journal_dirty(handle, xattr_bh);
586         if (status < 0) {
587                 mlog_errno(status);
588                 goto leave;
589         }
590
591         clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
592
593         /*
594          * We should have already allocated enough space before the transaction,
595          * so no need to restart.
596          */
597         BUG_ON(why != RESTART_NONE || clusters_to_add);
598
599 leave:
600
601         return status;
602 }
603
604 static int __ocfs2_remove_xattr_range(struct inode *inode,
605                                       struct buffer_head *root_bh,
606                                       struct ocfs2_xattr_value_root *xv,
607                                       u32 cpos, u32 phys_cpos, u32 len,
608                                       struct ocfs2_xattr_set_ctxt *ctxt)
609 {
610         int ret;
611         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
612         handle_t *handle = ctxt->handle;
613         struct ocfs2_extent_tree et;
614
615         ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
616
617         ret = ocfs2_journal_access(handle, inode, root_bh,
618                                    OCFS2_JOURNAL_ACCESS_WRITE);
619         if (ret) {
620                 mlog_errno(ret);
621                 goto out;
622         }
623
624         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
625                                   &ctxt->dealloc);
626         if (ret) {
627                 mlog_errno(ret);
628                 goto out;
629         }
630
631         le32_add_cpu(&xv->xr_clusters, -len);
632
633         ret = ocfs2_journal_dirty(handle, root_bh);
634         if (ret) {
635                 mlog_errno(ret);
636                 goto out;
637         }
638
639         ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
640         if (ret)
641                 mlog_errno(ret);
642
643 out:
644         return ret;
645 }
646
647 static int ocfs2_xattr_shrink_size(struct inode *inode,
648                                    u32 old_clusters,
649                                    u32 new_clusters,
650                                    struct buffer_head *root_bh,
651                                    struct ocfs2_xattr_value_root *xv,
652                                    struct ocfs2_xattr_set_ctxt *ctxt)
653 {
654         int ret = 0;
655         u32 trunc_len, cpos, phys_cpos, alloc_size;
656         u64 block;
657
658         if (old_clusters <= new_clusters)
659                 return 0;
660
661         cpos = new_clusters;
662         trunc_len = old_clusters - new_clusters;
663         while (trunc_len) {
664                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
665                                                &alloc_size, &xv->xr_list);
666                 if (ret) {
667                         mlog_errno(ret);
668                         goto out;
669                 }
670
671                 if (alloc_size > trunc_len)
672                         alloc_size = trunc_len;
673
674                 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
675                                                  phys_cpos, alloc_size,
676                                                  ctxt);
677                 if (ret) {
678                         mlog_errno(ret);
679                         goto out;
680                 }
681
682                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
683                 ocfs2_remove_xattr_clusters_from_cache(inode, block,
684                                                        alloc_size);
685                 cpos += alloc_size;
686                 trunc_len -= alloc_size;
687         }
688
689 out:
690         return ret;
691 }
692
693 static int ocfs2_xattr_value_truncate(struct inode *inode,
694                                       struct buffer_head *root_bh,
695                                       struct ocfs2_xattr_value_root *xv,
696                                       int len,
697                                       struct ocfs2_xattr_set_ctxt *ctxt)
698 {
699         int ret;
700         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
701         u32 old_clusters = le32_to_cpu(xv->xr_clusters);
702
703         if (new_clusters == old_clusters)
704                 return 0;
705
706         if (new_clusters > old_clusters)
707                 ret = ocfs2_xattr_extend_allocation(inode,
708                                                     new_clusters - old_clusters,
709                                                     root_bh, xv, ctxt);
710         else
711                 ret = ocfs2_xattr_shrink_size(inode,
712                                               old_clusters, new_clusters,
713                                               root_bh, xv, ctxt);
714
715         return ret;
716 }
717
/*
 * Append "<prefix><name>\0" to the listing in @buffer at offset *result
 * and advance *result by the bytes that entry takes.
 *
 * With @size == 0 only the running total is updated (size probe).  If
 * the entry would end beyond @size, -ERANGE is returned; *result has
 * still been advanced in that case.  Returns 0 otherwise.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int entry_len = prefix_len + name_len + 1;
	char *dst;

	*result += entry_len;

	/* Caller only wants to learn the required buffer size. */
	if (size == 0)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	memcpy(dst + prefix_len, name, name_len);
	dst[prefix_len + name_len] = '\0';

	return 0;
}
741
742 static int ocfs2_xattr_list_entries(struct inode *inode,
743                                     struct ocfs2_xattr_header *header,
744                                     char *buffer, size_t buffer_size)
745 {
746         size_t result = 0;
747         int i, type, ret;
748         const char *prefix, *name;
749
750         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
751                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
752                 type = ocfs2_xattr_get_type(entry);
753                 prefix = ocfs2_xattr_prefix(type);
754
755                 if (prefix) {
756                         name = (const char *)header +
757                                 le16_to_cpu(entry->xe_name_offset);
758
759                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
760                                                      &result, prefix, name,
761                                                      entry->xe_name_len);
762                         if (ret)
763                                 return ret;
764                 }
765         }
766
767         return result;
768 }
769
770 static int ocfs2_xattr_ibody_list(struct inode *inode,
771                                   struct ocfs2_dinode *di,
772                                   char *buffer,
773                                   size_t buffer_size)
774 {
775         struct ocfs2_xattr_header *header = NULL;
776         struct ocfs2_inode_info *oi = OCFS2_I(inode);
777         int ret = 0;
778
779         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
780                 return ret;
781
782         header = (struct ocfs2_xattr_header *)
783                  ((void *)di + inode->i_sb->s_blocksize -
784                  le16_to_cpu(di->i_xattr_inline_size));
785
786         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
787
788         return ret;
789 }
790
791 static int ocfs2_xattr_block_list(struct inode *inode,
792                                   struct ocfs2_dinode *di,
793                                   char *buffer,
794                                   size_t buffer_size)
795 {
796         struct buffer_head *blk_bh = NULL;
797         struct ocfs2_xattr_block *xb;
798         int ret = 0;
799
800         if (!di->i_xattr_loc)
801                 return ret;
802
803         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
804                                      &blk_bh);
805         if (ret < 0) {
806                 mlog_errno(ret);
807                 return ret;
808         }
809
810         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
811         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
812                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
813                 ret = ocfs2_xattr_list_entries(inode, header,
814                                                buffer, buffer_size);
815         } else {
816                 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
817                 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
818                                                    buffer, buffer_size);
819         }
820
821         brelse(blk_bh);
822
823         return ret;
824 }
825
826 ssize_t ocfs2_listxattr(struct dentry *dentry,
827                         char *buffer,
828                         size_t size)
829 {
830         int ret = 0, i_ret = 0, b_ret = 0;
831         struct buffer_head *di_bh = NULL;
832         struct ocfs2_dinode *di = NULL;
833         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
834
835         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
836                 return -EOPNOTSUPP;
837
838         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
839                 return ret;
840
841         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
842         if (ret < 0) {
843                 mlog_errno(ret);
844                 return ret;
845         }
846
847         di = (struct ocfs2_dinode *)di_bh->b_data;
848
849         down_read(&oi->ip_xattr_sem);
850         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
851         if (i_ret < 0)
852                 b_ret = 0;
853         else {
854                 if (buffer) {
855                         buffer += i_ret;
856                         size -= i_ret;
857                 }
858                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
859                                                buffer, size);
860                 if (b_ret < 0)
861                         i_ret = 0;
862         }
863         up_read(&oi->ip_xattr_sem);
864         ocfs2_inode_unlock(dentry->d_inode, 0);
865
866         brelse(di_bh);
867
868         return i_ret + b_ret;
869 }
870
871 static int ocfs2_xattr_find_entry(int name_index,
872                                   const char *name,
873                                   struct ocfs2_xattr_search *xs)
874 {
875         struct ocfs2_xattr_entry *entry;
876         size_t name_len;
877         int i, cmp = 1;
878
879         if (name == NULL)
880                 return -EINVAL;
881
882         name_len = strlen(name);
883         entry = xs->here;
884         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
885                 cmp = name_index - ocfs2_xattr_get_type(entry);
886                 if (!cmp)
887                         cmp = name_len - entry->xe_name_len;
888                 if (!cmp)
889                         cmp = memcmp(name, (xs->base +
890                                      le16_to_cpu(entry->xe_name_offset)),
891                                      name_len);
892                 if (cmp == 0)
893                         break;
894                 entry += 1;
895         }
896         xs->here = entry;
897
898         return cmp ? -ENODATA : 0;
899 }
900
901 static int ocfs2_xattr_get_value_outside(struct inode *inode,
902                                          struct ocfs2_xattr_value_root *xv,
903                                          void *buffer,
904                                          size_t len)
905 {
906         u32 cpos, p_cluster, num_clusters, bpc, clusters;
907         u64 blkno;
908         int i, ret = 0;
909         size_t cplen, blocksize;
910         struct buffer_head *bh = NULL;
911         struct ocfs2_extent_list *el;
912
913         el = &xv->xr_list;
914         clusters = le32_to_cpu(xv->xr_clusters);
915         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
916         blocksize = inode->i_sb->s_blocksize;
917
918         cpos = 0;
919         while (cpos < clusters) {
920                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
921                                                &num_clusters, el);
922                 if (ret) {
923                         mlog_errno(ret);
924                         goto out;
925                 }
926
927                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
928                 /* Copy ocfs2_xattr_value */
929                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
930                         ret = ocfs2_read_block(inode, blkno, &bh, NULL);
931                         if (ret) {
932                                 mlog_errno(ret);
933                                 goto out;
934                         }
935
936                         cplen = len >= blocksize ? blocksize : len;
937                         memcpy(buffer, bh->b_data, cplen);
938                         len -= cplen;
939                         buffer += cplen;
940
941                         brelse(bh);
942                         bh = NULL;
943                         if (len == 0)
944                                 break;
945                 }
946                 cpos += num_clusters;
947         }
948 out:
949         return ret;
950 }
951
952 static int ocfs2_xattr_ibody_get(struct inode *inode,
953                                  int name_index,
954                                  const char *name,
955                                  void *buffer,
956                                  size_t buffer_size,
957                                  struct ocfs2_xattr_search *xs)
958 {
959         struct ocfs2_inode_info *oi = OCFS2_I(inode);
960         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
961         struct ocfs2_xattr_value_root *xv;
962         size_t size;
963         int ret = 0;
964
965         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
966                 return -ENODATA;
967
968         xs->end = (void *)di + inode->i_sb->s_blocksize;
969         xs->header = (struct ocfs2_xattr_header *)
970                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
971         xs->base = (void *)xs->header;
972         xs->here = xs->header->xh_entries;
973
974         ret = ocfs2_xattr_find_entry(name_index, name, xs);
975         if (ret)
976                 return ret;
977         size = le64_to_cpu(xs->here->xe_value_size);
978         if (buffer) {
979                 if (size > buffer_size)
980                         return -ERANGE;
981                 if (ocfs2_xattr_is_local(xs->here)) {
982                         memcpy(buffer, (void *)xs->base +
983                                le16_to_cpu(xs->here->xe_name_offset) +
984                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
985                 } else {
986                         xv = (struct ocfs2_xattr_value_root *)
987                                 (xs->base + le16_to_cpu(
988                                  xs->here->xe_name_offset) +
989                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
990                         ret = ocfs2_xattr_get_value_outside(inode, xv,
991                                                             buffer, size);
992                         if (ret < 0) {
993                                 mlog_errno(ret);
994                                 return ret;
995                         }
996                 }
997         }
998
999         return size;
1000 }
1001
1002 static int ocfs2_xattr_block_get(struct inode *inode,
1003                                  int name_index,
1004                                  const char *name,
1005                                  void *buffer,
1006                                  size_t buffer_size,
1007                                  struct ocfs2_xattr_search *xs)
1008 {
1009         struct ocfs2_xattr_block *xb;
1010         struct ocfs2_xattr_value_root *xv;
1011         size_t size;
1012         int ret = -ENODATA, name_offset, name_len, block_off, i;
1013
1014         xs->bucket = ocfs2_xattr_bucket_new(inode);
1015         if (!xs->bucket) {
1016                 ret = -ENOMEM;
1017                 mlog_errno(ret);
1018                 goto cleanup;
1019         }
1020
1021         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1022         if (ret) {
1023                 mlog_errno(ret);
1024                 goto cleanup;
1025         }
1026
1027         if (xs->not_found) {
1028                 ret = -ENODATA;
1029                 goto cleanup;
1030         }
1031
1032         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1033         size = le64_to_cpu(xs->here->xe_value_size);
1034         if (buffer) {
1035                 ret = -ERANGE;
1036                 if (size > buffer_size)
1037                         goto cleanup;
1038
1039                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1040                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1041                 i = xs->here - xs->header->xh_entries;
1042
1043                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1044                         ret = ocfs2_xattr_bucket_get_name_value(inode,
1045                                                                 bucket_xh(xs->bucket),
1046                                                                 i,
1047                                                                 &block_off,
1048                                                                 &name_offset);
1049                         xs->base = bucket_block(xs->bucket, block_off);
1050                 }
1051                 if (ocfs2_xattr_is_local(xs->here)) {
1052                         memcpy(buffer, (void *)xs->base +
1053                                name_offset + name_len, size);
1054                 } else {
1055                         xv = (struct ocfs2_xattr_value_root *)
1056                                 (xs->base + name_offset + name_len);
1057                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1058                                                             buffer, size);
1059                         if (ret < 0) {
1060                                 mlog_errno(ret);
1061                                 goto cleanup;
1062                         }
1063                 }
1064         }
1065         ret = size;
1066 cleanup:
1067         ocfs2_xattr_bucket_free(xs->bucket);
1068
1069         brelse(xs->xattr_bh);
1070         xs->xattr_bh = NULL;
1071         return ret;
1072 }
1073
1074 int ocfs2_xattr_get_nolock(struct inode *inode,
1075                            struct buffer_head *di_bh,
1076                            int name_index,
1077                            const char *name,
1078                            void *buffer,
1079                            size_t buffer_size)
1080 {
1081         int ret;
1082         struct ocfs2_dinode *di = NULL;
1083         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1084         struct ocfs2_xattr_search xis = {
1085                 .not_found = -ENODATA,
1086         };
1087         struct ocfs2_xattr_search xbs = {
1088                 .not_found = -ENODATA,
1089         };
1090
1091         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1092                 return -EOPNOTSUPP;
1093
1094         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1095                 ret = -ENODATA;
1096
1097         xis.inode_bh = xbs.inode_bh = di_bh;
1098         di = (struct ocfs2_dinode *)di_bh->b_data;
1099
1100         down_read(&oi->ip_xattr_sem);
1101         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1102                                     buffer_size, &xis);
1103         if (ret == -ENODATA && di->i_xattr_loc)
1104                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1105                                             buffer_size, &xbs);
1106         up_read(&oi->ip_xattr_sem);
1107
1108         return ret;
1109 }
1110
1111 /* ocfs2_xattr_get()
1112  *
1113  * Copy an extended attribute into the buffer provided.
1114  * Buffer is NULL to compute the size of buffer required.
1115  */
1116 static int ocfs2_xattr_get(struct inode *inode,
1117                            int name_index,
1118                            const char *name,
1119                            void *buffer,
1120                            size_t buffer_size)
1121 {
1122         int ret;
1123         struct buffer_head *di_bh = NULL;
1124
1125         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1126         if (ret < 0) {
1127                 mlog_errno(ret);
1128                 return ret;
1129         }
1130         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1131                                      name, buffer, buffer_size);
1132
1133         ocfs2_inode_unlock(inode, 0);
1134
1135         brelse(di_bh);
1136
1137         return ret;
1138 }
1139
1140 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1141                                            handle_t *handle,
1142                                            struct ocfs2_xattr_value_root *xv,
1143                                            const void *value,
1144                                            int value_len)
1145 {
1146         int ret = 0, i, cp_len, credits;
1147         u16 blocksize = inode->i_sb->s_blocksize;
1148         u32 p_cluster, num_clusters;
1149         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1150         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1151         u64 blkno;
1152         struct buffer_head *bh = NULL;
1153
1154         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1155
1156         /*
1157          * In __ocfs2_xattr_set_value_outside has already been dirtied,
1158          * so we don't need to worry about whether ocfs2_extend_trans
1159          * will create a new transactio for us or not.
1160          */
1161         credits = clusters * bpc;
1162         ret = ocfs2_extend_trans(handle, credits);
1163         if (ret) {
1164                 mlog_errno(ret);
1165                 goto out;
1166         }
1167
1168         while (cpos < clusters) {
1169                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1170                                                &num_clusters, &xv->xr_list);
1171                 if (ret) {
1172                         mlog_errno(ret);
1173                         goto out;
1174                 }
1175
1176                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1177
1178                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1179                         ret = ocfs2_read_block(inode, blkno, &bh, NULL);
1180                         if (ret) {
1181                                 mlog_errno(ret);
1182                                 goto out;
1183                         }
1184
1185                         ret = ocfs2_journal_access(handle,
1186                                                    inode,
1187                                                    bh,
1188                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1189                         if (ret < 0) {
1190                                 mlog_errno(ret);
1191                                 goto out;
1192                         }
1193
1194                         cp_len = value_len > blocksize ? blocksize : value_len;
1195                         memcpy(bh->b_data, value, cp_len);
1196                         value_len -= cp_len;
1197                         value += cp_len;
1198                         if (cp_len < blocksize)
1199                                 memset(bh->b_data + cp_len, 0,
1200                                        blocksize - cp_len);
1201
1202                         ret = ocfs2_journal_dirty(handle, bh);
1203                         if (ret < 0) {
1204                                 mlog_errno(ret);
1205                                 goto out;
1206                         }
1207                         brelse(bh);
1208                         bh = NULL;
1209
1210                         /*
1211                          * XXX: do we need to empty all the following
1212                          * blocks in this cluster?
1213                          */
1214                         if (!value_len)
1215                                 break;
1216                 }
1217                 cpos += num_clusters;
1218         }
1219 out:
1220         brelse(bh);
1221
1222         return ret;
1223 }
1224
1225 static int ocfs2_xattr_cleanup(struct inode *inode,
1226                                handle_t *handle,
1227                                struct ocfs2_xattr_info *xi,
1228                                struct ocfs2_xattr_search *xs,
1229                                size_t offs)
1230 {
1231         int ret = 0;
1232         size_t name_len = strlen(xi->name);
1233         void *val = xs->base + offs;
1234         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1235
1236         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1237                                    OCFS2_JOURNAL_ACCESS_WRITE);
1238         if (ret) {
1239                 mlog_errno(ret);
1240                 goto out;
1241         }
1242         /* Decrease xattr count */
1243         le16_add_cpu(&xs->header->xh_count, -1);
1244         /* Remove the xattr entry and tree root which has already be set*/
1245         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1246         memset(val, 0, size);
1247
1248         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1249         if (ret < 0)
1250                 mlog_errno(ret);
1251 out:
1252         return ret;
1253 }
1254
1255 static int ocfs2_xattr_update_entry(struct inode *inode,
1256                                     handle_t *handle,
1257                                     struct ocfs2_xattr_info *xi,
1258                                     struct ocfs2_xattr_search *xs,
1259                                     size_t offs)
1260 {
1261         int ret;
1262
1263         ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1264                                    OCFS2_JOURNAL_ACCESS_WRITE);
1265         if (ret) {
1266                 mlog_errno(ret);
1267                 goto out;
1268         }
1269
1270         xs->here->xe_name_offset = cpu_to_le16(offs);
1271         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1272         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1273                 ocfs2_xattr_set_local(xs->here, 1);
1274         else
1275                 ocfs2_xattr_set_local(xs->here, 0);
1276         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1277
1278         ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1279         if (ret < 0)
1280                 mlog_errno(ret);
1281 out:
1282         return ret;
1283 }
1284
1285 /*
1286  * ocfs2_xattr_set_value_outside()
1287  *
1288  * Set large size value in B tree.
1289  */
1290 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1291                                          struct ocfs2_xattr_info *xi,
1292                                          struct ocfs2_xattr_search *xs,
1293                                          struct ocfs2_xattr_set_ctxt *ctxt,
1294                                          size_t offs)
1295 {
1296         size_t name_len = strlen(xi->name);
1297         void *val = xs->base + offs;
1298         struct ocfs2_xattr_value_root *xv = NULL;
1299         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1300         int ret = 0;
1301
1302         memset(val, 0, size);
1303         memcpy(val, xi->name, name_len);
1304         xv = (struct ocfs2_xattr_value_root *)
1305                 (val + OCFS2_XATTR_SIZE(name_len));
1306         xv->xr_clusters = 0;
1307         xv->xr_last_eb_blk = 0;
1308         xv->xr_list.l_tree_depth = 0;
1309         xv->xr_list.l_count = cpu_to_le16(1);
1310         xv->xr_list.l_next_free_rec = 0;
1311
1312         ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1313                                          xi->value_len, ctxt);
1314         if (ret < 0) {
1315                 mlog_errno(ret);
1316                 return ret;
1317         }
1318         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
1319         if (ret < 0) {
1320                 mlog_errno(ret);
1321                 return ret;
1322         }
1323         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
1324                                               xi->value, xi->value_len);
1325         if (ret < 0)
1326                 mlog_errno(ret);
1327
1328         return ret;
1329 }
1330
1331 /*
1332  * ocfs2_xattr_set_entry_local()
1333  *
1334  * Set, replace or remove extended attribute in local.
1335  */
1336 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1337                                         struct ocfs2_xattr_info *xi,
1338                                         struct ocfs2_xattr_search *xs,
1339                                         struct ocfs2_xattr_entry *last,
1340                                         size_t min_offs)
1341 {
1342         size_t name_len = strlen(xi->name);
1343         int i;
1344
1345         if (xi->value && xs->not_found) {
1346                 /* Insert the new xattr entry. */
1347                 le16_add_cpu(&xs->header->xh_count, 1);
1348                 ocfs2_xattr_set_type(last, xi->name_index);
1349                 ocfs2_xattr_set_local(last, 1);
1350                 last->xe_name_len = name_len;
1351         } else {
1352                 void *first_val;
1353                 void *val;
1354                 size_t offs, size;
1355
1356                 first_val = xs->base + min_offs;
1357                 offs = le16_to_cpu(xs->here->xe_name_offset);
1358                 val = xs->base + offs;
1359
1360                 if (le64_to_cpu(xs->here->xe_value_size) >
1361                     OCFS2_XATTR_INLINE_SIZE)
1362                         size = OCFS2_XATTR_SIZE(name_len) +
1363                                 OCFS2_XATTR_ROOT_SIZE;
1364                 else
1365                         size = OCFS2_XATTR_SIZE(name_len) +
1366                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1367
1368                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1369                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1370                         /* The old and the new value have the
1371                            same size. Just replace the value. */
1372                         ocfs2_xattr_set_local(xs->here, 1);
1373                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1374                         /* Clear value bytes. */
1375                         memset(val + OCFS2_XATTR_SIZE(name_len),
1376                                0,
1377                                OCFS2_XATTR_SIZE(xi->value_len));
1378                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1379                                xi->value,
1380                                xi->value_len);
1381                         return;
1382                 }
1383                 /* Remove the old name+value. */
1384                 memmove(first_val + size, first_val, val - first_val);
1385                 memset(first_val, 0, size);
1386                 xs->here->xe_name_hash = 0;
1387                 xs->here->xe_name_offset = 0;
1388                 ocfs2_xattr_set_local(xs->here, 1);
1389                 xs->here->xe_value_size = 0;
1390
1391                 min_offs += size;
1392
1393                 /* Adjust all value offsets. */
1394                 last = xs->header->xh_entries;
1395                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1396                         size_t o = le16_to_cpu(last->xe_name_offset);
1397
1398                         if (o < offs)
1399                                 last->xe_name_offset = cpu_to_le16(o + size);
1400                         last += 1;
1401                 }
1402
1403                 if (!xi->value) {
1404                         /* Remove the old entry. */
1405                         last -= 1;
1406                         memmove(xs->here, xs->here + 1,
1407                                 (void *)last - (void *)xs->here);
1408                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1409                         le16_add_cpu(&xs->header->xh_count, -1);
1410                 }
1411         }
1412         if (xi->value) {
1413                 /* Insert the new name+value. */
1414                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1415                                 OCFS2_XATTR_SIZE(xi->value_len);
1416                 void *val = xs->base + min_offs - size;
1417
1418                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1419                 memset(val, 0, size);
1420                 memcpy(val, xi->name, name_len);
1421                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1422                        xi->value,
1423                        xi->value_len);
1424                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1425                 ocfs2_xattr_set_local(xs->here, 1);
1426                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1427         }
1428
1429         return;
1430 }
1431
1432 /*
1433  * ocfs2_xattr_set_entry()
1434  *
1435  * Set extended attribute entry into inode or block.
1436  *
1437  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1438  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1439  * then set value in B tree with set_value_outside().
1440  */
1441 static int ocfs2_xattr_set_entry(struct inode *inode,
1442                                  struct ocfs2_xattr_info *xi,
1443                                  struct ocfs2_xattr_search *xs,
1444                                  struct ocfs2_xattr_set_ctxt *ctxt,
1445                                  int flag)
1446 {
1447         struct ocfs2_xattr_entry *last;
1448         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1449         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1450         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1451         size_t size_l = 0;
1452         handle_t *handle = ctxt->handle;
1453         int free, i, ret;
1454         struct ocfs2_xattr_info xi_l = {
1455                 .name_index = xi->name_index,
1456                 .name = xi->name,
1457                 .value = xi->value,
1458                 .value_len = xi->value_len,
1459         };
1460
1461         /* Compute min_offs, last and free space. */
1462         last = xs->header->xh_entries;
1463
1464         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1465                 size_t offs = le16_to_cpu(last->xe_name_offset);
1466                 if (offs < min_offs)
1467                         min_offs = offs;
1468                 last += 1;
1469         }
1470
1471         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1472         if (free < 0)
1473                 return -EIO;
1474
1475         if (!xs->not_found) {
1476                 size_t size = 0;
1477                 if (ocfs2_xattr_is_local(xs->here))
1478                         size = OCFS2_XATTR_SIZE(name_len) +
1479                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1480                 else
1481                         size = OCFS2_XATTR_SIZE(name_len) +
1482                                 OCFS2_XATTR_ROOT_SIZE;
1483                 free += (size + sizeof(struct ocfs2_xattr_entry));
1484         }
1485         /* Check free space in inode or block */
1486         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1487                 if (free < sizeof(struct ocfs2_xattr_entry) +
1488                            OCFS2_XATTR_SIZE(name_len) +
1489                            OCFS2_XATTR_ROOT_SIZE) {
1490                         ret = -ENOSPC;
1491                         goto out;
1492                 }
1493                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1494                 xi_l.value = (void *)&def_xv;
1495                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1496         } else if (xi->value) {
1497                 if (free < sizeof(struct ocfs2_xattr_entry) +
1498                            OCFS2_XATTR_SIZE(name_len) +
1499                            OCFS2_XATTR_SIZE(xi->value_len)) {
1500                         ret = -ENOSPC;
1501                         goto out;
1502                 }
1503         }
1504
1505         if (!xs->not_found) {
1506                 /* For existing extended attribute */
1507                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1508                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1509                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1510                 void *val = xs->base + offs;
1511
1512                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1513                         /* Replace existing local xattr with tree root */
1514                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1515                                                             ctxt, offs);
1516                         if (ret < 0)
1517                                 mlog_errno(ret);
1518                         goto out;
1519                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1520                         /* For existing xattr which has value outside */
1521                         struct ocfs2_xattr_value_root *xv = NULL;
1522                         xv = (struct ocfs2_xattr_value_root *)(val +
1523                                 OCFS2_XATTR_SIZE(name_len));
1524
1525                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1526                                 /*
1527                                  * If new value need set outside also,
1528                                  * first truncate old value to new value,
1529                                  * then set new value with set_value_outside().
1530                                  */
1531                                 ret = ocfs2_xattr_value_truncate(inode,
1532                                                                  xs->xattr_bh,
1533                                                                  xv,
1534                                                                  xi->value_len,
1535                                                                  ctxt);
1536                                 if (ret < 0) {
1537                                         mlog_errno(ret);
1538                                         goto out;
1539                                 }
1540
1541                                 ret = ocfs2_xattr_update_entry(inode,
1542                                                                handle,
1543                                                                xi,
1544                                                                xs,
1545                                                                offs);
1546                                 if (ret < 0) {
1547                                         mlog_errno(ret);
1548                                         goto out;
1549                                 }
1550
1551                                 ret = __ocfs2_xattr_set_value_outside(inode,
1552                                                                 handle,
1553                                                                 xv,
1554                                                                 xi->value,
1555                                                                 xi->value_len);
1556                                 if (ret < 0)
1557                                         mlog_errno(ret);
1558                                 goto out;
1559                         } else {
1560                                 /*
1561                                  * If new value need set in local,
1562                                  * just trucate old value to zero.
1563                                  */
1564                                  ret = ocfs2_xattr_value_truncate(inode,
1565                                                                   xs->xattr_bh,
1566                                                                   xv,
1567                                                                   0,
1568                                                                   ctxt);
1569                                 if (ret < 0)
1570                                         mlog_errno(ret);
1571                         }
1572                 }
1573         }
1574
1575         ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1576                                    OCFS2_JOURNAL_ACCESS_WRITE);
1577         if (ret) {
1578                 mlog_errno(ret);
1579                 goto out;
1580         }
1581
1582         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1583                 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1584                                            OCFS2_JOURNAL_ACCESS_WRITE);
1585                 if (ret) {
1586                         mlog_errno(ret);
1587                         goto out;
1588                 }
1589         }
1590
1591         /*
1592          * Set value in local, include set tree root in local.
1593          * This is the first step for value size >INLINE_SIZE.
1594          */
1595         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1596
1597         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1598                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1599                 if (ret < 0) {
1600                         mlog_errno(ret);
1601                         goto out;
1602                 }
1603         }
1604
1605         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1606             (flag & OCFS2_INLINE_XATTR_FL)) {
1607                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1608                 unsigned int xattrsize = osb->s_xattr_inline_size;
1609
1610                 /*
1611                  * Adjust extent record count or inline data size
1612                  * to reserve space for extended attribute.
1613                  */
1614                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1615                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1616                         le16_add_cpu(&idata->id_count, -xattrsize);
1617                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1618                         struct ocfs2_extent_list *el = &di->id2.i_list;
1619                         le16_add_cpu(&el->l_count, -(xattrsize /
1620                                         sizeof(struct ocfs2_extent_rec)));
1621                 }
1622                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1623         }
1624         /* Update xattr flag */
1625         spin_lock(&oi->ip_lock);
1626         oi->ip_dyn_features |= flag;
1627         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1628         spin_unlock(&oi->ip_lock);
1629         /* Update inode ctime */
1630         inode->i_ctime = CURRENT_TIME;
1631         di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1632         di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1633
1634         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1635         if (ret < 0)
1636                 mlog_errno(ret);
1637
1638         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1639                 /*
1640                  * Set value outside in B tree.
1641                  * This is the second step for value size > INLINE_SIZE.
1642                  */
1643                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1644                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
1645                 if (ret < 0) {
1646                         int ret2;
1647
1648                         mlog_errno(ret);
1649                         /*
1650                          * If set value outside failed, we have to clean
1651                          * the junk tree root we have already set in local.
1652                          */
1653                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1654                                                    xi, xs, offs);
1655                         if (ret2 < 0)
1656                                 mlog_errno(ret2);
1657                 }
1658         }
1659 out:
1660         return ret;
1661 }
1662
1663 static int ocfs2_remove_value_outside(struct inode*inode,
1664                                       struct buffer_head *bh,
1665                                       struct ocfs2_xattr_header *header)
1666 {
1667         int ret = 0, i;
1668         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1669         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1670
1671         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1672
1673         ctxt.handle = ocfs2_start_trans(osb,
1674                                         ocfs2_remove_extent_credits(osb->sb));
1675         if (IS_ERR(ctxt.handle)) {
1676                 ret = PTR_ERR(ctxt.handle);
1677                 mlog_errno(ret);
1678                 goto out;
1679         }
1680
1681         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1682                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1683
1684                 if (!ocfs2_xattr_is_local(entry)) {
1685                         struct ocfs2_xattr_value_root *xv;
1686                         void *val;
1687
1688                         val = (void *)header +
1689                                 le16_to_cpu(entry->xe_name_offset);
1690                         xv = (struct ocfs2_xattr_value_root *)
1691                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1692                         ret = ocfs2_xattr_value_truncate(inode, bh, xv,
1693                                                          0, &ctxt);
1694                         if (ret < 0) {
1695                                 mlog_errno(ret);
1696                                 break;
1697                         }
1698                 }
1699         }
1700
1701         ocfs2_commit_trans(osb, ctxt.handle);
1702         ocfs2_schedule_truncate_log_flush(osb, 1);
1703         ocfs2_run_deallocs(osb, &ctxt.dealloc);
1704 out:
1705         return ret;
1706 }
1707
1708 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1709                                     struct buffer_head *di_bh)
1710 {
1711
1712         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1713         struct ocfs2_xattr_header *header;
1714         int ret;
1715
1716         header = (struct ocfs2_xattr_header *)
1717                  ((void *)di + inode->i_sb->s_blocksize -
1718                  le16_to_cpu(di->i_xattr_inline_size));
1719
1720         ret = ocfs2_remove_value_outside(inode, di_bh, header);
1721
1722         return ret;
1723 }
1724
1725 static int ocfs2_xattr_block_remove(struct inode *inode,
1726                                     struct buffer_head *blk_bh)
1727 {
1728         struct ocfs2_xattr_block *xb;
1729         int ret = 0;
1730
1731         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1732         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1733                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1734                 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1735         } else
1736                 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1737
1738         return ret;
1739 }
1740
1741 static int ocfs2_xattr_free_block(struct inode *inode,
1742                                   u64 block)
1743 {
1744         struct inode *xb_alloc_inode;
1745         struct buffer_head *xb_alloc_bh = NULL;
1746         struct buffer_head *blk_bh = NULL;
1747         struct ocfs2_xattr_block *xb;
1748         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1749         handle_t *handle;
1750         int ret = 0;
1751         u64 blk, bg_blkno;
1752         u16 bit;
1753
1754         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1755         if (ret < 0) {
1756                 mlog_errno(ret);
1757                 goto out;
1758         }
1759
1760         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1761         if (ret < 0) {
1762                 mlog_errno(ret);
1763                 goto out;
1764         }
1765
1766         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1767         blk = le64_to_cpu(xb->xb_blkno);
1768         bit = le16_to_cpu(xb->xb_suballoc_bit);
1769         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1770
1771         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1772                                 EXTENT_ALLOC_SYSTEM_INODE,
1773                                 le16_to_cpu(xb->xb_suballoc_slot));
1774         if (!xb_alloc_inode) {
1775                 ret = -ENOMEM;
1776                 mlog_errno(ret);
1777                 goto out;
1778         }
1779         mutex_lock(&xb_alloc_inode->i_mutex);
1780
1781         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1782         if (ret < 0) {
1783                 mlog_errno(ret);
1784                 goto out_mutex;
1785         }
1786
1787         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1788         if (IS_ERR(handle)) {
1789                 ret = PTR_ERR(handle);
1790                 mlog_errno(ret);
1791                 goto out_unlock;
1792         }
1793
1794         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1795                                        bit, bg_blkno, 1);
1796         if (ret < 0)
1797                 mlog_errno(ret);
1798
1799         ocfs2_commit_trans(osb, handle);
1800 out_unlock:
1801         ocfs2_inode_unlock(xb_alloc_inode, 1);
1802         brelse(xb_alloc_bh);
1803 out_mutex:
1804         mutex_unlock(&xb_alloc_inode->i_mutex);
1805         iput(xb_alloc_inode);
1806 out:
1807         brelse(blk_bh);
1808         return ret;
1809 }
1810
1811 /*
1812  * ocfs2_xattr_remove()
1813  *
1814  * Free extended attribute resources associated with this inode.
1815  */
1816 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1817 {
1818         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1819         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1820         handle_t *handle;
1821         int ret;
1822
1823         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1824                 return 0;
1825
1826         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1827                 return 0;
1828
1829         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1830                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1831                 if (ret < 0) {
1832                         mlog_errno(ret);
1833                         goto out;
1834                 }
1835         }
1836
1837         if (di->i_xattr_loc) {
1838                 ret = ocfs2_xattr_free_block(inode,
1839                                              le64_to_cpu(di->i_xattr_loc));
1840                 if (ret < 0) {
1841                         mlog_errno(ret);
1842                         goto out;
1843                 }
1844         }
1845
1846         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1847                                    OCFS2_INODE_UPDATE_CREDITS);
1848         if (IS_ERR(handle)) {
1849                 ret = PTR_ERR(handle);
1850                 mlog_errno(ret);
1851                 goto out;
1852         }
1853         ret = ocfs2_journal_access(handle, inode, di_bh,
1854                                    OCFS2_JOURNAL_ACCESS_WRITE);
1855         if (ret) {
1856                 mlog_errno(ret);
1857                 goto out_commit;
1858         }
1859
1860         di->i_xattr_loc = 0;
1861
1862         spin_lock(&oi->ip_lock);
1863         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1864         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1865         spin_unlock(&oi->ip_lock);
1866
1867         ret = ocfs2_journal_dirty(handle, di_bh);
1868         if (ret < 0)
1869                 mlog_errno(ret);
1870 out_commit:
1871         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1872 out:
1873         return ret;
1874 }
1875
1876 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1877                                         struct ocfs2_dinode *di)
1878 {
1879         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1880         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1881         int free;
1882
1883         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1884                 return 0;
1885
1886         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1887                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1888                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1889         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1890                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1891                         le64_to_cpu(di->i_size);
1892         } else {
1893                 struct ocfs2_extent_list *el = &di->id2.i_list;
1894                 free = (le16_to_cpu(el->l_count) -
1895                         le16_to_cpu(el->l_next_free_rec)) *
1896                         sizeof(struct ocfs2_extent_rec);
1897         }
1898         if (free >= xattrsize)
1899                 return 1;
1900
1901         return 0;
1902 }
1903
1904 /*
1905  * ocfs2_xattr_ibody_find()
1906  *
1907  * Find extended attribute in inode block and
1908  * fill search info into struct ocfs2_xattr_search.
1909  */
1910 static int ocfs2_xattr_ibody_find(struct inode *inode,
1911                                   int name_index,
1912                                   const char *name,
1913                                   struct ocfs2_xattr_search *xs)
1914 {
1915         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1916         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1917         int ret;
1918         int has_space = 0;
1919
1920         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1921                 return 0;
1922
1923         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1924                 down_read(&oi->ip_alloc_sem);
1925                 has_space = ocfs2_xattr_has_space_inline(inode, di);
1926                 up_read(&oi->ip_alloc_sem);
1927                 if (!has_space)
1928                         return 0;
1929         }
1930
1931         xs->xattr_bh = xs->inode_bh;
1932         xs->end = (void *)di + inode->i_sb->s_blocksize;
1933         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1934                 xs->header = (struct ocfs2_xattr_header *)
1935                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1936         else
1937                 xs->header = (struct ocfs2_xattr_header *)
1938                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1939         xs->base = (void *)xs->header;
1940         xs->here = xs->header->xh_entries;
1941
1942         /* Find the named attribute. */
1943         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1944                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1945                 if (ret && ret != -ENODATA)
1946                         return ret;
1947                 xs->not_found = ret;
1948         }
1949
1950         return 0;
1951 }
1952
1953 /*
1954  * ocfs2_xattr_ibody_set()
1955  *
1956  * Set, replace or remove an extended attribute into inode block.
1957  *
1958  */
1959 static int ocfs2_xattr_ibody_set(struct inode *inode,
1960                                  struct ocfs2_xattr_info *xi,
1961                                  struct ocfs2_xattr_search *xs,
1962                                  struct ocfs2_xattr_set_ctxt *ctxt)
1963 {
1964         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1965         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1966         int ret;
1967
1968         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1969                 return -ENOSPC;
1970
1971         down_write(&oi->ip_alloc_sem);
1972         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1973                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1974                         ret = -ENOSPC;
1975                         goto out;
1976                 }
1977         }
1978
1979         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1980                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1981 out:
1982         up_write(&oi->ip_alloc_sem);
1983
1984         return ret;
1985 }
1986
1987 /*
1988  * ocfs2_xattr_block_find()
1989  *
1990  * Find extended attribute in external block and
1991  * fill search info into struct ocfs2_xattr_search.
1992  */
1993 static int ocfs2_xattr_block_find(struct inode *inode,
1994                                   int name_index,
1995                                   const char *name,
1996                                   struct ocfs2_xattr_search *xs)
1997 {
1998         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1999         struct buffer_head *blk_bh = NULL;
2000         struct ocfs2_xattr_block *xb;
2001         int ret = 0;
2002
2003         if (!di->i_xattr_loc)
2004                 return ret;
2005
2006         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2007                                      &blk_bh);
2008         if (ret < 0) {
2009                 mlog_errno(ret);
2010                 return ret;
2011         }
2012
2013         xs->xattr_bh = blk_bh;
2014         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2015
2016         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2017                 xs->header = &xb->xb_attrs.xb_header;
2018                 xs->base = (void *)xs->header;
2019                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2020                 xs->here = xs->header->xh_entries;
2021
2022                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2023         } else
2024                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2025                                                    name_index,
2026                                                    name, xs);
2027
2028         if (ret && ret != -ENODATA) {
2029                 xs->xattr_bh = NULL;
2030                 goto cleanup;
2031         }
2032         xs->not_found = ret;
2033         return 0;
2034 cleanup:
2035         brelse(blk_bh);
2036
2037         return ret;
2038 }
2039
2040 /*
2041  * ocfs2_xattr_block_set()
2042  *
2043  * Set, replace or remove an extended attribute into external block.
2044  *
2045  */
2046 static int ocfs2_xattr_block_set(struct inode *inode,
2047                                  struct ocfs2_xattr_info *xi,
2048                                  struct ocfs2_xattr_search *xs,
2049                                  struct ocfs2_xattr_set_ctxt *ctxt)
2050 {
2051         struct buffer_head *new_bh = NULL;
2052         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2053         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
2054         handle_t *handle = ctxt->handle;
2055         struct ocfs2_xattr_block *xblk = NULL;
2056         u16 suballoc_bit_start;
2057         u32 num_got;
2058         u64 first_blkno;
2059         int ret;
2060
2061         if (!xs->xattr_bh) {
2062                 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
2063                                            OCFS2_JOURNAL_ACCESS_CREATE);
2064                 if (ret < 0) {
2065                         mlog_errno(ret);
2066                         goto end;
2067                 }
2068
2069                 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
2070                                            &suballoc_bit_start, &num_got,
2071                                            &first_blkno);
2072                 if (ret < 0) {
2073                         mlog_errno(ret);
2074                         goto end;
2075                 }
2076
2077                 new_bh = sb_getblk(inode->i_sb, first_blkno);
2078                 ocfs2_set_new_buffer_uptodate(inode, new_bh);
2079
2080                 ret = ocfs2_journal_access(handle, inode, new_bh,
2081                                            OCFS2_JOURNAL_ACCESS_CREATE);
2082                 if (ret < 0) {
2083                         mlog_errno(ret);
2084                         goto end;
2085                 }
2086
2087                 /* Initialize ocfs2_xattr_block */
2088                 xs->xattr_bh = new_bh;
2089                 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2090                 memset(xblk, 0, inode->i_sb->s_blocksize);
2091                 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2092                 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2093                 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2094                 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2095                 xblk->xb_blkno = cpu_to_le64(first_blkno);
2096
2097                 xs->header = &xblk->xb_attrs.xb_header;
2098                 xs->base = (void *)xs->header;
2099                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2100                 xs->here = xs->header->xh_entries;
2101
2102                 ret = ocfs2_journal_dirty(handle, new_bh);
2103                 if (ret < 0) {
2104                         mlog_errno(ret);
2105                         goto end;
2106                 }
2107                 di->i_xattr_loc = cpu_to_le64(first_blkno);
2108                 ocfs2_journal_dirty(handle, xs->inode_bh);
2109         } else
2110                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2111
2112         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2113                 /* Set extended attribute into external block */
2114                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2115                                             OCFS2_HAS_XATTR_FL);
2116                 if (!ret || ret != -ENOSPC)
2117                         goto end;
2118
2119                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2120                 if (ret)
2121                         goto end;
2122         }
2123
2124         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2125
2126 end:
2127
2128         return ret;
2129 }
2130
2131 /* Check whether the new xattr can be inserted into the inode. */
2132 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2133                                        struct ocfs2_xattr_info *xi,
2134                                        struct ocfs2_xattr_search *xs)
2135 {
2136         u64 value_size;
2137         struct ocfs2_xattr_entry *last;
2138         int free, i;
2139         size_t min_offs = xs->end - xs->base;
2140
2141         if (!xs->header)
2142                 return 0;
2143
2144         last = xs->header->xh_entries;
2145
2146         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2147                 size_t offs = le16_to_cpu(last->xe_name_offset);
2148                 if (offs < min_offs)
2149                         min_offs = offs;
2150                 last += 1;
2151         }
2152
2153         free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
2154         if (free < 0)
2155                 return 0;
2156
2157         BUG_ON(!xs->not_found);
2158
2159         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2160                 value_size = OCFS2_XATTR_ROOT_SIZE;
2161         else
2162                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2163
2164         if (free >= sizeof(struct ocfs2_xattr_entry) +
2165                    OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2166                 return 1;
2167
2168         return 0;
2169 }
2170
2171 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2172                                      struct ocfs2_dinode *di,
2173                                      struct ocfs2_xattr_info *xi,
2174                                      struct ocfs2_xattr_search *xis,
2175                                      struct ocfs2_xattr_search *xbs,
2176                                      int *clusters_need,
2177                                      int *meta_need,
2178                                      int *credits_need)
2179 {
2180         int ret = 0, old_in_xb = 0;
2181         int clusters_add = 0, meta_add = 0, credits = 0;
2182         struct buffer_head *bh = NULL;
2183         struct ocfs2_xattr_block *xb = NULL;
2184         struct ocfs2_xattr_entry *xe = NULL;
2185         struct ocfs2_xattr_value_root *xv = NULL;
2186         char *base = NULL;
2187         int name_offset, name_len = 0;
2188         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2189                                                     xi->value_len);
2190         u64 value_size;
2191
2192         if (xis->not_found && xbs->not_found) {
2193                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2194
2195                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2196                         clusters_add += new_clusters;
2197                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2198                                                         &def_xv.xv.xr_list,
2199                                                         new_clusters);
2200                 }
2201
2202                 goto meta_guess;
2203         }
2204
2205         if (!xis->not_found) {
2206                 xe = xis->here;
2207                 name_offset = le16_to_cpu(xe->xe_name_offset);
2208                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2209                 base = xis->base;
2210                 credits += OCFS2_INODE_UPDATE_CREDITS;
2211         } else {
2212                 int i, block_off = 0;
2213                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2214                 xe = xbs->here;
2215                 name_offset = le16_to_cpu(xe->xe_name_offset);
2216                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2217                 i = xbs->here - xbs->header->xh_entries;
2218                 old_in_xb = 1;
2219
2220                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2221                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2222                                                         bucket_xh(xbs->bucket),
2223                                                         i, &block_off,
2224                                                         &name_offset);
2225                         base = bucket_block(xbs->bucket, block_off);
2226                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2227                 } else {
2228                         base = xbs->base;
2229                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2230                 }
2231         }
2232
2233         /*
2234          * delete a xattr doesn't need metadata and cluster allocation.
2235          * so just calculate the credits and return.
2236          *
2237          * The credits for removing the value tree will be extended
2238          * by ocfs2_remove_extent itself.
2239          */
2240         if (!xi->value) {
2241                 if (!ocfs2_xattr_is_local(xe))
2242                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2243
2244                 goto out;
2245         }
2246
2247         /* do cluster allocation guess first. */
2248         value_size = le64_to_cpu(xe->xe_value_size);
2249
2250         if (old_in_xb) {
2251                 /*
2252                  * In xattr set, we always try to set the xe in inode first,
2253                  * so if it can be inserted into inode successfully, the old
2254                  * one will be removed from the xattr block, and this xattr
2255                  * will be inserted into inode as a new xattr in inode.
2256                  */
2257                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2258                         clusters_add += new_clusters;
2259                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
2260                                     OCFS2_INODE_UPDATE_CREDITS;
2261                         if (!ocfs2_xattr_is_local(xe))
2262                                 credits += ocfs2_calc_extend_credits(
2263                                                         inode->i_sb,
2264                                                         &def_xv.xv.xr_list,
2265                                                         new_clusters);
2266                         goto out;
2267                 }
2268         }
2269
2270         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2271                 /* the new values will be stored outside. */
2272                 u32 old_clusters = 0;
2273
2274                 if (!ocfs2_xattr_is_local(xe)) {
2275                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2276                                                                  value_size);
2277                         xv = (struct ocfs2_xattr_value_root *)
2278                              (base + name_offset + name_len);
2279                         value_size = OCFS2_XATTR_ROOT_SIZE;
2280                 } else
2281                         xv = &def_xv.xv;
2282
2283                 if (old_clusters >= new_clusters) {
2284                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2285                         goto out;
2286                 } else {
2287                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2288                         clusters_add += new_clusters - old_clusters;
2289                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2290                                                              &xv->xr_list,
2291                                                              new_clusters -
2292                                                              old_clusters);
2293                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2294                                 goto out;
2295                 }
2296         } else {
2297                 /*
2298                  * Now the new value will be stored inside. So if the new
2299                  * value is smaller than the size of value root or the old
2300                  * value, we don't need any allocation, otherwise we have
2301                  * to guess metadata allocation.
2302                  */
2303                 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2304                     (!ocfs2_xattr_is_local(xe) &&
2305                      OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2306                         goto out;
2307         }
2308
2309 meta_guess:
2310         /* calculate metadata allocation. */
2311         if (di->i_xattr_loc) {
2312                 if (!xbs->xattr_bh) {
2313                         ret = ocfs2_read_xattr_block(inode,
2314                                                      le64_to_cpu(di->i_xattr_loc),
2315                                                      &bh);
2316                         if (ret) {
2317                                 mlog_errno(ret);
2318                                 goto out;
2319                         }
2320
2321                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2322                 } else
2323                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2324
2325                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2326                         struct ocfs2_extent_list *el =
2327                                  &xb->xb_attrs.xb_root.xt_list;
2328                         meta_add += ocfs2_extend_meta_needed(el);
2329                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2330                                                              el, 1);
2331                 }
2332
2333                 /*
2334                  * This cluster will be used either for new bucket or for
2335                  * new xattr block.
2336                  * If the cluster size is the same as the bucket size, one
2337                  * more is needed since we may need to extend the bucket
2338                  * also.
2339                  */
2340                 clusters_add += 1;
2341                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2342                 if (OCFS2_XATTR_BUCKET_SIZE ==
2343                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2344                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2345                         clusters_add += 1;
2346                 }
2347         } else {
2348                 meta_add += 1;
2349                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2350         }
2351 out:
2352         if (clusters_need)
2353                 *clusters_need = clusters_add;
2354         if (meta_need)
2355                 *meta_need = meta_add;
2356         if (credits_need)
2357                 *credits_need = credits;
2358         brelse(bh);
2359         return ret;
2360 }
2361
2362 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2363                                      struct ocfs2_dinode *di,
2364                                      struct ocfs2_xattr_info *xi,
2365                                      struct ocfs2_xattr_search *xis,
2366                                      struct ocfs2_xattr_search *xbs,
2367                                      struct ocfs2_xattr_set_ctxt *ctxt,
2368                                      int *credits)
2369 {
2370         int clusters_add, meta_add, ret;
2371         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2372
2373         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2374
2375         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2376
2377         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2378                                         &clusters_add, &meta_add, credits);
2379         if (ret) {
2380                 mlog_errno(ret);
2381                 return ret;
2382         }
2383
2384         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2385              "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2386
2387         if (meta_add) {
2388                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2389                                                         &ctxt->meta_ac);
2390                 if (ret) {
2391                         mlog_errno(ret);
2392                         goto out;
2393                 }
2394         }
2395
2396         if (clusters_add) {
2397                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2398                 if (ret)
2399                         mlog_errno(ret);
2400         }
2401 out:
2402         if (ret) {
2403                 if (ctxt->meta_ac) {
2404                         ocfs2_free_alloc_context(ctxt->meta_ac);
2405                         ctxt->meta_ac = NULL;
2406                 }
2407
2408                 /*
2409                  * We cannot have an error and a non null ctxt->data_ac.
2410                  */
2411         }
2412
2413         return ret;
2414 }
2415
2416 static int __ocfs2_xattr_set_handle(struct inode *inode,
2417                                     struct ocfs2_dinode *di,
2418                                     struct ocfs2_xattr_info *xi,
2419                                     struct ocfs2_xattr_search *xis,
2420                                     struct ocfs2_xattr_search *xbs,
2421                                     struct ocfs2_xattr_set_ctxt *ctxt)
2422 {
2423         int ret = 0, credits, old_found;
2424
2425         if (!xi->value) {
2426                 /* Remove existing extended attribute */
2427                 if (!xis->not_found)
2428                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2429                 else if (!xbs->not_found)
2430                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2431         } else {
2432                 /* We always try to set extended attribute into inode first*/
2433                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2434                 if (!ret && !xbs->not_found) {
2435                         /*
2436                          * If succeed and that extended attribute existing in
2437                          * external block, then we will remove it.
2438                          */
2439                         xi->value = NULL;
2440                         xi->value_len = 0;
2441
2442                         old_found = xis->not_found;
2443                         xis->not_found = -ENODATA;
2444                         ret = ocfs2_calc_xattr_set_need(inode,
2445                                                         di,
2446                                                         xi,
2447                                                         xis,
2448                                                         xbs,
2449                                                         NULL,
2450                                                         NULL,
2451                                                         &credits);
2452                         xis->not_found = old_found;
2453                         if (ret) {
2454                                 mlog_errno(ret);
2455                                 goto out;
2456                         }
2457
2458                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2459                                         ctxt->handle->h_buffer_credits);
2460                         if (ret) {
2461                                 mlog_errno(ret);
2462                                 goto out;
2463                         }
2464                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2465                 } else if (ret == -ENOSPC) {
2466                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2467                                 ret = ocfs2_xattr_block_find(inode,
2468                                                              xi->name_index,
2469                                                              xi->name, xbs);
2470                                 if (ret)
2471                                         goto out;
2472
2473                                 old_found = xis->not_found;
2474                                 xis->not_found = -ENODATA;
2475                                 ret = ocfs2_calc_xattr_set_need(inode,
2476                                                                 di,
2477                                                                 xi,
2478                                                                 xis,
2479                                                                 xbs,
2480                                                                 NULL,
2481                                                                 NULL,
2482                                                                 &credits);
2483                                 xis->not_found = old_found;
2484                                 if (ret) {
2485                                         mlog_errno(ret);
2486                                         goto out;
2487                                 }
2488
2489                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2490                                         ctxt->handle->h_buffer_credits);
2491                                 if (ret) {
2492                                         mlog_errno(ret);
2493                                         goto out;
2494                                 }
2495                         }
2496                         /*
2497                          * If no space in inode, we will set extended attribute
2498                          * into external block.
2499                          */
2500                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2501                         if (ret)
2502                                 goto out;
2503                         if (!xis->not_found) {
2504                                 /*
2505                                  * If succeed and that extended attribute
2506                                  * existing in inode, we will remove it.
2507                                  */
2508                                 xi->value = NULL;
2509                                 xi->value_len = 0;
2510                                 xbs->not_found = -ENODATA;
2511                                 ret = ocfs2_calc_xattr_set_need(inode,
2512                                                                 di,
2513                                                                 xi,
2514                                                                 xis,
2515                                                                 xbs,
2516                                                                 NULL,
2517                                                                 NULL,
2518                                                                 &credits);
2519                                 if (ret) {
2520                                         mlog_errno(ret);
2521                                         goto out;
2522                                 }
2523
2524                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2525                                                 ctxt->handle->h_buffer_credits);
2526                                 if (ret) {
2527                                         mlog_errno(ret);
2528                                         goto out;
2529                                 }
2530                                 ret = ocfs2_xattr_ibody_set(inode, xi,
2531                                                             xis, ctxt);
2532                         }
2533                 }
2534         }
2535
2536 out:
2537         return ret;
2538 }
2539
2540 /*
2541  * This function only called duing creating inode
2542  * for init security/acl xattrs of the new inode.
2543  * The xattrs could be put into ibody or extent block,
2544  * xattr bucket would not be use in this case.
2545  * transanction credits also be reserved in here.
2546  */
2547 int ocfs2_xattr_set_handle(handle_t *handle,
2548                            struct inode *inode,
2549                            struct buffer_head *di_bh,
2550                            int name_index,
2551                            const char *name,
2552                            const void *value,
2553                            size_t value_len,
2554                            int flags,
2555                            struct ocfs2_alloc_context *meta_ac,
2556                            struct ocfs2_alloc_context *data_ac)
2557 {
2558         struct ocfs2_dinode *di;
2559         int ret;
2560
2561         struct ocfs2_xattr_info xi = {
2562                 .name_index = name_index,
2563                 .name = name,
2564                 .value = value,
2565                 .value_len = value_len,
2566         };
2567
2568         struct ocfs2_xattr_search xis = {
2569                 .not_found = -ENODATA,
2570         };
2571
2572         struct ocfs2_xattr_search xbs = {
2573                 .not_found = -ENODATA,
2574         };
2575
2576         struct ocfs2_xattr_set_ctxt ctxt = {
2577                 .handle = handle,
2578                 .meta_ac = meta_ac,
2579                 .data_ac = data_ac,
2580         };
2581
2582         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2583                 return -EOPNOTSUPP;
2584
2585         xis.inode_bh = xbs.inode_bh = di_bh;
2586         di = (struct ocfs2_dinode *)di_bh->b_data;
2587
2588         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2589
2590         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2591         if (ret)
2592                 goto cleanup;
2593         if (xis.not_found) {
2594                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2595                 if (ret)
2596                         goto cleanup;
2597         }
2598
2599         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2600
2601 cleanup:
2602         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2603         brelse(xbs.xattr_bh);
2604
2605         return ret;
2606 }
2607
2608 /*
2609  * ocfs2_xattr_set()
2610  *
2611  * Set, replace or remove an extended attribute for this inode.
2612  * value is NULL to remove an existing extended attribute, else either
2613  * create or replace an extended attribute.
2614  */
2615 int ocfs2_xattr_set(struct inode *inode,
2616                     int name_index,
2617                     const char *name,
2618                     const void *value,
2619                     size_t value_len,
2620                     int flags)
2621 {
2622         struct buffer_head *di_bh = NULL;
2623         struct ocfs2_dinode *di;
2624         int ret, credits;
2625         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2626         struct inode *tl_inode = osb->osb_tl_inode;
2627         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2628
2629         struct ocfs2_xattr_info xi = {
2630                 .name_index = name_index,
2631                 .name = name,
2632                 .value = value,
2633                 .value_len = value_len,
2634         };
2635
2636         struct ocfs2_xattr_search xis = {
2637                 .not_found = -ENODATA,
2638         };
2639
2640         struct ocfs2_xattr_search xbs = {
2641                 .not_found = -ENODATA,
2642         };
2643
2644         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2645                 return -EOPNOTSUPP;
2646
2647         /*
2648          * Only xbs will be used on indexed trees.  xis doesn't need a
2649          * bucket.
2650          */
2651         xbs.bucket = ocfs2_xattr_bucket_new(inode);
2652         if (!xbs.bucket) {
2653                 mlog_errno(-ENOMEM);
2654                 return -ENOMEM;
2655         }
2656
2657         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2658         if (ret < 0) {
2659                 mlog_errno(ret);
2660                 goto cleanup_nolock;
2661         }
2662         xis.inode_bh = xbs.inode_bh = di_bh;
2663         di = (struct ocfs2_dinode *)di_bh->b_data;
2664
2665         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2666         /*
2667          * Scan inode and external block to find the same name
2668          * extended attribute and collect search infomation.
2669          */
2670         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2671         if (ret)
2672                 goto cleanup;
2673         if (xis.not_found) {
2674                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2675                 if (ret)
2676                         goto cleanup;
2677         }
2678
2679         if (xis.not_found && xbs.not_found) {
2680                 ret = -ENODATA;
2681                 if (flags & XATTR_REPLACE)
2682                         goto cleanup;
2683                 ret = 0;
2684                 if (!value)
2685                         goto cleanup;
2686         } else {
2687                 ret = -EEXIST;
2688                 if (flags & XATTR_CREATE)
2689                         goto cleanup;
2690         }
2691
2692
2693         mutex_lock(&tl_inode->i_mutex);
2694
2695         if (ocfs2_truncate_log_needs_flush(osb)) {
2696                 ret = __ocfs2_flush_truncate_log(osb);
2697                 if (ret < 0) {
2698                         mutex_unlock(&tl_inode->i_mutex);
2699                         mlog_errno(ret);
2700                         goto cleanup;
2701                 }
2702         }
2703         mutex_unlock(&tl_inode->i_mutex);
2704
2705         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2706                                         &xbs, &ctxt, &credits);
2707         if (ret) {
2708                 mlog_errno(ret);
2709                 goto cleanup;
2710         }
2711
2712         ctxt.handle = ocfs2_start_trans(osb, credits);
2713         if (IS_ERR(ctxt.handle)) {
2714                 ret = PTR_ERR(ctxt.handle);
2715                 mlog_errno(ret);
2716                 goto cleanup;
2717         }
2718
2719         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2720
2721         ocfs2_commit_trans(osb, ctxt.handle);
2722
2723         if (ctxt.data_ac)
2724                 ocfs2_free_alloc_context(ctxt.data_ac);
2725         if (ctxt.meta_ac)
2726                 ocfs2_free_alloc_context(ctxt.meta_ac);
2727         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2728                 ocfs2_schedule_truncate_log_flush(osb, 1);
2729         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2730 cleanup:
2731         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2732         ocfs2_inode_unlock(inode, 1);
2733 cleanup_nolock:
2734         brelse(di_bh);
2735         brelse(xbs.xattr_bh);
2736         ocfs2_xattr_bucket_free(xbs.bucket);
2737
2738         return ret;
2739 }
2740
2741 /*
2742  * Find the xattr extent rec which may contains name_hash.
2743  * e_cpos will be the first name hash of the xattr rec.
2744  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2745  */
2746 static int ocfs2_xattr_get_rec(struct inode *inode,
2747                                u32 name_hash,
2748                                u64 *p_blkno,
2749                                u32 *e_cpos,
2750                                u32 *num_clusters,
2751                                struct ocfs2_extent_list *el)
2752 {
2753         int ret = 0, i;
2754         struct buffer_head *eb_bh = NULL;
2755         struct ocfs2_extent_block *eb;
2756         struct ocfs2_extent_rec *rec = NULL;
2757         u64 e_blkno = 0;
2758
2759         if (el->l_tree_depth) {
2760                 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2761                 if (ret) {
2762                         mlog_errno(ret);
2763                         goto out;
2764                 }
2765
2766                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2767                 el = &eb->h_list;
2768
2769                 if (el->l_tree_depth) {
2770                         ocfs2_error(inode->i_sb,
2771                                     "Inode %lu has non zero tree depth in "
2772                                     "xattr tree block %llu\n", inode->i_ino,
2773                                     (unsigned long long)eb_bh->b_blocknr);
2774                         ret = -EROFS;
2775                         goto out;
2776                 }
2777         }
2778
2779         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2780                 rec = &el->l_recs[i];
2781
2782                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2783                         e_blkno = le64_to_cpu(rec->e_blkno);
2784                         break;
2785                 }
2786         }
2787
2788         if (!e_blkno) {
2789                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2790                             "record (%u, %u, 0) in xattr", inode->i_ino,
2791                             le32_to_cpu(rec->e_cpos),
2792                             ocfs2_rec_clusters(el, rec));
2793                 ret = -EROFS;
2794                 goto out;
2795         }
2796
2797         *p_blkno = le64_to_cpu(rec->e_blkno);
2798         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2799         if (e_cpos)
2800                 *e_cpos = le32_to_cpu(rec->e_cpos);
2801 out:
2802         brelse(eb_bh);
2803         return ret;
2804 }
2805
/*
 * Per-bucket callback type.  ocfs2_iterate_xattr_buckets() invokes it
 * once for every bucket it reads, passing the caller's opaque @para
 * through unchanged; a non-zero return value stops the iteration.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
                                struct ocfs2_xattr_bucket *bucket,
                                void *para);
2809
2810 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2811                                    struct ocfs2_xattr_bucket *bucket,
2812                                    int name_index,
2813                                    const char *name,
2814                                    u32 name_hash,
2815                                    u16 *xe_index,
2816                                    int *found)
2817 {
2818         int i, ret = 0, cmp = 1, block_off, new_offset;
2819         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2820         size_t name_len = strlen(name);
2821         struct ocfs2_xattr_entry *xe = NULL;
2822         char *xe_name;
2823
2824         /*
2825          * We don't use binary search in the bucket because there
2826          * may be multiple entries with the same name hash.
2827          */
2828         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2829                 xe = &xh->xh_entries[i];
2830
2831                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2832                         continue;
2833                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2834                         break;
2835
2836                 cmp = name_index - ocfs2_xattr_get_type(xe);
2837                 if (!cmp)
2838                         cmp = name_len - xe->xe_name_len;
2839                 if (cmp)
2840                         continue;
2841
2842                 ret = ocfs2_xattr_bucket_get_name_value(inode,
2843                                                         xh,
2844                                                         i,
2845                                                         &block_off,
2846                                                         &new_offset);
2847                 if (ret) {
2848                         mlog_errno(ret);
2849                         break;
2850                 }
2851
2852
2853                 xe_name = bucket_block(bucket, block_off) + new_offset;
2854                 if (!memcmp(name, xe_name, name_len)) {
2855                         *xe_index = i;
2856                         *found = 1;
2857                         ret = 0;
2858                         break;
2859                 }
2860         }
2861
2862         return ret;
2863 }
2864
2865 /*
2866  * Find the specified xattr entry in a series of buckets.
2867  * This series start from p_blkno and last for num_clusters.
2868  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2869  * the num of the valid buckets.
2870  *
2871  * Return the buffer_head this xattr should reside in. And if the xattr's
2872  * hash is in the gap of 2 buckets, return the lower bucket.
2873  */
2874 static int ocfs2_xattr_bucket_find(struct inode *inode,
2875                                    int name_index,
2876                                    const char *name,
2877                                    u32 name_hash,
2878                                    u64 p_blkno,
2879                                    u32 first_hash,
2880                                    u32 num_clusters,
2881                                    struct ocfs2_xattr_search *xs)
2882 {
2883         int ret, found = 0;
2884         struct ocfs2_xattr_header *xh = NULL;
2885         struct ocfs2_xattr_entry *xe = NULL;
2886         u16 index = 0;
2887         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2888         int low_bucket = 0, bucket, high_bucket;
2889         struct ocfs2_xattr_bucket *search;
2890         u32 last_hash;
2891         u64 blkno, lower_blkno = 0;
2892
2893         search = ocfs2_xattr_bucket_new(inode);
2894         if (!search) {
2895                 ret = -ENOMEM;
2896                 mlog_errno(ret);
2897                 goto out;
2898         }
2899
2900         ret = ocfs2_read_xattr_bucket(search, p_blkno);
2901         if (ret) {
2902                 mlog_errno(ret);
2903                 goto out;
2904         }
2905
2906         xh = bucket_xh(search);
2907         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2908         while (low_bucket <= high_bucket) {
2909                 ocfs2_xattr_bucket_relse(search);
2910
2911                 bucket = (low_bucket + high_bucket) / 2;
2912                 blkno = p_blkno + bucket * blk_per_bucket;
2913                 ret = ocfs2_read_xattr_bucket(search, blkno);
2914                 if (ret) {
2915                         mlog_errno(ret);
2916                         goto out;
2917                 }
2918
2919                 xh = bucket_xh(search);
2920                 xe = &xh->xh_entries[0];
2921                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2922                         high_bucket = bucket - 1;
2923                         continue;
2924                 }
2925
2926                 /*
2927                  * Check whether the hash of the last entry in our
2928                  * bucket is larger than the search one. for an empty
2929                  * bucket, the last one is also the first one.
2930                  */
2931                 if (xh->xh_count)
2932                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2933
2934                 last_hash = le32_to_cpu(xe->xe_name_hash);
2935
2936                 /* record lower_blkno which may be the insert place. */
2937                 lower_blkno = blkno;
2938
2939                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2940                         low_bucket = bucket + 1;
2941                         continue;
2942                 }
2943
2944                 /* the searched xattr should reside in this bucket if exists. */
2945                 ret = ocfs2_find_xe_in_bucket(inode, search,
2946                                               name_index, name, name_hash,
2947                                               &index, &found);
2948                 if (ret) {
2949                         mlog_errno(ret);
2950                         goto out;
2951                 }
2952                 break;
2953         }
2954
2955         /*
2956          * Record the bucket we have found.
2957          * When the xattr's hash value is in the gap of 2 buckets, we will
2958          * always set it to the previous bucket.
2959          */
2960         if (!lower_blkno)
2961                 lower_blkno = p_blkno;
2962
2963         /* This should be in cache - we just read it during the search */
2964         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
2965         if (ret) {
2966                 mlog_errno(ret);
2967                 goto out;
2968         }
2969
2970         xs->header = bucket_xh(xs->bucket);
2971         xs->base = bucket_block(xs->bucket, 0);
2972         xs->end = xs->base + inode->i_sb->s_blocksize;
2973
2974         if (found) {
2975                 xs->here = &xs->header->xh_entries[index];
2976                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2977                      (unsigned long long)bucket_blkno(xs->bucket), index);
2978         } else
2979                 ret = -ENODATA;
2980
2981 out:
2982         ocfs2_xattr_bucket_free(search);
2983         return ret;
2984 }
2985
2986 static int ocfs2_xattr_index_block_find(struct inode *inode,
2987                                         struct buffer_head *root_bh,
2988                                         int name_index,
2989                                         const char *name,
2990                                         struct ocfs2_xattr_search *xs)
2991 {
2992         int ret;
2993         struct ocfs2_xattr_block *xb =
2994                         (struct ocfs2_xattr_block *)root_bh->b_data;
2995         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2996         struct ocfs2_extent_list *el = &xb_root->xt_list;
2997         u64 p_blkno = 0;
2998         u32 first_hash, num_clusters = 0;
2999         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3000
3001         if (le16_to_cpu(el->l_next_free_rec) == 0)
3002                 return -ENODATA;
3003
3004         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3005              name, name_hash, name_index);
3006
3007         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3008                                   &num_clusters, el);
3009         if (ret) {
3010                 mlog_errno(ret);
3011                 goto out;
3012         }
3013
3014         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3015
3016         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3017              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3018              first_hash);
3019
3020         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3021                                       p_blkno, first_hash, num_clusters, xs);
3022
3023 out:
3024         return ret;
3025 }
3026
3027 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3028                                        u64 blkno,
3029                                        u32 clusters,
3030                                        xattr_bucket_func *func,
3031                                        void *para)
3032 {
3033         int i, ret = 0;
3034         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3035         u32 num_buckets = clusters * bpc;
3036         struct ocfs2_xattr_bucket *bucket;
3037
3038         bucket = ocfs2_xattr_bucket_new(inode);
3039         if (!bucket) {
3040                 mlog_errno(-ENOMEM);
3041                 return -ENOMEM;
3042         }
3043
3044         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3045              clusters, (unsigned long long)blkno);
3046
3047         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3048                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3049                 if (ret) {
3050                         mlog_errno(ret);
3051                         break;
3052                 }
3053
3054                 /*
3055                  * The real bucket num in this series of blocks is stored
3056                  * in the 1st bucket.
3057                  */
3058                 if (i == 0)
3059                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3060
3061                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3062                      (unsigned long long)blkno,
3063                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3064                 if (func) {
3065                         ret = func(inode, bucket, para);
3066                         if (ret)
3067                                 mlog_errno(ret);
3068                         /* Fall through to bucket_relse() */
3069                 }
3070
3071                 ocfs2_xattr_bucket_relse(bucket);
3072                 if (ret)
3073                         break;
3074         }
3075
3076         ocfs2_xattr_bucket_free(bucket);
3077         return ret;
3078 }
3079
/*
 * State handed (as the opaque per-bucket parameter) to
 * ocfs2_list_xattr_bucket(), which forwards the three fields to
 * ocfs2_xattr_list_entry() as its buffer, buffer size and result
 * arguments.
 */
struct ocfs2_xattr_tree_list {
        char *buffer;           /* output buffer for listed names */
        size_t buffer_size;     /* size of buffer */
        size_t result;          /* updated by ocfs2_xattr_list_entry(); presumably bytes used so far - confirm there */
};
3085
3086 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
3087                                              struct ocfs2_xattr_header *xh,
3088                                              int index,
3089                                              int *block_off,
3090                                              int *new_offset)
3091 {
3092         u16 name_offset;
3093
3094         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3095                 return -EINVAL;
3096
3097         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3098
3099         *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
3100         *new_offset = name_offset % inode->i_sb->s_blocksize;
3101
3102         return 0;
3103 }
3104
3105 static int ocfs2_list_xattr_bucket(struct inode *inode,
3106                                    struct ocfs2_xattr_bucket *bucket,
3107                                    void *para)
3108 {
3109         int ret = 0, type;
3110         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3111         int i, block_off, new_offset;
3112         const char *prefix, *name;
3113
3114         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3115                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3116                 type = ocfs2_xattr_get_type(entry);
3117                 prefix = ocfs2_xattr_prefix(type);
3118
3119                 if (prefix) {
3120                         ret = ocfs2_xattr_bucket_get_name_value(inode,
3121                                                                 bucket_xh(bucket),
3122                                                                 i,
3123                                                                 &block_off,
3124                                                                 &new_offset);
3125                         if (ret)
3126                                 break;
3127
3128                         name = (const char *)bucket_block(bucket, block_off) +
3129                                 new_offset;
3130                         ret = ocfs2_xattr_list_entry(xl->buffer,
3131                                                      xl->buffer_size,
3132                                                      &xl->result,
3133                                                      prefix, name,
3134                                                      entry->xe_name_len);
3135                         if (ret)
3136                                 break;
3137                 }
3138         }
3139
3140         return ret;
3141 }
3142
3143 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3144                                              struct ocfs2_xattr_tree_root *xt,
3145                                              char *buffer,
3146                                              size_t buffer_size)
3147 {
3148         struct ocfs2_extent_list *el = &xt->xt_list;
3149         int ret = 0;
3150         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3151         u64 p_blkno = 0;
3152         struct ocfs2_xattr_tree_list xl = {
3153                 .buffer = buffer,
3154                 .buffer_size = buffer_size,
3155                 .result = 0,
3156         };
3157
3158         if (le16_to_cpu(el->l_next_free_rec) == 0)
3159                 return 0;
3160
3161         while (name_hash > 0) {
3162                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3163                                           &e_cpos, &num_clusters, el);
3164                 if (ret) {
3165                         mlog_errno(ret);
3166                         goto out;
3167                 }
3168
3169                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
3170                                                   ocfs2_list_xattr_bucket,
3171                                                   &xl);
3172                 if (ret) {
3173                         mlog_errno(ret);
3174                         goto out;
3175                 }
3176
3177                 if (e_cpos == 0)
3178                         break;
3179
3180                 name_hash = e_cpos - 1;
3181         }
3182
3183         ret = xl.result;
3184 out:
3185         return ret;
3186 }
3187
3188 static int cmp_xe(const void *a, const void *b)
3189 {
3190         const struct ocfs2_xattr_entry *l = a, *r = b;
3191         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3192         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3193
3194         if (l_hash > r_hash)
3195                 return 1;
3196         if (l_hash < r_hash)
3197                 return -1;
3198         return 0;
3199 }
3200
3201 static void swap_xe(void *a, void *b, int size)
3202 {
3203         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3204
3205         tmp = *l;
3206         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3207         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3208 }
3209
3210 /*
3211  * When the ocfs2_xattr_block is filled up, new bucket will be created
3212  * and all the xattr entries will be moved to the new bucket.
3213  * The header goes at the start of the bucket, and the names+values are
3214  * filled from the end.  This is why *target starts as the last buffer.
3215  * Note: we need to sort the entries since they are not saved in order
3216  * in the ocfs2_xattr_block.
3217  */
3218 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3219                                            struct buffer_head *xb_bh,
3220                                            struct ocfs2_xattr_bucket *bucket)
3221 {
3222         int i, blocksize = inode->i_sb->s_blocksize;
3223         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3224         u16 offset, size, off_change;
3225         struct ocfs2_xattr_entry *xe;
3226         struct ocfs2_xattr_block *xb =
3227                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
3228         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3229         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3230         u16 count = le16_to_cpu(xb_xh->xh_count);
3231         char *src = xb_bh->b_data;
3232         char *target = bucket_block(bucket, blks - 1);
3233
3234         mlog(0, "cp xattr from block %llu to bucket %llu\n",
3235              (unsigned long long)xb_bh->b_blocknr,
3236              (unsigned long long)bucket_blkno(bucket));
3237
3238         for (i = 0; i < blks; i++)
3239                 memset(bucket_block(bucket, i), 0, blocksize);
3240
3241         /*
3242          * Since the xe_name_offset is based on ocfs2_xattr_header,
3243          * there is a offset change corresponding to the change of
3244          * ocfs2_xattr_header's position.
3245          */
3246         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3247         xe = &xb_xh->xh_entries[count - 1];
3248         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3249         size = blocksize - offset;
3250
3251         /* copy all the names and values. */
3252         memcpy(target + offset, src + offset, size);
3253
3254         /* Init new header now. */
3255         xh->xh_count = xb_xh->xh_count;
3256         xh->xh_num_buckets = cpu_to_le16(1);
3257         xh->xh_name_value_len = cpu_to_le16(size);
3258         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3259
3260         /* copy all the entries. */
3261         target = bucket_block(bucket, 0);
3262         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3263         size = count * sizeof(struct ocfs2_xattr_entry);
3264         memcpy(target + offset, (char *)xb_xh + offset, size);
3265
3266         /* Change the xe offset for all the xe because of the move. */
3267         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3268                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3269         for (i = 0; i < count; i++)
3270                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3271
3272         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3273              offset, size, off_change);
3274
3275         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3276              cmp_xe, swap_xe);
3277 }
3278
3279 /*
3280  * After we move xattr from block to index btree, we have to
3281  * update ocfs2_xattr_search to the new xe and base.
3282  *
3283  * When the entry is in xattr block, xattr_bh indicates the storage place.
3284  * While if the entry is in index b-tree, "bucket" indicates the
3285  * real place of the xattr.
3286  */
3287 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3288                                             struct ocfs2_xattr_search *xs,
3289                                             struct buffer_head *old_bh)
3290 {
3291         char *buf = old_bh->b_data;
3292         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3293         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3294         int i;
3295
3296         xs->header = bucket_xh(xs->bucket);
3297         xs->base = bucket_block(xs->bucket, 0);
3298         xs->end = xs->base + inode->i_sb->s_blocksize;
3299
3300         if (xs->not_found)
3301                 return;
3302
3303         i = xs->here - old_xh->xh_entries;
3304         xs->here = &xs->header->xh_entries[i];
3305 }
3306
3307 static int ocfs2_xattr_create_index_block(struct inode *inode,
3308                                           struct ocfs2_xattr_search *xs,
3309                                           struct ocfs2_xattr_set_ctxt *ctxt)
3310 {
3311         int ret;
3312         u32 bit_off, len;
3313         u64 blkno;
3314         handle_t *handle = ctxt->handle;
3315         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3316         struct ocfs2_inode_info *oi = OCFS2_I(inode);
3317         struct buffer_head *xb_bh = xs->xattr_bh;
3318         struct ocfs2_xattr_block *xb =
3319                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3320         struct ocfs2_xattr_tree_root *xr;
3321         u16 xb_flags = le16_to_cpu(xb->xb_flags);
3322
3323         mlog(0, "create xattr index block for %llu\n",
3324              (unsigned long long)xb_bh->b_blocknr);
3325
3326         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3327         BUG_ON(!xs->bucket);
3328
3329         /*
3330          * XXX:
3331          * We can use this lock for now, and maybe move to a dedicated mutex
3332          * if performance becomes a problem later.
3333          */
3334         down_write(&oi->ip_alloc_sem);
3335
3336         ret = ocfs2_journal_access(handle, inode, xb_bh,
3337                                    OCFS2_JOURNAL_ACCESS_WRITE);
3338         if (ret) {
3339                 mlog_errno(ret);
3340                 goto out;
3341         }
3342
3343         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3344                                      1, 1, &bit_off, &len);
3345         if (ret) {
3346                 mlog_errno(ret);
3347                 goto out;
3348         }
3349
3350         /*
3351          * The bucket may spread in many blocks, and
3352          * we will only touch the 1st block and the last block
3353          * in the whole bucket(one for entry and one for data).
3354          */
3355         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3356
3357         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3358              (unsigned long long)blkno);
3359
3360         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3361         if (ret) {
3362                 mlog_errno(ret);
3363                 goto out;
3364         }
3365
3366         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3367                                                 OCFS2_JOURNAL_ACCESS_CREATE);
3368         if (ret) {
3369                 mlog_errno(ret);
3370                 goto out;
3371         }
3372
3373         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3374         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3375
3376         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3377
3378         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3379         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3380                offsetof(struct ocfs2_xattr_block, xb_attrs));
3381
3382         xr = &xb->xb_attrs.xb_root;
3383         xr->xt_clusters = cpu_to_le32(1);
3384         xr->xt_last_eb_blk = 0;
3385         xr->xt_list.l_tree_depth = 0;
3386         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3387         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3388
3389         xr->xt_list.l_recs[0].e_cpos = 0;
3390         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3391         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3392
3393         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3394
3395         ocfs2_journal_dirty(handle, xb_bh);
3396
3397 out:
3398         up_write(&oi->ip_alloc_sem);
3399
3400         return ret;
3401 }
3402
3403 static int cmp_xe_offset(const void *a, const void *b)
3404 {
3405         const struct ocfs2_xattr_entry *l = a, *r = b;
3406         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3407         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3408
3409         if (l_name_offset < r_name_offset)
3410                 return 1;
3411         if (l_name_offset > r_name_offset)
3412                 return -1;
3413         return 0;
3414 }
3415
3416 /*
3417  * defrag a xattr bucket if we find that the bucket has some
3418  * holes beteen name/value pairs.
3419  * We will move all the name/value pairs to the end of the bucket
3420  * so that we can spare some space for insertion.
3421  */
3422 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3423                                      handle_t *handle,
3424                                      struct ocfs2_xattr_bucket *bucket)
3425 {
3426         int ret, i;
3427         size_t end, offset, len, value_len;
3428         struct ocfs2_xattr_header *xh;
3429         char *entries, *buf, *bucket_buf = NULL;
3430         u64 blkno = bucket_blkno(bucket);
3431         u16 xh_free_start;
3432         size_t blocksize = inode->i_sb->s_blocksize;
3433         struct ocfs2_xattr_entry *xe;
3434
3435         /*
3436          * In order to make the operation more efficient and generic,
3437          * we copy all the blocks into a contiguous memory and do the
3438          * defragment there, so if anything is error, we will not touch
3439          * the real block.
3440          */
3441         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3442         if (!bucket_buf) {
3443                 ret = -EIO;
3444                 goto out;
3445         }
3446
3447         buf = bucket_buf;
3448         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3449                 memcpy(buf, bucket_block(bucket, i), blocksize);
3450
3451         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3452                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3453         if (ret < 0) {
3454                 mlog_errno(ret);
3455                 goto out;
3456         }
3457
3458         xh = (struct ocfs2_xattr_header *)bucket_buf;
3459         entries = (char *)xh->xh_entries;
3460         xh_free_start = le16_to_cpu(xh->xh_free_start);
3461
3462         mlog(0, "adjust xattr bucket in %llu, count = %u, "
3463              "xh_free_start = %u, xh_name_value_len = %u.\n",
3464              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3465              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3466
3467         /*
3468          * sort all the entries by their offset.
3469          * the largest will be the first, so that we can
3470          * move them to the end one by one.
3471          */
3472         sort(entries, le16_to_cpu(xh->xh_count),
3473              sizeof(struct ocfs2_xattr_entry),
3474              cmp_xe_offset, swap_xe);
3475
3476         /* Move all name/values to the end of the bucket. */
3477         xe = xh->xh_entries;
3478         end = OCFS2_XATTR_BUCKET_SIZE;
3479         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3480                 offset = le16_to_cpu(xe->xe_name_offset);
3481                 if (ocfs2_xattr_is_local(xe))
3482                         value_len = OCFS2_XATTR_SIZE(
3483                                         le64_to_cpu(xe->xe_value_size));
3484                 else
3485                         value_len = OCFS2_XATTR_ROOT_SIZE;
3486                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3487
3488                 /*
3489                  * We must make sure that the name/value pair
3490                  * exist in the same block. So adjust end to
3491                  * the previous block end if needed.
3492                  */
3493                 if (((end - len) / blocksize !=
3494                         (end - 1) / blocksize))
3495                         end = end - end % blocksize;
3496
3497                 if (end > offset + len) {
3498                         memmove(bucket_buf + end - len,
3499                                 bucket_buf + offset, len);
3500                         xe->xe_name_offset = cpu_to_le16(end - len);
3501                 }
3502
3503                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3504                                 "bucket %llu\n", (unsigned long long)blkno);
3505
3506                 end -= len;
3507         }
3508
3509         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3510                         "bucket %llu\n", (unsigned long long)blkno);
3511
3512         if (xh_free_start == end)
3513                 goto out;
3514
3515         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3516         xh->xh_free_start = cpu_to_le16(end);
3517
3518         /* sort the entries by their name_hash. */
3519         sort(entries, le16_to_cpu(xh->xh_count),
3520              sizeof(struct ocfs2_xattr_entry),
3521              cmp_xe, swap_xe);
3522
3523         buf = bucket_buf;
3524         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3525                 memcpy(bucket_block(bucket, i), buf, blocksize);
3526         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3527
3528 out:
3529         kfree(bucket_buf);
3530         return ret;
3531 }
3532
3533 /*
3534  * prev_blkno points to the start of an existing extent.  new_blkno
3535  * points to a newly allocated extent.  Because we know each of our
3536  * clusters contains more than bucket, we can easily split one cluster
3537  * at a bucket boundary.  So we take the last cluster of the existing
3538  * extent and split it down the middle.  We move the last half of the
3539  * buckets in the last cluster of the existing extent over to the new
3540  * extent.
3541  *
3542  * first_bh is the buffer at prev_blkno so we can update the existing
3543  * extent's bucket count.  header_bh is the bucket were we were hoping
3544  * to insert our xattr.  If the bucket move places the target in the new
3545  * extent, we'll update first_bh and header_bh after modifying the old
3546  * extent.
3547  *
3548  * first_hash will be set as the 1st xe's name_hash in the new extent.
3549  */
3550 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3551                                                handle_t *handle,
3552                                                struct buffer_head **first_bh,
3553                                                struct buffer_head **header_bh,
3554                                                u64 new_blkno,
3555                                                u64 prev_blkno,
3556                                                u32 num_clusters,
3557                                                u32 *first_hash)
3558 {
3559         int i, ret, credits;
3560         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3561         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3562         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3563         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3564         int to_move = num_buckets / 2;
3565         u64 last_cluster_blkno, src_blkno;
3566         struct ocfs2_xattr_header *xh =
3567                         (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3568         struct ocfs2_xattr_bucket *old_first, *new_first;
3569
3570         BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3571         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3572
3573         last_cluster_blkno = prev_blkno + ((num_clusters - 1) * bpc);
3574         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3575
3576         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3577              (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
3578
3579         /* The first bucket of the original extent */
3580         old_first = ocfs2_xattr_bucket_new(inode);
3581         /* The first bucket of the new extent */
3582         new_first = ocfs2_xattr_bucket_new(inode);
3583         if (!old_first || !new_first) {
3584                 ret = -ENOMEM;
3585                 mlog_errno(ret);
3586                 goto out;
3587         }
3588
3589         ret = ocfs2_read_xattr_bucket(old_first, prev_blkno);
3590         if (ret) {
3591                 mlog_errno(ret);
3592                 goto out;
3593         }
3594
3595         /*
3596          * We need to update the 1st half of the new extent, and we
3597          * need to update the first bucket of the old extent.
3598          */
3599         credits = ((to_move + 1) * blks_per_bucket) + handle->h_buffer_credits;
3600         ret = ocfs2_extend_trans(handle, credits);
3601         if (ret) {
3602                 mlog_errno(ret);
3603                 goto out;
3604         }
3605
3606         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
3607                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3608         if (ret) {
3609                 mlog_errno(ret);
3610                 goto out;
3611         }
3612
3613         for (i = 0; i < to_move; i++) {
3614                 ret = ocfs2_cp_xattr_bucket(inode, handle,
3615                                             src_blkno + (i * blks_per_bucket),
3616                                             new_blkno + (i * blks_per_bucket),
3617                                             1);
3618                 if (ret) {
3619                         mlog_errno(ret);
3620                         goto out;
3621                 }
3622         }
3623
3624         /*
3625          * Get the new bucket ready before we dirty anything
3626          * (This actually shouldn't fail, because we already dirtied
3627          * it once in ocfs2_cp_xattr_bucket()).
3628          */
3629         ret = ocfs2_read_xattr_bucket(new_first, new_blkno);
3630         if (ret) {
3631                 mlog_errno(ret);
3632                 goto out;
3633         }
3634         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
3635                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3636         if (ret) {
3637                 mlog_errno(ret);
3638                 goto out;
3639         }
3640
3641         /* Now update the headers */
3642         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -to_move);
3643         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
3644
3645         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(to_move);
3646         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
3647
3648         if (first_hash)
3649                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
3650
3651         /*
3652          * If the target bucket is anywhere past src_blkno, we moved
3653          * it to the new extent.  We need to update first_bh and header_bh.
3654          */
3655         if ((*header_bh)->b_blocknr >= src_blkno) {
3656                 /* We're done with old_first, so we can re-use it. */
3657                 ocfs2_xattr_bucket_relse(old_first);
3658
3659                 /* Find the block for the new target bucket */
3660                 src_blkno = new_blkno +
3661                         ((*header_bh)->b_blocknr - src_blkno);
3662
3663                 /*
3664                  * This shouldn't fail - the buffers are in the
3665                  * journal from ocfs2_cp_xattr_bucket().
3666                  */
3667                 ret = ocfs2_read_xattr_bucket(old_first, src_blkno);
3668                 if (ret) {
3669                         mlog_errno(ret);
3670                         goto out;
3671                 }
3672
3673                 brelse(*first_bh);
3674                 *first_bh = new_first->bu_bhs[0];
3675                 get_bh(*first_bh);
3676
3677                 brelse(*header_bh);
3678                 *header_bh = old_first->bu_bhs[0];
3679                 get_bh(*header_bh);
3680         }
3681
3682 out:
3683         ocfs2_xattr_bucket_free(new_first);
3684         ocfs2_xattr_bucket_free(old_first);
3685
3686         return ret;
3687 }
3688
3689 /*
3690  * Find the suitable pos when we divide a bucket into 2.
3691  * We have to make sure the xattrs with the same hash value exist
3692  * in the same bucket.
3693  *
3694  * If this ocfs2_xattr_header covers more than one hash value, find a
3695  * place where the hash value changes.  Try to find the most even split.
3696  * The most common case is that all entries have different hash values,
3697  * and the first check we make will find a place to split.
3698  */
3699 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3700 {
3701         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3702         int count = le16_to_cpu(xh->xh_count);
3703         int delta, middle = count / 2;
3704
3705         /*
3706          * We start at the middle.  Each step gets farther away in both
3707          * directions.  We therefore hit the change in hash value
3708          * nearest to the middle.  Note that this loop does not execute for
3709          * count < 2.
3710          */
3711         for (delta = 0; delta < middle; delta++) {
3712                 /* Let's check delta earlier than middle */
3713                 if (cmp_xe(&entries[middle - delta - 1],
3714                            &entries[middle - delta]))
3715                         return middle - delta;
3716
3717                 /* For even counts, don't walk off the end */
3718                 if ((middle + delta + 1) == count)
3719                         continue;
3720
3721                 /* Now try delta past middle */
3722                 if (cmp_xe(&entries[middle + delta],
3723                            &entries[middle + delta + 1]))
3724                         return middle + delta + 1;
3725         }
3726
3727         /* Every entry had the same hash */
3728         return count;
3729 }
3730
3731 /*
3732  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3733  * first_hash will record the 1st hash of the new bucket.
3734  *
3735  * Normally half of the xattrs will be moved.  But we have to make
3736  * sure that the xattrs with the same hash value are stored in the
3737  * same bucket. If all the xattrs in this bucket have the same hash
3738  * value, the new bucket will be initialized as an empty one and the
3739  * first_hash will be initialized as (hash_value+1).
3740  */
3741 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3742                                     handle_t *handle,
3743                                     u64 blk,
3744                                     u64 new_blk,
3745                                     u32 *first_hash,
3746                                     int new_bucket_head)
3747 {
3748         int ret, i;
3749         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3750         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3751         struct ocfs2_xattr_header *xh;
3752         struct ocfs2_xattr_entry *xe;
3753         int blocksize = inode->i_sb->s_blocksize;
3754
3755         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3756              (unsigned long long)blk, (unsigned long long)new_blk);
3757
3758         s_bucket = ocfs2_xattr_bucket_new(inode);
3759         t_bucket = ocfs2_xattr_bucket_new(inode);
3760         if (!s_bucket || !t_bucket) {
3761                 ret = -ENOMEM;
3762                 mlog_errno(ret);
3763                 goto out;
3764         }
3765
3766         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3767         if (ret) {
3768                 mlog_errno(ret);
3769                 goto out;
3770         }
3771
3772         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3773                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3774         if (ret) {
3775                 mlog_errno(ret);
3776                 goto out;
3777         }
3778
3779         /*
3780          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3781          * there's no need to read it.
3782          */
3783         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3784         if (ret) {
3785                 mlog_errno(ret);
3786                 goto out;
3787         }
3788
3789         /*
3790          * Hey, if we're overwriting t_bucket, what difference does
3791          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
3792          * same part of ocfs2_cp_xattr_bucket().
3793          */
3794         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3795                                                 new_bucket_head ?
3796                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3797                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3798         if (ret) {
3799                 mlog_errno(ret);
3800                 goto out;
3801         }
3802
3803         xh = bucket_xh(s_bucket);
3804         count = le16_to_cpu(xh->xh_count);
3805         start = ocfs2_xattr_find_divide_pos(xh);
3806
3807         if (start == count) {
3808                 xe = &xh->xh_entries[start-1];
3809
3810                 /*
3811                  * initialized a new empty bucket here.
3812                  * The hash value is set as one larger than
3813                  * that of the last entry in the previous bucket.
3814                  */
3815                 for (i = 0; i < t_bucket->bu_blocks; i++)
3816                         memset(bucket_block(t_bucket, i), 0, blocksize);
3817
3818                 xh = bucket_xh(t_bucket);
3819                 xh->xh_free_start = cpu_to_le16(blocksize);
3820                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3821                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3822
3823                 goto set_num_buckets;
3824         }
3825
3826         /* copy the whole bucket to the new first. */
3827         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3828
3829         /* update the new bucket. */
3830         xh = bucket_xh(t_bucket);
3831
3832         /*
3833          * Calculate the total name/value len and xh_free_start for
3834          * the old bucket first.
3835          */
3836         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3837         name_value_len = 0;
3838         for (i = 0; i < start; i++) {
3839                 xe = &xh->xh_entries[i];
3840                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3841                 if (ocfs2_xattr_is_local(xe))
3842                         xe_len +=
3843                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3844                 else
3845                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3846                 name_value_len += xe_len;
3847                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3848                         name_offset = le16_to_cpu(xe->xe_name_offset);
3849         }
3850
3851         /*
3852          * Now begin the modification to the new bucket.
3853          *
3854          * In the new bucket, We just move the xattr entry to the beginning
3855          * and don't touch the name/value. So there will be some holes in the
3856          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3857          * called.
3858          */
3859         xe = &xh->xh_entries[start];
3860         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3861         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3862              (int)((char *)xe - (char *)xh),
3863              (int)((char *)xh->xh_entries - (char *)xh));
3864         memmove((char *)xh->xh_entries, (char *)xe, len);
3865         xe = &xh->xh_entries[count - start];
3866         len = sizeof(struct ocfs2_xattr_entry) * start;
3867         memset((char *)xe, 0, len);
3868
3869         le16_add_cpu(&xh->xh_count, -start);
3870         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3871
3872         /* Calculate xh_free_start for the new bucket. */
3873         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3874         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3875                 xe = &xh->xh_entries[i];
3876                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3877                 if (ocfs2_xattr_is_local(xe))
3878                         xe_len +=
3879                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3880                 else
3881                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3882                 if (le16_to_cpu(xe->xe_name_offset) <
3883                     le16_to_cpu(xh->xh_free_start))
3884                         xh->xh_free_start = xe->xe_name_offset;
3885         }
3886
3887 set_num_buckets:
3888         /* set xh->xh_num_buckets for the new xh. */
3889         if (new_bucket_head)
3890                 xh->xh_num_buckets = cpu_to_le16(1);
3891         else
3892                 xh->xh_num_buckets = 0;
3893
3894         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3895
3896         /* store the first_hash of the new bucket. */
3897         if (first_hash)
3898                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3899
3900         /*
3901          * Now only update the 1st block of the old bucket.  If we
3902          * just added a new empty bucket, there is no need to modify
3903          * it.
3904          */
3905         if (start == count)
3906                 goto out;
3907
3908         xh = bucket_xh(s_bucket);
3909         memset(&xh->xh_entries[start], 0,
3910                sizeof(struct ocfs2_xattr_entry) * (count - start));
3911         xh->xh_count = cpu_to_le16(start);
3912         xh->xh_free_start = cpu_to_le16(name_offset);
3913         xh->xh_name_value_len = cpu_to_le16(name_value_len);
3914
3915         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3916
3917 out:
3918         ocfs2_xattr_bucket_free(s_bucket);
3919         ocfs2_xattr_bucket_free(t_bucket);
3920
3921         return ret;
3922 }
3923
3924 /*
3925  * Copy xattr from one bucket to another bucket.
3926  *
3927  * The caller must make sure that the journal transaction
3928  * has enough space for journaling.
3929  */
3930 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3931                                  handle_t *handle,
3932                                  u64 s_blkno,
3933                                  u64 t_blkno,
3934                                  int t_is_new)
3935 {
3936         int ret;
3937         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3938
3939         BUG_ON(s_blkno == t_blkno);
3940
3941         mlog(0, "cp bucket %llu to %llu, target is %d\n",
3942              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3943              t_is_new);
3944
3945         s_bucket = ocfs2_xattr_bucket_new(inode);
3946         t_bucket = ocfs2_xattr_bucket_new(inode);
3947         if (!s_bucket || !t_bucket) {
3948                 ret = -ENOMEM;
3949                 mlog_errno(ret);
3950                 goto out;
3951         }
3952
3953         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3954         if (ret)
3955                 goto out;
3956
3957         /*
3958          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
3959          * there's no need to read it.
3960          */
3961         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3962         if (ret)
3963                 goto out;
3964
3965         /*
3966          * Hey, if we're overwriting t_bucket, what difference does
3967          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
3968          * cluster to fill, we came here from
3969          * ocfs2_mv_xattr_buckets(), and it is really new -
3970          * ACCESS_CREATE is required.  But we also might have moved data
3971          * out of t_bucket before extending back into it.
3972          * ocfs2_add_new_xattr_bucket() can do this - its call to
3973          * ocfs2_add_new_xattr_cluster() may have created a new extent
3974          * and copied out the end of the old extent.  Then it re-extends
3975          * the old extent back to create space for new xattrs.  That's
3976          * how we get here, and the bucket isn't really new.
3977          */
3978         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3979                                                 t_is_new ?
3980                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3981                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3982         if (ret)
3983                 goto out;
3984
3985         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3986         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3987
3988 out:
3989         ocfs2_xattr_bucket_free(t_bucket);
3990         ocfs2_xattr_bucket_free(s_bucket);
3991
3992         return ret;
3993 }
3994
3995 /*
3996  * src_blk points to the start of an existing extent.  last_blk points to
3997  * last cluster in that extent.  to_blk points to a newly allocated
3998  * extent.  We copy the buckets from cluster at last_blk to the new extent,
3999  * initializing its xh_num_buckets.  The old extent's xh_num_buckets
4000  * shrinks by the same amount.
4001  */
4002 static int ocfs2_mv_xattr_buckets(struct inode *inode,
4003                                   handle_t *handle,
4004                                   u64 src_blk, u64 last_blk,
4005                                   u64 to_blk, u32 *first_hash)
4006 {
4007         int i, ret, credits;
4008         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4009         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4010         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4011         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4012         struct ocfs2_xattr_bucket *old_first, *new_first;
4013
4014         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4015              (unsigned long long)last_blk, (unsigned long long)to_blk);
4016
4017         /* The first bucket of the original extent */
4018         old_first = ocfs2_xattr_bucket_new(inode);
4019         /* The first bucket of the new extent */
4020         new_first = ocfs2_xattr_bucket_new(inode);
4021         if (!old_first || !new_first) {
4022                 ret = -ENOMEM;
4023                 mlog_errno(ret);
4024                 goto out;
4025         }
4026
4027         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4028         if (ret) {
4029                 mlog_errno(ret);
4030                 goto out;
4031         }
4032
4033         /*
4034          * We need to update the first bucket of the old extent and the
4035          * entire first cluster of the new extent.
4036          */
4037         credits = blks_per_bucket + bpc + handle->h_buffer_credits;
4038         ret = ocfs2_extend_trans(handle, credits);
4039         if (ret) {
4040                 mlog_errno(ret);
4041                 goto out;
4042         }
4043
4044         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4045                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4046         if (ret) {
4047                 mlog_errno(ret);
4048                 goto out;
4049         }
4050
4051         for (i = 0; i < num_buckets; i++) {
4052                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4053                                             last_blk + (i * blks_per_bucket),
4054                                             to_blk + (i * blks_per_bucket),
4055                                             1);
4056                 if (ret) {
4057                         mlog_errno(ret);
4058                         goto out;
4059                 }
4060         }
4061
4062         /*
4063          * Get the new bucket ready before we dirty anything
4064          * (This actually shouldn't fail, because we already dirtied
4065          * it once in ocfs2_cp_xattr_bucket()).
4066          */
4067         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4068         if (ret) {
4069                 mlog_errno(ret);
4070                 goto out;
4071         }
4072         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4073                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4074         if (ret) {
4075                 mlog_errno(ret);
4076                 goto out;
4077         }
4078
4079         /* Now update the headers */
4080         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4081         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4082
4083         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4084         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4085
4086         if (first_hash)
4087                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4088
4089 out:
4090         ocfs2_xattr_bucket_free(new_first);
4091         ocfs2_xattr_bucket_free(old_first);
4092         return ret;
4093 }
4094
4095 /*
4096  * Move some xattrs in this cluster to the new cluster.
4097  * This function should only be called when bucket size == cluster size.
4098  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4099  */
4100 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4101                                       handle_t *handle,
4102                                       u64 prev_blk,
4103                                       u64 new_blk,
4104                                       u32 *first_hash)
4105 {
4106         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4107         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4108
4109         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4110
4111         ret = ocfs2_extend_trans(handle, credits);
4112         if (ret) {
4113                 mlog_errno(ret);
4114                 return ret;
4115         }
4116
4117         /* Move half of the xattr in start_blk to the next bucket. */
4118         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4119                                           new_blk, first_hash, 1);
4120 }
4121
4122 /*
4123  * Move some xattrs from the old cluster to the new one since they are not
4124  * contiguous in ocfs2 xattr tree.
4125  *
4126  * new_blk starts a new separate cluster, and we will move some xattrs from
4127  * prev_blk to it. v_start will be set as the first name hash value in this
4128  * new cluster so that it can be used as e_cpos during tree insertion and
4129  * don't collide with our original b-tree operations. first_bh and header_bh
4130  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4131  * to extend the insert bucket.
4132  *
4133  * The problem is how much xattr should we move to the new one and when should
4134  * we update first_bh and header_bh?
4135  * 1. If cluster size > bucket size, that means the previous cluster has more
4136  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4137  *    update the first_bh and header_bh if the insert bucket has been moved
4138  *    to the new cluster.
4139  * 2. If cluster_size == bucket_size:
4140  *    a) If the previous extent rec has more than one cluster and the insert
4141  *       place isn't in the last cluster, copy the entire last cluster to the
4142  *       new one. This time, we don't need to upate the first_bh and header_bh
4143  *       since they will not be moved into the new cluster.
4144  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4145  *       the new one. And we set the extend flag to zero if the insert place is
4146  *       moved into the new allocated cluster since no extend is needed.
4147  */
4148 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4149                                             handle_t *handle,
4150                                             struct buffer_head **first_bh,
4151                                             struct buffer_head **header_bh,
4152                                             u64 new_blk,
4153                                             u64 prev_blk,
4154                                             u32 prev_clusters,
4155                                             u32 *v_start,
4156                                             int *extend)
4157 {
4158         int ret = 0;
4159         int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4160
4161         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4162              (unsigned long long)prev_blk, prev_clusters,
4163              (unsigned long long)new_blk);
4164
4165         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
4166                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4167                                                           handle,
4168                                                           first_bh,
4169                                                           header_bh,
4170                                                           new_blk,
4171                                                           prev_blk,
4172                                                           prev_clusters,
4173                                                           v_start);
4174         else {
4175                 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
4176
4177                 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
4178                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4179                                                      (*first_bh)->b_blocknr,
4180                                                      last_blk,
4181                                                      new_blk,
4182                                                      v_start);
4183                 else {
4184                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4185                                                          last_blk, new_blk,
4186                                                          v_start);
4187
4188                         if ((*header_bh)->b_blocknr == last_blk && extend)
4189                                 *extend = 0;
4190                 }
4191         }
4192
4193         return ret;
4194 }
4195
4196 /*
4197  * Add a new cluster for xattr storage.
4198  *
4199  * If the new cluster is contiguous with the previous one, it will be
4200  * appended to the same extent record, and num_clusters will be updated.
4201  * If not, we will insert a new extent for it and move some xattrs in
4202  * the last cluster into the new allocated one.
4203  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4204  * lose the benefits of hashing because we'll have to search large leaves.
4205  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4206  * if it's bigger).
4207  *
4208  * first_bh is the first block of the previous extent rec and header_bh
4209  * indicates the bucket we will insert the new xattrs. They will be updated
4210  * when the header_bh is moved into the new cluster.
4211  */
4212 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4213                                        struct buffer_head *root_bh,
4214                                        struct buffer_head **first_bh,
4215                                        struct buffer_head **header_bh,
4216                                        u32 *num_clusters,
4217                                        u32 prev_cpos,
4218                                        u64 prev_blkno,
4219                                        int *extend,
4220                                        struct ocfs2_xattr_set_ctxt *ctxt)
4221 {
4222         int ret;
4223         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4224         u32 prev_clusters = *num_clusters;
4225         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4226         u64 block;
4227         handle_t *handle = ctxt->handle;
4228         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4229         struct ocfs2_extent_tree et;
4230
4231         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4232              "previous xattr blkno = %llu\n",
4233              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4234              prev_cpos, (unsigned long long)prev_blkno);
4235
4236         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4237
4238         ret = ocfs2_journal_access(handle, inode, root_bh,
4239                                    OCFS2_JOURNAL_ACCESS_WRITE);
4240         if (ret < 0) {
4241                 mlog_errno(ret);
4242                 goto leave;
4243         }
4244
4245         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4246                                      clusters_to_add, &bit_off, &num_bits);
4247         if (ret < 0) {
4248                 if (ret != -ENOSPC)
4249                         mlog_errno(ret);
4250                 goto leave;
4251         }
4252
4253         BUG_ON(num_bits > clusters_to_add);
4254
4255         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4256         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4257              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4258
4259         if (prev_blkno + prev_clusters * bpc == block &&
4260             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4261              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4262                 /*
4263                  * If this cluster is contiguous with the old one and
4264                  * adding this new cluster, we don't surpass the limit of
4265                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4266                  * initialized and used like other buckets in the previous
4267                  * cluster.
4268                  * So add it as a contiguous one. The caller will handle
4269                  * its init process.
4270                  */
4271                 v_start = prev_cpos + prev_clusters;
4272                 *num_clusters = prev_clusters + num_bits;
4273                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4274                      num_bits);
4275         } else {
4276                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4277                                                        handle,
4278                                                        first_bh,
4279                                                        header_bh,
4280                                                        block,
4281                                                        prev_blkno,
4282                                                        prev_clusters,
4283                                                        &v_start,
4284                                                        extend);
4285                 if (ret) {
4286                         mlog_errno(ret);
4287                         goto leave;
4288                 }
4289         }
4290
4291         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4292              num_bits, (unsigned long long)block, v_start);
4293         ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
4294                                   num_bits, 0, ctxt->meta_ac);
4295         if (ret < 0) {
4296                 mlog_errno(ret);
4297                 goto leave;
4298         }
4299
4300         ret = ocfs2_journal_dirty(handle, root_bh);
4301         if (ret < 0)
4302                 mlog_errno(ret);
4303
4304 leave:
4305         return ret;
4306 }
4307
4308 /*
4309  * We are given an extent.  'first' is the bucket at the very front of
4310  * the extent.  The extent has space for an additional bucket past
4311  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
4312  * of the target bucket.  We wish to shift every bucket past the target
4313  * down one, filling in that additional space.  When we get back to the
4314  * target, we split the target between itself and the now-empty bucket
4315  * at target+1 (aka, target_blkno + blks_per_bucket).
4316  */
4317 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4318                                      handle_t *handle,
4319                                      struct ocfs2_xattr_bucket *first,
4320                                      u64 target_blk,
4321                                      u32 num_clusters)
4322 {
4323         int ret, credits;
4324         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4325         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4326         u64 end_blk;
4327         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4328
4329         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4330              "from %llu, len = %u\n", (unsigned long long)target_blk,
4331              (unsigned long long)bucket_blkno(first), num_clusters);
4332
4333         /* The extent must have room for an additional bucket */
4334         BUG_ON(new_bucket >=
4335                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4336
4337         /* end_blk points to the last existing bucket */
4338         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4339
4340         /*
4341          * end_blk is the start of the last existing bucket.
4342          * Thus, (end_blk - target_blk) covers the target bucket and
4343          * every bucket after it up to, but not including, the last
4344          * existing bucket.  Then we add the last existing bucket, the
4345          * new bucket, and the first bucket (3 * blk_per_bucket).
4346          */
4347         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4348                   handle->h_buffer_credits;
4349         ret = ocfs2_extend_trans(handle, credits);
4350         if (ret) {
4351                 mlog_errno(ret);
4352                 goto out;
4353         }
4354
4355         ret = ocfs2_xattr_bucket_journal_access(handle, first,
4356                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4357         if (ret) {
4358                 mlog_errno(ret);
4359                 goto out;
4360         }
4361
4362         while (end_blk != target_blk) {
4363                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4364                                             end_blk + blk_per_bucket, 0);
4365                 if (ret)
4366                         goto out;
4367                 end_blk -= blk_per_bucket;
4368         }
4369
4370         /* Move half of the xattr in target_blkno to the next bucket. */
4371         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4372                                         target_blk + blk_per_bucket, NULL, 0);
4373
4374         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4375         ocfs2_xattr_bucket_journal_dirty(handle, first);
4376
4377 out:
4378         return ret;
4379 }
4380
4381 /*
4382  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
4383  * xb_bh is the ocfs2_xattr_block.
4384  * We will move all the buckets starting from header_bh to the next place. As
4385  * for this one, half num of its xattrs will be moved to the next one.
4386  *
4387  * We will allocate a new cluster if current cluster is full and adjust
4388  * header_bh and first_bh if the insert place is moved to the new cluster.
4389  */
4390 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4391                                       struct buffer_head *xb_bh,
4392                                       struct buffer_head *header_bh,
4393                                       struct ocfs2_xattr_set_ctxt *ctxt)
4394 {
4395         struct ocfs2_xattr_header *first_xh = NULL;
4396         struct buffer_head *first_bh = NULL;
4397         struct ocfs2_xattr_block *xb =
4398                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4399         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4400         struct ocfs2_extent_list *el = &xb_root->xt_list;
4401         struct ocfs2_xattr_header *xh =
4402                         (struct ocfs2_xattr_header *)header_bh->b_data;
4403         u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4404         struct super_block *sb = inode->i_sb;
4405         struct ocfs2_super *osb = OCFS2_SB(sb);
4406         int ret, num_buckets, extend = 1;
4407         u64 p_blkno;
4408         u32 e_cpos, num_clusters;
4409         /* The bucket at the front of the extent */
4410         struct ocfs2_xattr_bucket *first;
4411
4412         mlog(0, "Add new xattr bucket starting form %llu\n",
4413              (unsigned long long)header_bh->b_blocknr);
4414
4415         first = ocfs2_xattr_bucket_new(inode);
4416         if (!first) {
4417                 ret = -ENOMEM;
4418                 mlog_errno(ret);
4419                 goto out;
4420         }
4421
4422         /*
4423          * Add refrence for header_bh here because it may be
4424          * changed in ocfs2_add_new_xattr_cluster and we need
4425          * to free it in the end.
4426          */
4427         get_bh(header_bh);
4428
4429         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4430                                   &num_clusters, el);
4431         if (ret) {
4432                 mlog_errno(ret);
4433                 goto out;
4434         }
4435
4436         ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL);
4437         if (ret) {
4438                 mlog_errno(ret);
4439                 goto out;
4440         }
4441
4442         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4443         first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4444
4445         if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
4446                 ret = ocfs2_add_new_xattr_cluster(inode,
4447                                                   xb_bh,
4448                                                   &first_bh,
4449                                                   &header_bh,
4450                                                   &num_clusters,
4451                                                   e_cpos,
4452                                                   p_blkno,
4453                                                   &extend,
4454                                                   ctxt);
4455                 if (ret) {
4456                         mlog_errno(ret);
4457                         goto out;
4458                 }
4459         }
4460
4461         if (extend) {
4462                 /* These bucket reads should be cached */
4463                 ret = ocfs2_read_xattr_bucket(first, first_bh->b_blocknr);
4464                 if (ret) {
4465                         mlog_errno(ret);
4466                         goto out;
4467                 }
4468                 ret = ocfs2_extend_xattr_bucket(inode,
4469                                                 ctxt->handle,
4470                                                 first, header_bh->b_blocknr,
4471                                                 num_clusters);
4472                 if (ret)
4473                         mlog_errno(ret);
4474         }
4475
4476 out:
4477         brelse(first_bh);
4478         brelse(header_bh);
4479         ocfs2_xattr_bucket_free(first);
4480         return ret;
4481 }
4482
4483 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4484                                         struct ocfs2_xattr_bucket *bucket,
4485                                         int offs)
4486 {
4487         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4488
4489         offs = offs % inode->i_sb->s_blocksize;
4490         return bucket_block(bucket, block_off) + offs;
4491 }
4492
4493 /*
4494  * Handle the normal xattr set, including replace, delete and new.
4495  *
4496  * Note: "local" indicates the real data's locality. So we can't
4497  * just determine its bucket locality by its length.
4498  */
4499 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4500                                          struct ocfs2_xattr_info *xi,
4501                                          struct ocfs2_xattr_search *xs,
4502                                          u32 name_hash,
4503                                          int local)
4504 {
4505         struct ocfs2_xattr_entry *last, *xe;
4506         int name_len = strlen(xi->name);
4507         struct ocfs2_xattr_header *xh = xs->header;
4508         u16 count = le16_to_cpu(xh->xh_count), start;
4509         size_t blocksize = inode->i_sb->s_blocksize;
4510         char *val;
4511         size_t offs, size, new_size;
4512
4513         last = &xh->xh_entries[count];
4514         if (!xs->not_found) {
4515                 xe = xs->here;
4516                 offs = le16_to_cpu(xe->xe_name_offset);
4517                 if (ocfs2_xattr_is_local(xe))
4518                         size = OCFS2_XATTR_SIZE(name_len) +
4519                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4520                 else
4521                         size = OCFS2_XATTR_SIZE(name_len) +
4522                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4523
4524                 /*
4525                  * If the new value will be stored outside, xi->value has been
4526                  * initalized as an empty ocfs2_xattr_value_root, and the same
4527                  * goes with xi->value_len, so we can set new_size safely here.
4528                  * See ocfs2_xattr_set_in_bucket.
4529                  */
4530                 new_size = OCFS2_XATTR_SIZE(name_len) +
4531                            OCFS2_XATTR_SIZE(xi->value_len);
4532
4533                 le16_add_cpu(&xh->xh_name_value_len, -size);
4534                 if (xi->value) {
4535                         if (new_size > size)
4536                                 goto set_new_name_value;
4537
4538                         /* Now replace the old value with new one. */
4539                         if (local)
4540                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
4541                         else
4542                                 xe->xe_value_size = 0;
4543
4544                         val = ocfs2_xattr_bucket_get_val(inode,
4545                                                          xs->bucket, offs);
4546                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4547                                size - OCFS2_XATTR_SIZE(name_len));
4548                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4549                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4550                                        xi->value, xi->value_len);
4551
4552                         le16_add_cpu(&xh->xh_name_value_len, new_size);
4553                         ocfs2_xattr_set_local(xe, local);
4554                         return;
4555                 } else {
4556                         /*
4557                          * Remove the old entry if there is more than one.
4558                          * We don't remove the last entry so that we can
4559                          * use it to indicate the hash value of the empty
4560                          * bucket.
4561                          */
4562                         last -= 1;
4563                         le16_add_cpu(&xh->xh_count, -1);
4564                         if (xh->xh_count) {
4565                                 memmove(xe, xe + 1,
4566                                         (void *)last - (void *)xe);
4567                                 memset(last, 0,
4568                                        sizeof(struct ocfs2_xattr_entry));
4569                         } else
4570                                 xh->xh_free_start =
4571                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4572
4573                         return;
4574                 }
4575         } else {
4576                 /* find a new entry for insert. */
4577                 int low = 0, high = count - 1, tmp;
4578                 struct ocfs2_xattr_entry *tmp_xe;
4579
4580                 while (low <= high && count) {
4581                         tmp = (low + high) / 2;
4582                         tmp_xe = &xh->xh_entries[tmp];
4583
4584                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4585                                 low = tmp + 1;
4586                         else if (name_hash <
4587                                  le32_to_cpu(tmp_xe->xe_name_hash))
4588                                 high = tmp - 1;
4589                         else {
4590                                 low = tmp;
4591                                 break;
4592                         }
4593                 }
4594
4595                 xe = &xh->xh_entries[low];
4596                 if (low != count)
4597                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4598
4599                 le16_add_cpu(&xh->xh_count, 1);
4600                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4601                 xe->xe_name_hash = cpu_to_le32(name_hash);
4602                 xe->xe_name_len = name_len;
4603                 ocfs2_xattr_set_type(xe, xi->name_index);
4604         }
4605
4606 set_new_name_value:
4607         /* Insert the new name+value. */
4608         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4609
4610         /*
4611          * We must make sure that the name/value pair
4612          * exists in the same block.
4613          */
4614         offs = le16_to_cpu(xh->xh_free_start);
4615         start = offs - size;
4616
4617         if (start >> inode->i_sb->s_blocksize_bits !=
4618             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4619                 offs = offs - offs % blocksize;
4620                 xh->xh_free_start = cpu_to_le16(offs);
4621         }
4622
4623         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4624         xe->xe_name_offset = cpu_to_le16(offs - size);
4625
4626         memset(val, 0, size);
4627         memcpy(val, xi->name, name_len);
4628         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4629
4630         xe->xe_value_size = cpu_to_le64(xi->value_len);
4631         ocfs2_xattr_set_local(xe, local);
4632         xs->here = xe;
4633         le16_add_cpu(&xh->xh_free_start, -size);
4634         le16_add_cpu(&xh->xh_name_value_len, size);
4635
4636         return;
4637 }
4638
4639 /*
4640  * Set the xattr entry in the specified bucket.
4641  * The bucket is indicated by xs->bucket and it should have the enough
4642  * space for the xattr insertion.
4643  */
4644 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4645                                            handle_t *handle,
4646                                            struct ocfs2_xattr_info *xi,
4647                                            struct ocfs2_xattr_search *xs,
4648                                            u32 name_hash,
4649                                            int local)
4650 {
4651         int ret;
4652         u64 blkno;
4653
4654         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4655              (unsigned long)xi->value_len, xi->name_index,
4656              (unsigned long long)bucket_blkno(xs->bucket));
4657
4658         if (!xs->bucket->bu_bhs[1]) {
4659                 blkno = bucket_blkno(xs->bucket);
4660                 ocfs2_xattr_bucket_relse(xs->bucket);
4661                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4662                 if (ret) {
4663                         mlog_errno(ret);
4664                         goto out;
4665                 }
4666         }
4667
4668         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4669                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4670         if (ret < 0) {
4671                 mlog_errno(ret);
4672                 goto out;
4673         }
4674
4675         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4676         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4677
4678 out:
4679         return ret;
4680 }
4681
4682 /*
4683  * Truncate the specified xe_off entry in xattr bucket.
4684  * bucket is indicated by header_bh and len is the new length.
4685  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4686  *
4687  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4688  */
4689 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4690                                              struct ocfs2_xattr_bucket *bucket,
4691                                              int xe_off,
4692                                              int len,
4693                                              struct ocfs2_xattr_set_ctxt *ctxt)
4694 {
4695         int ret, offset;
4696         u64 value_blk;
4697         struct buffer_head *value_bh = NULL;
4698         struct ocfs2_xattr_value_root *xv;
4699         struct ocfs2_xattr_entry *xe;
4700         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4701         size_t blocksize = inode->i_sb->s_blocksize;
4702
4703         xe = &xh->xh_entries[xe_off];
4704
4705         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4706
4707         offset = le16_to_cpu(xe->xe_name_offset) +
4708                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4709
4710         value_blk = offset / blocksize;
4711
4712         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4713         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4714
4715         value_bh = bucket->bu_bhs[value_blk];
4716         BUG_ON(!value_bh);
4717
4718         xv = (struct ocfs2_xattr_value_root *)
4719                 (value_bh->b_data + offset % blocksize);
4720
4721         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4722                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4723         if (ret) {
4724                 mlog_errno(ret);
4725                 goto out;
4726         }
4727
4728         /*
4729          * From here on out we have to dirty the bucket.  The generic
4730          * value calls only modify one of the bucket's bhs, but we need
4731          * to send the bucket at once.  So if they error, they *could* have
4732          * modified something.  We have to assume they did, and dirty
4733          * the whole bucket.  This leaves us in a consistent state.
4734          */
4735         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4736              xe_off, (unsigned long long)bucket_blkno(bucket), len);
4737         ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
4738         if (ret) {
4739                 mlog_errno(ret);
4740                 goto out_dirty;
4741         }
4742
4743         xe->xe_value_size = cpu_to_le64(len);
4744
4745 out_dirty:
4746         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4747
4748 out:
4749         return ret;
4750 }
4751
4752 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4753                                         struct ocfs2_xattr_search *xs,
4754                                         int len,
4755                                         struct ocfs2_xattr_set_ctxt *ctxt)
4756 {
4757         int ret, offset;
4758         struct ocfs2_xattr_entry *xe = xs->here;
4759         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4760
4761         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4762
4763         offset = xe - xh->xh_entries;
4764         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
4765                                                 offset, len, ctxt);
4766         if (ret)
4767                 mlog_errno(ret);
4768
4769         return ret;
4770 }
4771
4772 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4773                                                 handle_t *handle,
4774                                                 struct ocfs2_xattr_search *xs,
4775                                                 char *val,
4776                                                 int value_len)
4777 {
4778         int offset;
4779         struct ocfs2_xattr_value_root *xv;
4780         struct ocfs2_xattr_entry *xe = xs->here;
4781
4782         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4783
4784         offset = le16_to_cpu(xe->xe_name_offset) +
4785                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4786
4787         xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4788
4789         return __ocfs2_xattr_set_value_outside(inode, handle,
4790                                                xv, val, value_len);
4791 }
4792
4793 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4794                                   struct buffer_head *root_bh,
4795                                   u64 blkno,
4796                                   u32 cpos,
4797                                   u32 len)
4798 {
4799         int ret;
4800         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4801         struct inode *tl_inode = osb->osb_tl_inode;
4802         handle_t *handle;
4803         struct ocfs2_xattr_block *xb =
4804                         (struct ocfs2_xattr_block *)root_bh->b_data;
4805         struct ocfs2_alloc_context *meta_ac = NULL;
4806         struct ocfs2_cached_dealloc_ctxt dealloc;
4807         struct ocfs2_extent_tree et;
4808
4809         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4810
4811         ocfs2_init_dealloc_ctxt(&dealloc);
4812
4813         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4814              cpos, len, (unsigned long long)blkno);
4815
4816         ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4817
4818         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4819         if (ret) {
4820                 mlog_errno(ret);
4821                 return ret;
4822         }
4823
4824         mutex_lock(&tl_inode->i_mutex);
4825
4826         if (ocfs2_truncate_log_needs_flush(osb)) {
4827                 ret = __ocfs2_flush_truncate_log(osb);
4828                 if (ret < 0) {
4829                         mlog_errno(ret);
4830                         goto out;
4831                 }
4832         }
4833
4834         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
4835         if (IS_ERR(handle)) {
4836                 ret = -ENOMEM;
4837                 mlog_errno(ret);
4838                 goto out;
4839         }
4840
4841         ret = ocfs2_journal_access(handle, inode, root_bh,
4842                                    OCFS2_JOURNAL_ACCESS_WRITE);
4843         if (ret) {
4844                 mlog_errno(ret);
4845                 goto out_commit;
4846         }
4847
4848         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4849                                   &dealloc);
4850         if (ret) {
4851                 mlog_errno(ret);
4852                 goto out_commit;
4853         }
4854
4855         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4856
4857         ret = ocfs2_journal_dirty(handle, root_bh);
4858         if (ret) {
4859                 mlog_errno(ret);
4860                 goto out_commit;
4861         }
4862
4863         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4864         if (ret)
4865                 mlog_errno(ret);
4866
4867 out_commit:
4868         ocfs2_commit_trans(osb, handle);
4869 out:
4870         ocfs2_schedule_truncate_log_flush(osb, 1);
4871
4872         mutex_unlock(&tl_inode->i_mutex);
4873
4874         if (meta_ac)
4875                 ocfs2_free_alloc_context(meta_ac);
4876
4877         ocfs2_run_deallocs(osb, &dealloc);
4878
4879         return ret;
4880 }
4881
4882 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4883                                          handle_t *handle,
4884                                          struct ocfs2_xattr_search *xs)
4885 {
4886         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4887         struct ocfs2_xattr_entry *last = &xh->xh_entries[
4888                                                 le16_to_cpu(xh->xh_count) - 1];
4889         int ret = 0;
4890
4891         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4892                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4893         if (ret) {
4894                 mlog_errno(ret);
4895                 return;
4896         }
4897
4898         /* Remove the old entry. */
4899         memmove(xs->here, xs->here + 1,
4900                 (void *)last - (void *)xs->here);
4901         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4902         le16_add_cpu(&xh->xh_count, -1);
4903
4904         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4905 }
4906
4907 /*
4908  * Set the xattr name/value in the bucket specified in xs.
4909  *
4910  * As the new value in xi may be stored in the bucket or in an outside cluster,
4911  * we divide the whole process into 3 steps:
4912  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4913  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4914  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4915  * 4. If the clusters for the new outside value can't be allocated, we need
4916  *    to free the xattr we allocated in set.
4917  */
4918 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4919                                      struct ocfs2_xattr_info *xi,
4920                                      struct ocfs2_xattr_search *xs,
4921                                      struct ocfs2_xattr_set_ctxt *ctxt)
4922 {
4923         int ret, local = 1;
4924         size_t value_len;
4925         char *val = (char *)xi->value;
4926         struct ocfs2_xattr_entry *xe = xs->here;
4927         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4928                                               strlen(xi->name));
4929
4930         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4931                 /*
4932                  * We need to truncate the xattr storage first.
4933                  *
4934                  * If both the old and new value are stored to
4935                  * outside block, we only need to truncate
4936                  * the storage and then set the value outside.
4937                  *
4938                  * If the new value should be stored within block,
4939                  * we should free all the outside block first and
4940                  * the modification to the xattr block will be done
4941                  * by following steps.
4942                  */
4943                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4944                         value_len = xi->value_len;
4945                 else
4946                         value_len = 0;
4947
4948                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4949                                                            value_len,
4950                                                            ctxt);
4951                 if (ret)
4952                         goto out;
4953
4954                 if (value_len)
4955                         goto set_value_outside;
4956         }
4957
4958         value_len = xi->value_len;
4959         /* So we have to handle the inside block change now. */
4960         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4961                 /*
4962                  * If the new value will be stored outside of block,
4963                  * initalize a new empty value root and insert it first.
4964                  */
4965                 local = 0;
4966                 xi->value = &def_xv;
4967                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4968         }
4969
4970         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
4971                                               name_hash, local);
4972         if (ret) {
4973                 mlog_errno(ret);
4974                 goto out;
4975         }
4976
4977         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4978                 goto out;
4979
4980         /* allocate the space now for the outside block storage. */
4981         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4982                                                    value_len, ctxt);
4983         if (ret) {
4984                 mlog_errno(ret);
4985
4986                 if (xs->not_found) {
4987                         /*
4988                          * We can't allocate enough clusters for outside
4989                          * storage and we have allocated xattr already,
4990                          * so need to remove it.
4991                          */
4992                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
4993                 }
4994                 goto out;
4995         }
4996
4997 set_value_outside:
4998         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
4999                                                    xs, val, value_len);
5000 out:
5001         return ret;
5002 }
5003
5004 /*
5005  * check whether the xattr bucket is filled up with the same hash value.
5006  * If we want to insert the xattr with the same hash, return -ENOSPC.
5007  * If we want to insert a xattr with different hash value, go ahead
5008  * and ocfs2_divide_xattr_bucket will handle this.
5009  */
5010 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5011                                               struct ocfs2_xattr_bucket *bucket,
5012                                               const char *name)
5013 {
5014         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5015         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5016
5017         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5018                 return 0;
5019
5020         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5021             xh->xh_entries[0].xe_name_hash) {
5022                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5023                      "hash = %u\n",
5024                      (unsigned long long)bucket_blkno(bucket),
5025                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5026                 return -ENOSPC;
5027         }
5028
5029         return 0;
5030 }
5031
5032 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5033                                              struct ocfs2_xattr_info *xi,
5034                                              struct ocfs2_xattr_search *xs,
5035                                              struct ocfs2_xattr_set_ctxt *ctxt)
5036 {
5037         struct ocfs2_xattr_header *xh;
5038         struct ocfs2_xattr_entry *xe;
5039         u16 count, header_size, xh_free_start;
5040         int free, max_free, need, old;
5041         size_t value_size = 0, name_len = strlen(xi->name);
5042         size_t blocksize = inode->i_sb->s_blocksize;
5043         int ret, allocation = 0;
5044
5045         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5046
5047 try_again:
5048         xh = xs->header;
5049         count = le16_to_cpu(xh->xh_count);
5050         xh_free_start = le16_to_cpu(xh->xh_free_start);
5051         header_size = sizeof(struct ocfs2_xattr_header) +
5052                         count * sizeof(struct ocfs2_xattr_entry);
5053         max_free = OCFS2_XATTR_BUCKET_SIZE -
5054                 le16_to_cpu(xh->xh_name_value_len) - header_size;
5055
5056         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5057                         "of %u which exceed block size\n",
5058                         (unsigned long long)bucket_blkno(xs->bucket),
5059                         header_size);
5060
5061         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5062                 value_size = OCFS2_XATTR_ROOT_SIZE;
5063         else if (xi->value)
5064                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
5065
5066         if (xs->not_found)
5067                 need = sizeof(struct ocfs2_xattr_entry) +
5068                         OCFS2_XATTR_SIZE(name_len) + value_size;
5069         else {
5070                 need = value_size + OCFS2_XATTR_SIZE(name_len);
5071
5072                 /*
5073                  * We only replace the old value if the new length is smaller
5074                  * than the old one. Otherwise we will allocate new space in the
5075                  * bucket to store it.
5076                  */
5077                 xe = xs->here;
5078                 if (ocfs2_xattr_is_local(xe))
5079                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5080                 else
5081                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5082
5083                 if (old >= value_size)
5084                         need = 0;
5085         }
5086
5087         free = xh_free_start - header_size;
5088         /*
5089          * We need to make sure the new name/value pair
5090          * can exist in the same block.
5091          */
5092         if (xh_free_start % blocksize < need)
5093                 free -= xh_free_start % blocksize;
5094
5095         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5096              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5097              " %u\n", xs->not_found,
5098              (unsigned long long)bucket_blkno(xs->bucket),
5099              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5100              le16_to_cpu(xh->xh_name_value_len));
5101
5102         if (free < need ||
5103             (xs->not_found &&
5104              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5105                 if (need <= max_free &&
5106                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5107                         /*
5108                          * We can create the space by defragment. Since only the
5109                          * name/value will be moved, the xe shouldn't be changed
5110                          * in xs.
5111                          */
5112                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5113                                                         xs->bucket);
5114                         if (ret) {
5115                                 mlog_errno(ret);
5116                                 goto out;
5117                         }
5118
5119                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5120                         free = xh_free_start - header_size;
5121                         if (xh_free_start % blocksize < need)
5122                                 free -= xh_free_start % blocksize;
5123
5124                         if (free >= need)
5125                                 goto xattr_set;
5126
5127                         mlog(0, "Can't get enough space for xattr insert by "
5128                              "defragment. Need %u bytes, but we have %d, so "
5129                              "allocate new bucket for it.\n", need, free);
5130                 }
5131
5132                 /*
5133                  * We have to add new buckets or clusters and one
5134                  * allocation should leave us enough space for insert.
5135                  */
5136                 BUG_ON(allocation);
5137
5138                 /*
5139                  * We do not allow for overlapping ranges between buckets. And
5140                  * the maximum number of collisions we will allow for then is
5141                  * one bucket's worth, so check it here whether we need to
5142                  * add a new bucket for the insert.
5143                  */
5144                 ret = ocfs2_check_xattr_bucket_collision(inode,
5145                                                          xs->bucket,
5146                                                          xi->name);
5147                 if (ret) {
5148                         mlog_errno(ret);
5149                         goto out;
5150                 }
5151
5152                 ret = ocfs2_add_new_xattr_bucket(inode,
5153                                                  xs->xattr_bh,
5154                                                  xs->bucket->bu_bhs[0],
5155                                                  ctxt);
5156                 if (ret) {
5157                         mlog_errno(ret);
5158                         goto out;
5159                 }
5160
5161                 ocfs2_xattr_bucket_relse(xs->bucket);
5162
5163                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5164                                                    xi->name_index,
5165                                                    xi->name, xs);
5166                 if (ret && ret != -ENODATA)
5167                         goto out;
5168                 xs->not_found = ret;
5169                 allocation = 1;
5170                 goto try_again;
5171         }
5172
5173 xattr_set:
5174         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5175 out:
5176         mlog_exit(ret);
5177         return ret;
5178 }
5179
5180 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5181                                         struct ocfs2_xattr_bucket *bucket,
5182                                         void *para)
5183 {
5184         int ret = 0;
5185         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5186         u16 i;
5187         struct ocfs2_xattr_entry *xe;
5188         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5189         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5190         int credits = ocfs2_remove_extent_credits(osb->sb) +
5191                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5192
5193
5194         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5195
5196         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5197                 xe = &xh->xh_entries[i];
5198                 if (ocfs2_xattr_is_local(xe))
5199                         continue;
5200
5201                 ctxt.handle = ocfs2_start_trans(osb, credits);
5202                 if (IS_ERR(ctxt.handle)) {
5203                         ret = PTR_ERR(ctxt.handle);
5204                         mlog_errno(ret);
5205                         break;
5206                 }
5207
5208                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5209                                                         i, 0, &ctxt);
5210
5211                 ocfs2_commit_trans(osb, ctxt.handle);
5212                 if (ret) {
5213                         mlog_errno(ret);
5214                         break;
5215                 }
5216         }
5217
5218         ocfs2_schedule_truncate_log_flush(osb, 1);
5219         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5220         return ret;
5221 }
5222
5223 static int ocfs2_delete_xattr_index_block(struct inode *inode,
5224                                           struct buffer_head *xb_bh)
5225 {
5226         struct ocfs2_xattr_block *xb =
5227                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5228         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5229         int ret = 0;
5230         u32 name_hash = UINT_MAX, e_cpos, num_clusters;
5231         u64 p_blkno;
5232
5233         if (le16_to_cpu(el->l_next_free_rec) == 0)
5234                 return 0;
5235
5236         while (name_hash > 0) {
5237                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
5238                                           &e_cpos, &num_clusters, el);
5239                 if (ret) {
5240                         mlog_errno(ret);
5241                         goto out;
5242                 }
5243
5244                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
5245                                                   ocfs2_delete_xattr_in_bucket,
5246                                                   NULL);
5247                 if (ret) {
5248                         mlog_errno(ret);
5249                         goto out;
5250                 }
5251
5252                 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
5253                                              p_blkno, e_cpos, num_clusters);
5254                 if (ret) {
5255                         mlog_errno(ret);
5256                         break;
5257                 }
5258
5259                 if (e_cpos == 0)
5260                         break;
5261
5262                 name_hash = e_cpos - 1;
5263         }
5264
5265 out:
5266         return ret;
5267 }
5268
5269 /*
5270  * 'security' attributes support
5271  */
5272 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5273                                         size_t list_size, const char *name,
5274                                         size_t name_len)
5275 {
5276         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5277         const size_t total_len = prefix_len + name_len + 1;
5278
5279         if (list && total_len <= list_size) {
5280                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5281                 memcpy(list + prefix_len, name, name_len);
5282                 list[prefix_len + name_len] = '\0';
5283         }
5284         return total_len;
5285 }
5286
5287 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5288                                     void *buffer, size_t size)
5289 {
5290         if (strcmp(name, "") == 0)
5291                 return -EINVAL;
5292         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5293                                buffer, size);
5294 }
5295
5296 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5297                                     const void *value, size_t size, int flags)
5298 {
5299         if (strcmp(name, "") == 0)
5300                 return -EINVAL;
5301
5302         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5303                                size, flags);
5304 }
5305
5306 int ocfs2_init_security_get(struct inode *inode,
5307                             struct inode *dir,
5308                             struct ocfs2_security_xattr_info *si)
5309 {
5310         return security_inode_init_security(inode, dir, &si->name, &si->value,
5311                                             &si->value_len);
5312 }
5313
5314 int ocfs2_init_security_set(handle_t *handle,
5315                             struct inode *inode,
5316                             struct buffer_head *di_bh,
5317                             struct ocfs2_security_xattr_info *si,
5318                             struct ocfs2_alloc_context *xattr_ac,
5319                             struct ocfs2_alloc_context *data_ac)
5320 {
5321         return ocfs2_xattr_set_handle(handle, inode, di_bh,
5322                                      OCFS2_XATTR_INDEX_SECURITY,
5323                                      si->name, si->value, si->value_len, 0,
5324                                      xattr_ac, data_ac);
5325 }
5326
5327 struct xattr_handler ocfs2_xattr_security_handler = {
5328         .prefix = XATTR_SECURITY_PREFIX,
5329         .list   = ocfs2_xattr_security_list,
5330         .get    = ocfs2_xattr_security_get,
5331         .set    = ocfs2_xattr_security_set,
5332 };
5333
5334 /*
5335  * 'trusted' attributes support
5336  */
5337 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
5338                                        size_t list_size, const char *name,
5339                                        size_t name_len)
5340 {
5341         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
5342         const size_t total_len = prefix_len + name_len + 1;
5343
5344         if (list && total_len <= list_size) {
5345                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
5346                 memcpy(list + prefix_len, name, name_len);
5347                 list[prefix_len + name_len] = '\0';
5348         }
5349         return total_len;
5350 }
5351
5352 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
5353                                    void *buffer, size_t size)
5354 {
5355         if (strcmp(name, "") == 0)
5356                 return -EINVAL;
5357         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
5358                                buffer, size);
5359 }
5360
5361 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
5362                                    const void *value, size_t size, int flags)
5363 {
5364         if (strcmp(name, "") == 0)
5365                 return -EINVAL;
5366
5367         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
5368                                size, flags);
5369 }
5370
5371 struct xattr_handler ocfs2_xattr_trusted_handler = {
5372         .prefix = XATTR_TRUSTED_PREFIX,
5373         .list   = ocfs2_xattr_trusted_list,
5374         .get    = ocfs2_xattr_trusted_get,
5375         .set    = ocfs2_xattr_trusted_set,
5376 };
5377
5378 /*
5379  * 'user' attributes support
5380  */
5381 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
5382                                     size_t list_size, const char *name,
5383                                     size_t name_len)
5384 {
5385         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
5386         const size_t total_len = prefix_len + name_len + 1;
5387         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5388
5389         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5390                 return 0;
5391
5392         if (list && total_len <= list_size) {
5393                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
5394                 memcpy(list + prefix_len, name, name_len);
5395                 list[prefix_len + name_len] = '\0';
5396         }
5397         return total_len;
5398 }
5399
5400 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
5401                                 void *buffer, size_t size)
5402 {
5403         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5404
5405         if (strcmp(name, "") == 0)
5406                 return -EINVAL;
5407         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5408                 return -EOPNOTSUPP;
5409         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
5410                                buffer, size);
5411 }
5412
5413 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
5414                                 const void *value, size_t size, int flags)
5415 {
5416         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5417
5418         if (strcmp(name, "") == 0)
5419                 return -EINVAL;
5420         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5421                 return -EOPNOTSUPP;
5422
5423         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
5424                                size, flags);
5425 }
5426
5427 struct xattr_handler ocfs2_xattr_user_handler = {
5428         .prefix = XATTR_USER_PREFIX,
5429         .list   = ocfs2_xattr_user_list,
5430         .get    = ocfs2_xattr_user_get,
5431         .set    = ocfs2_xattr_user_set,
5432 };