ocfs2: ocfs2_find_path() only needs the caching info
[safe/jmp/linux-2.6] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58
59
60 struct ocfs2_xattr_def_value_root {
61         struct ocfs2_xattr_value_root   xv;
62         struct ocfs2_extent_rec         er;
63 };
64
65 struct ocfs2_xattr_bucket {
66         /* The inode these xattrs are associated with */
67         struct inode *bu_inode;
68
69         /* The actual buffers that make up the bucket */
70         struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
71
72         /* How many blocks make up one bucket for this filesystem */
73         int bu_blocks;
74 };
75
76 struct ocfs2_xattr_set_ctxt {
77         handle_t *handle;
78         struct ocfs2_alloc_context *meta_ac;
79         struct ocfs2_alloc_context *data_ac;
80         struct ocfs2_cached_dealloc_ctxt dealloc;
81 };
82
/* Size of a value root together with its trailing extent record. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))

/* Values longer than this many bytes are accounted as a value root. */
#define OCFS2_XATTR_INLINE_SIZE	80

/* Bytes held back as a gap in the free-space estimates below. */
#define OCFS2_XATTR_HEADER_GAP	4

/* Free bytes in a minimum-sized in-inode xattr area, after header and gap. */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)

/*
 * Free bytes in one filesystem block after the xattr block structure,
 * the xattr header and the gap; ptr must provide ->i_sb.
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
93
94 static struct ocfs2_xattr_def_value_root def_xv = {
95         .xv.xr_list.l_count = cpu_to_le16(1),
96 };
97
98 struct xattr_handler *ocfs2_xattr_handlers[] = {
99         &ocfs2_xattr_user_handler,
100 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
101         &ocfs2_xattr_acl_access_handler,
102         &ocfs2_xattr_acl_default_handler,
103 #endif
104         &ocfs2_xattr_trusted_handler,
105         &ocfs2_xattr_security_handler,
106         NULL
107 };
108
109 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
110         [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
111 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
112         [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
113                                         = &ocfs2_xattr_acl_access_handler,
114         [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
115                                         = &ocfs2_xattr_acl_default_handler,
116 #endif
117         [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
118         [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
119 };
120
121 struct ocfs2_xattr_info {
122         int name_index;
123         const char *name;
124         const void *value;
125         size_t value_len;
126 };
127
/* State shared by the xattr find/set routines while locating an entry. */
struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the buffer head of the block that holds the
	 * extended attribute.  When the attribute is stored in the inode,
	 * xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket *bucket;
	void *base;
	void *end;
	struct ocfs2_xattr_entry *here;
	int not_found;
};
142
143 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
144                                              struct ocfs2_xattr_header *xh,
145                                              int index,
146                                              int *block_off,
147                                              int *new_offset);
148
149 static int ocfs2_xattr_block_find(struct inode *inode,
150                                   int name_index,
151                                   const char *name,
152                                   struct ocfs2_xattr_search *xs);
153 static int ocfs2_xattr_index_block_find(struct inode *inode,
154                                         struct buffer_head *root_bh,
155                                         int name_index,
156                                         const char *name,
157                                         struct ocfs2_xattr_search *xs);
158
159 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
160                                         struct ocfs2_xattr_tree_root *xt,
161                                         char *buffer,
162                                         size_t buffer_size);
163
164 static int ocfs2_xattr_create_index_block(struct inode *inode,
165                                           struct ocfs2_xattr_search *xs,
166                                           struct ocfs2_xattr_set_ctxt *ctxt);
167
168 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
169                                              struct ocfs2_xattr_info *xi,
170                                              struct ocfs2_xattr_search *xs,
171                                              struct ocfs2_xattr_set_ctxt *ctxt);
172
173 static int ocfs2_delete_xattr_index_block(struct inode *inode,
174                                           struct buffer_head *xb_bh);
175 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
176                                   u64 src_blk, u64 last_blk, u64 to_blk,
177                                   unsigned int start_bucket,
178                                   u32 *first_hash);
179
180 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
181 {
182         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
183 }
184
185 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
186 {
187         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
188 }
189
190 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
191 {
192         u16 len = sb->s_blocksize -
193                  offsetof(struct ocfs2_xattr_header, xh_entries);
194
195         return len / sizeof(struct ocfs2_xattr_entry);
196 }
197
/* Block number of the first buffer in bucket _b. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data pointer of the _n'th buffer in bucket _b. */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header, which sits at the start of the bucket's first block. */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
201
202 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
203 {
204         struct ocfs2_xattr_bucket *bucket;
205         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
206
207         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
208
209         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
210         if (bucket) {
211                 bucket->bu_inode = inode;
212                 bucket->bu_blocks = blks;
213         }
214
215         return bucket;
216 }
217
218 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
219 {
220         int i;
221
222         for (i = 0; i < bucket->bu_blocks; i++) {
223                 brelse(bucket->bu_bhs[i]);
224                 bucket->bu_bhs[i] = NULL;
225         }
226 }
227
228 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
229 {
230         if (bucket) {
231                 ocfs2_xattr_bucket_relse(bucket);
232                 bucket->bu_inode = NULL;
233                 kfree(bucket);
234         }
235 }
236
237 /*
238  * A bucket that has never been written to disk doesn't need to be
239  * read.  We just need the buffer_heads.  Don't call this for
240  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
241  * them fully.
242  */
243 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
244                                    u64 xb_blkno)
245 {
246         int i, rc = 0;
247
248         for (i = 0; i < bucket->bu_blocks; i++) {
249                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
250                                               xb_blkno + i);
251                 if (!bucket->bu_bhs[i]) {
252                         rc = -EIO;
253                         mlog_errno(rc);
254                         break;
255                 }
256
257                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
258                                            bucket->bu_bhs[i]))
259                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
260                                                       bucket->bu_bhs[i]);
261         }
262
263         if (rc)
264                 ocfs2_xattr_bucket_relse(bucket);
265         return rc;
266 }
267
268 /* Read the xattr bucket at xb_blkno */
269 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
270                                    u64 xb_blkno)
271 {
272         int rc;
273
274         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
275                                bucket->bu_blocks, bucket->bu_bhs, 0,
276                                NULL);
277         if (!rc) {
278                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
279                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
280                                                  bucket->bu_bhs,
281                                                  bucket->bu_blocks,
282                                                  &bucket_xh(bucket)->xh_check);
283                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
284                 if (rc)
285                         mlog_errno(rc);
286         }
287
288         if (rc)
289                 ocfs2_xattr_bucket_relse(bucket);
290         return rc;
291 }
292
293 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
294                                              struct ocfs2_xattr_bucket *bucket,
295                                              int type)
296 {
297         int i, rc = 0;
298
299         for (i = 0; i < bucket->bu_blocks; i++) {
300                 rc = ocfs2_journal_access(handle,
301                                           INODE_CACHE(bucket->bu_inode),
302                                           bucket->bu_bhs[i], type);
303                 if (rc) {
304                         mlog_errno(rc);
305                         break;
306                 }
307         }
308
309         return rc;
310 }
311
312 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
313                                              struct ocfs2_xattr_bucket *bucket)
314 {
315         int i;
316
317         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
318         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
319                                    bucket->bu_bhs, bucket->bu_blocks,
320                                    &bucket_xh(bucket)->xh_check);
321         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
322
323         for (i = 0; i < bucket->bu_blocks; i++)
324                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
325 }
326
327 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
328                                          struct ocfs2_xattr_bucket *src)
329 {
330         int i;
331         int blocksize = src->bu_inode->i_sb->s_blocksize;
332
333         BUG_ON(dest->bu_blocks != src->bu_blocks);
334         BUG_ON(dest->bu_inode != src->bu_inode);
335
336         for (i = 0; i < src->bu_blocks; i++) {
337                 memcpy(bucket_block(dest, i), bucket_block(src, i),
338                        blocksize);
339         }
340 }
341
342 static int ocfs2_validate_xattr_block(struct super_block *sb,
343                                       struct buffer_head *bh)
344 {
345         int rc;
346         struct ocfs2_xattr_block *xb =
347                 (struct ocfs2_xattr_block *)bh->b_data;
348
349         mlog(0, "Validating xattr block %llu\n",
350              (unsigned long long)bh->b_blocknr);
351
352         BUG_ON(!buffer_uptodate(bh));
353
354         /*
355          * If the ecc fails, we return the error but otherwise
356          * leave the filesystem running.  We know any error is
357          * local to this block.
358          */
359         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
360         if (rc)
361                 return rc;
362
363         /*
364          * Errors after here are fatal
365          */
366
367         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
368                 ocfs2_error(sb,
369                             "Extended attribute block #%llu has bad "
370                             "signature %.*s",
371                             (unsigned long long)bh->b_blocknr, 7,
372                             xb->xb_signature);
373                 return -EINVAL;
374         }
375
376         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
377                 ocfs2_error(sb,
378                             "Extended attribute block #%llu has an "
379                             "invalid xb_blkno of %llu",
380                             (unsigned long long)bh->b_blocknr,
381                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
382                 return -EINVAL;
383         }
384
385         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
386                 ocfs2_error(sb,
387                             "Extended attribute block #%llu has an invalid "
388                             "xb_fs_generation of #%u",
389                             (unsigned long long)bh->b_blocknr,
390                             le32_to_cpu(xb->xb_fs_generation));
391                 return -EINVAL;
392         }
393
394         return 0;
395 }
396
397 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
398                                   struct buffer_head **bh)
399 {
400         int rc;
401         struct buffer_head *tmp = *bh;
402
403         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
404                               ocfs2_validate_xattr_block);
405
406         /* If ocfs2_read_block() got us a new bh, pass it up. */
407         if (!rc && !*bh)
408                 *bh = tmp;
409
410         return rc;
411 }
412
413 static inline const char *ocfs2_xattr_prefix(int name_index)
414 {
415         struct xattr_handler *handler = NULL;
416
417         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
418                 handler = ocfs2_xattr_handler_map[name_index];
419
420         return handler ? handler->prefix : NULL;
421 }
422
423 static u32 ocfs2_xattr_name_hash(struct inode *inode,
424                                  const char *name,
425                                  int name_len)
426 {
427         /* Get hash value of uuid from super block */
428         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
429         int i;
430
431         /* hash extended attribute name */
432         for (i = 0; i < name_len; i++) {
433                 hash = (hash << OCFS2_HASH_SHIFT) ^
434                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
435                        *name++;
436         }
437
438         return hash;
439 }
440
441 /*
442  * ocfs2_xattr_hash_entry()
443  *
444  * Compute the hash of an extended attribute.
445  */
446 static void ocfs2_xattr_hash_entry(struct inode *inode,
447                                    struct ocfs2_xattr_header *header,
448                                    struct ocfs2_xattr_entry *entry)
449 {
450         u32 hash = 0;
451         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
452
453         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
454         entry->xe_name_hash = cpu_to_le32(hash);
455
456         return;
457 }
458
459 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
460 {
461         int size = 0;
462
463         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
464                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
465         else
466                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
467         size += sizeof(struct ocfs2_xattr_entry);
468
469         return size;
470 }
471
472 int ocfs2_calc_security_init(struct inode *dir,
473                              struct ocfs2_security_xattr_info *si,
474                              int *want_clusters,
475                              int *xattr_credits,
476                              struct ocfs2_alloc_context **xattr_ac)
477 {
478         int ret = 0;
479         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
480         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
481                                                  si->value_len);
482
483         /*
484          * The max space of security xattr taken inline is
485          * 256(name) + 80(value) + 16(entry) = 352 bytes,
486          * So reserve one metadata block for it is ok.
487          */
488         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
489             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
490                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
491                 if (ret) {
492                         mlog_errno(ret);
493                         return ret;
494                 }
495                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
496         }
497
498         /* reserve clusters for xattr value which will be set in B tree*/
499         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
500                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
501                                                             si->value_len);
502
503                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
504                                                            new_clusters);
505                 *want_clusters += new_clusters;
506         }
507         return ret;
508 }
509
510 int ocfs2_calc_xattr_init(struct inode *dir,
511                           struct buffer_head *dir_bh,
512                           int mode,
513                           struct ocfs2_security_xattr_info *si,
514                           int *want_clusters,
515                           int *xattr_credits,
516                           int *want_meta)
517 {
518         int ret = 0;
519         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
520         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
521
522         if (si->enable)
523                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
524                                                      si->value_len);
525
526         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
527                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
528                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
529                                         "", NULL, 0);
530                 if (acl_len > 0) {
531                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
532                         if (S_ISDIR(mode))
533                                 a_size <<= 1;
534                 } else if (acl_len != 0 && acl_len != -ENODATA) {
535                         mlog_errno(ret);
536                         return ret;
537                 }
538         }
539
540         if (!(s_size + a_size))
541                 return ret;
542
543         /*
544          * The max space of security xattr taken inline is
545          * 256(name) + 80(value) + 16(entry) = 352 bytes,
546          * The max space of acl xattr taken inline is
547          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
548          * when blocksize = 512, may reserve one more cluser for
549          * xattr bucket, otherwise reserve one metadata block
550          * for them is ok.
551          * If this is a new directory with inline data,
552          * we choose to reserve the entire inline area for
553          * directory contents and force an external xattr block.
554          */
555         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
556             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
557             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
558                 *want_meta = *want_meta + 1;
559                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
560         }
561
562         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
563             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
564                 *want_clusters += 1;
565                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
566         }
567
568         /*
569          * reserve credits and clusters for xattrs which has large value
570          * and have to be set outside
571          */
572         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
573                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
574                                                         si->value_len);
575                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
576                                                            new_clusters);
577                 *want_clusters += new_clusters;
578         }
579         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
580             acl_len > OCFS2_XATTR_INLINE_SIZE) {
581                 /* for directory, it has DEFAULT and ACCESS two types of acls */
582                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
583                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
584                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
585                                                            new_clusters);
586                 *want_clusters += new_clusters;
587         }
588
589         return ret;
590 }
591
592 static int ocfs2_xattr_extend_allocation(struct inode *inode,
593                                          u32 clusters_to_add,
594                                          struct ocfs2_xattr_value_buf *vb,
595                                          struct ocfs2_xattr_set_ctxt *ctxt)
596 {
597         int status = 0;
598         handle_t *handle = ctxt->handle;
599         enum ocfs2_alloc_restarted why;
600         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
601         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
602         struct ocfs2_extent_tree et;
603
604         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
605
606         ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
607
608         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
609                               OCFS2_JOURNAL_ACCESS_WRITE);
610         if (status < 0) {
611                 mlog_errno(status);
612                 goto leave;
613         }
614
615         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
616         status = ocfs2_add_clusters_in_btree(osb,
617                                              inode,
618                                              &logical_start,
619                                              clusters_to_add,
620                                              0,
621                                              &et,
622                                              handle,
623                                              ctxt->data_ac,
624                                              ctxt->meta_ac,
625                                              &why);
626         if (status < 0) {
627                 mlog_errno(status);
628                 goto leave;
629         }
630
631         status = ocfs2_journal_dirty(handle, vb->vb_bh);
632         if (status < 0) {
633                 mlog_errno(status);
634                 goto leave;
635         }
636
637         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
638
639         /*
640          * We should have already allocated enough space before the transaction,
641          * so no need to restart.
642          */
643         BUG_ON(why != RESTART_NONE || clusters_to_add);
644
645 leave:
646
647         return status;
648 }
649
650 static int __ocfs2_remove_xattr_range(struct inode *inode,
651                                       struct ocfs2_xattr_value_buf *vb,
652                                       u32 cpos, u32 phys_cpos, u32 len,
653                                       struct ocfs2_xattr_set_ctxt *ctxt)
654 {
655         int ret;
656         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
657         handle_t *handle = ctxt->handle;
658         struct ocfs2_extent_tree et;
659
660         ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
661
662         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
663                             OCFS2_JOURNAL_ACCESS_WRITE);
664         if (ret) {
665                 mlog_errno(ret);
666                 goto out;
667         }
668
669         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
670                                   &ctxt->dealloc);
671         if (ret) {
672                 mlog_errno(ret);
673                 goto out;
674         }
675
676         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
677
678         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
679         if (ret) {
680                 mlog_errno(ret);
681                 goto out;
682         }
683
684         ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
685         if (ret)
686                 mlog_errno(ret);
687
688 out:
689         return ret;
690 }
691
692 static int ocfs2_xattr_shrink_size(struct inode *inode,
693                                    u32 old_clusters,
694                                    u32 new_clusters,
695                                    struct ocfs2_xattr_value_buf *vb,
696                                    struct ocfs2_xattr_set_ctxt *ctxt)
697 {
698         int ret = 0;
699         u32 trunc_len, cpos, phys_cpos, alloc_size;
700         u64 block;
701
702         if (old_clusters <= new_clusters)
703                 return 0;
704
705         cpos = new_clusters;
706         trunc_len = old_clusters - new_clusters;
707         while (trunc_len) {
708                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
709                                                &alloc_size,
710                                                &vb->vb_xv->xr_list);
711                 if (ret) {
712                         mlog_errno(ret);
713                         goto out;
714                 }
715
716                 if (alloc_size > trunc_len)
717                         alloc_size = trunc_len;
718
719                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
720                                                  phys_cpos, alloc_size,
721                                                  ctxt);
722                 if (ret) {
723                         mlog_errno(ret);
724                         goto out;
725                 }
726
727                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
728                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
729                                                        block, alloc_size);
730                 cpos += alloc_size;
731                 trunc_len -= alloc_size;
732         }
733
734 out:
735         return ret;
736 }
737
738 static int ocfs2_xattr_value_truncate(struct inode *inode,
739                                       struct ocfs2_xattr_value_buf *vb,
740                                       int len,
741                                       struct ocfs2_xattr_set_ctxt *ctxt)
742 {
743         int ret;
744         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
745         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
746
747         if (new_clusters == old_clusters)
748                 return 0;
749
750         if (new_clusters > old_clusters)
751                 ret = ocfs2_xattr_extend_allocation(inode,
752                                                     new_clusters - old_clusters,
753                                                     vb, ctxt);
754         else
755                 ret = ocfs2_xattr_shrink_size(inode,
756                                               old_clusters, new_clusters,
757                                               vb, ctxt);
758
759         return ret;
760 }
761
/*
 * Append "<prefix><name>\0" to @buffer at offset *@result.
 *
 * *@result is always advanced by the full length of the entry,
 * including the terminating NUL.  When @size is 0 nothing is written:
 * the caller is only measuring how large the buffer must be.
 *
 * Returns 0 on success, or -ERANGE if the entry does not fit within
 * @size bytes.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	char *dst;

	*result = start + (prefix_len + name_len + 1);

	/* size query only: report the length, write nothing */
	if (size == 0)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	dst += prefix_len;
	memcpy(dst, name, name_len);
	dst[name_len] = '\0';

	return 0;
}
785
786 static int ocfs2_xattr_list_entries(struct inode *inode,
787                                     struct ocfs2_xattr_header *header,
788                                     char *buffer, size_t buffer_size)
789 {
790         size_t result = 0;
791         int i, type, ret;
792         const char *prefix, *name;
793
794         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
795                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
796                 type = ocfs2_xattr_get_type(entry);
797                 prefix = ocfs2_xattr_prefix(type);
798
799                 if (prefix) {
800                         name = (const char *)header +
801                                 le16_to_cpu(entry->xe_name_offset);
802
803                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
804                                                      &result, prefix, name,
805                                                      entry->xe_name_len);
806                         if (ret)
807                                 return ret;
808                 }
809         }
810
811         return result;
812 }
813
814 static int ocfs2_xattr_ibody_list(struct inode *inode,
815                                   struct ocfs2_dinode *di,
816                                   char *buffer,
817                                   size_t buffer_size)
818 {
819         struct ocfs2_xattr_header *header = NULL;
820         struct ocfs2_inode_info *oi = OCFS2_I(inode);
821         int ret = 0;
822
823         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
824                 return ret;
825
826         header = (struct ocfs2_xattr_header *)
827                  ((void *)di + inode->i_sb->s_blocksize -
828                  le16_to_cpu(di->i_xattr_inline_size));
829
830         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
831
832         return ret;
833 }
834
835 static int ocfs2_xattr_block_list(struct inode *inode,
836                                   struct ocfs2_dinode *di,
837                                   char *buffer,
838                                   size_t buffer_size)
839 {
840         struct buffer_head *blk_bh = NULL;
841         struct ocfs2_xattr_block *xb;
842         int ret = 0;
843
844         if (!di->i_xattr_loc)
845                 return ret;
846
847         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
848                                      &blk_bh);
849         if (ret < 0) {
850                 mlog_errno(ret);
851                 return ret;
852         }
853
854         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
855         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
856                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
857                 ret = ocfs2_xattr_list_entries(inode, header,
858                                                buffer, buffer_size);
859         } else {
860                 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
861                 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
862                                                    buffer, buffer_size);
863         }
864
865         brelse(blk_bh);
866
867         return ret;
868 }
869
870 ssize_t ocfs2_listxattr(struct dentry *dentry,
871                         char *buffer,
872                         size_t size)
873 {
874         int ret = 0, i_ret = 0, b_ret = 0;
875         struct buffer_head *di_bh = NULL;
876         struct ocfs2_dinode *di = NULL;
877         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
878
879         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
880                 return -EOPNOTSUPP;
881
882         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
883                 return ret;
884
885         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
886         if (ret < 0) {
887                 mlog_errno(ret);
888                 return ret;
889         }
890
891         di = (struct ocfs2_dinode *)di_bh->b_data;
892
893         down_read(&oi->ip_xattr_sem);
894         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
895         if (i_ret < 0)
896                 b_ret = 0;
897         else {
898                 if (buffer) {
899                         buffer += i_ret;
900                         size -= i_ret;
901                 }
902                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
903                                                buffer, size);
904                 if (b_ret < 0)
905                         i_ret = 0;
906         }
907         up_read(&oi->ip_xattr_sem);
908         ocfs2_inode_unlock(dentry->d_inode, 0);
909
910         brelse(di_bh);
911
912         return i_ret + b_ret;
913 }
914
915 static int ocfs2_xattr_find_entry(int name_index,
916                                   const char *name,
917                                   struct ocfs2_xattr_search *xs)
918 {
919         struct ocfs2_xattr_entry *entry;
920         size_t name_len;
921         int i, cmp = 1;
922
923         if (name == NULL)
924                 return -EINVAL;
925
926         name_len = strlen(name);
927         entry = xs->here;
928         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
929                 cmp = name_index - ocfs2_xattr_get_type(entry);
930                 if (!cmp)
931                         cmp = name_len - entry->xe_name_len;
932                 if (!cmp)
933                         cmp = memcmp(name, (xs->base +
934                                      le16_to_cpu(entry->xe_name_offset)),
935                                      name_len);
936                 if (cmp == 0)
937                         break;
938                 entry += 1;
939         }
940         xs->here = entry;
941
942         return cmp ? -ENODATA : 0;
943 }
944
945 static int ocfs2_xattr_get_value_outside(struct inode *inode,
946                                          struct ocfs2_xattr_value_root *xv,
947                                          void *buffer,
948                                          size_t len)
949 {
950         u32 cpos, p_cluster, num_clusters, bpc, clusters;
951         u64 blkno;
952         int i, ret = 0;
953         size_t cplen, blocksize;
954         struct buffer_head *bh = NULL;
955         struct ocfs2_extent_list *el;
956
957         el = &xv->xr_list;
958         clusters = le32_to_cpu(xv->xr_clusters);
959         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
960         blocksize = inode->i_sb->s_blocksize;
961
962         cpos = 0;
963         while (cpos < clusters) {
964                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
965                                                &num_clusters, el);
966                 if (ret) {
967                         mlog_errno(ret);
968                         goto out;
969                 }
970
971                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
972                 /* Copy ocfs2_xattr_value */
973                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
974                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
975                                                &bh, NULL);
976                         if (ret) {
977                                 mlog_errno(ret);
978                                 goto out;
979                         }
980
981                         cplen = len >= blocksize ? blocksize : len;
982                         memcpy(buffer, bh->b_data, cplen);
983                         len -= cplen;
984                         buffer += cplen;
985
986                         brelse(bh);
987                         bh = NULL;
988                         if (len == 0)
989                                 break;
990                 }
991                 cpos += num_clusters;
992         }
993 out:
994         return ret;
995 }
996
997 static int ocfs2_xattr_ibody_get(struct inode *inode,
998                                  int name_index,
999                                  const char *name,
1000                                  void *buffer,
1001                                  size_t buffer_size,
1002                                  struct ocfs2_xattr_search *xs)
1003 {
1004         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1005         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1006         struct ocfs2_xattr_value_root *xv;
1007         size_t size;
1008         int ret = 0;
1009
1010         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1011                 return -ENODATA;
1012
1013         xs->end = (void *)di + inode->i_sb->s_blocksize;
1014         xs->header = (struct ocfs2_xattr_header *)
1015                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1016         xs->base = (void *)xs->header;
1017         xs->here = xs->header->xh_entries;
1018
1019         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1020         if (ret)
1021                 return ret;
1022         size = le64_to_cpu(xs->here->xe_value_size);
1023         if (buffer) {
1024                 if (size > buffer_size)
1025                         return -ERANGE;
1026                 if (ocfs2_xattr_is_local(xs->here)) {
1027                         memcpy(buffer, (void *)xs->base +
1028                                le16_to_cpu(xs->here->xe_name_offset) +
1029                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1030                 } else {
1031                         xv = (struct ocfs2_xattr_value_root *)
1032                                 (xs->base + le16_to_cpu(
1033                                  xs->here->xe_name_offset) +
1034                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1035                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1036                                                             buffer, size);
1037                         if (ret < 0) {
1038                                 mlog_errno(ret);
1039                                 return ret;
1040                         }
1041                 }
1042         }
1043
1044         return size;
1045 }
1046
1047 static int ocfs2_xattr_block_get(struct inode *inode,
1048                                  int name_index,
1049                                  const char *name,
1050                                  void *buffer,
1051                                  size_t buffer_size,
1052                                  struct ocfs2_xattr_search *xs)
1053 {
1054         struct ocfs2_xattr_block *xb;
1055         struct ocfs2_xattr_value_root *xv;
1056         size_t size;
1057         int ret = -ENODATA, name_offset, name_len, i;
1058         int uninitialized_var(block_off);
1059
1060         xs->bucket = ocfs2_xattr_bucket_new(inode);
1061         if (!xs->bucket) {
1062                 ret = -ENOMEM;
1063                 mlog_errno(ret);
1064                 goto cleanup;
1065         }
1066
1067         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1068         if (ret) {
1069                 mlog_errno(ret);
1070                 goto cleanup;
1071         }
1072
1073         if (xs->not_found) {
1074                 ret = -ENODATA;
1075                 goto cleanup;
1076         }
1077
1078         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1079         size = le64_to_cpu(xs->here->xe_value_size);
1080         if (buffer) {
1081                 ret = -ERANGE;
1082                 if (size > buffer_size)
1083                         goto cleanup;
1084
1085                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1086                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1087                 i = xs->here - xs->header->xh_entries;
1088
1089                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1090                         ret = ocfs2_xattr_bucket_get_name_value(inode,
1091                                                                 bucket_xh(xs->bucket),
1092                                                                 i,
1093                                                                 &block_off,
1094                                                                 &name_offset);
1095                         xs->base = bucket_block(xs->bucket, block_off);
1096                 }
1097                 if (ocfs2_xattr_is_local(xs->here)) {
1098                         memcpy(buffer, (void *)xs->base +
1099                                name_offset + name_len, size);
1100                 } else {
1101                         xv = (struct ocfs2_xattr_value_root *)
1102                                 (xs->base + name_offset + name_len);
1103                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1104                                                             buffer, size);
1105                         if (ret < 0) {
1106                                 mlog_errno(ret);
1107                                 goto cleanup;
1108                         }
1109                 }
1110         }
1111         ret = size;
1112 cleanup:
1113         ocfs2_xattr_bucket_free(xs->bucket);
1114
1115         brelse(xs->xattr_bh);
1116         xs->xattr_bh = NULL;
1117         return ret;
1118 }
1119
1120 int ocfs2_xattr_get_nolock(struct inode *inode,
1121                            struct buffer_head *di_bh,
1122                            int name_index,
1123                            const char *name,
1124                            void *buffer,
1125                            size_t buffer_size)
1126 {
1127         int ret;
1128         struct ocfs2_dinode *di = NULL;
1129         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1130         struct ocfs2_xattr_search xis = {
1131                 .not_found = -ENODATA,
1132         };
1133         struct ocfs2_xattr_search xbs = {
1134                 .not_found = -ENODATA,
1135         };
1136
1137         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1138                 return -EOPNOTSUPP;
1139
1140         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1141                 ret = -ENODATA;
1142
1143         xis.inode_bh = xbs.inode_bh = di_bh;
1144         di = (struct ocfs2_dinode *)di_bh->b_data;
1145
1146         down_read(&oi->ip_xattr_sem);
1147         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1148                                     buffer_size, &xis);
1149         if (ret == -ENODATA && di->i_xattr_loc)
1150                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1151                                             buffer_size, &xbs);
1152         up_read(&oi->ip_xattr_sem);
1153
1154         return ret;
1155 }
1156
1157 /* ocfs2_xattr_get()
1158  *
1159  * Copy an extended attribute into the buffer provided.
1160  * Buffer is NULL to compute the size of buffer required.
1161  */
1162 static int ocfs2_xattr_get(struct inode *inode,
1163                            int name_index,
1164                            const char *name,
1165                            void *buffer,
1166                            size_t buffer_size)
1167 {
1168         int ret;
1169         struct buffer_head *di_bh = NULL;
1170
1171         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1172         if (ret < 0) {
1173                 mlog_errno(ret);
1174                 return ret;
1175         }
1176         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1177                                      name, buffer, buffer_size);
1178
1179         ocfs2_inode_unlock(inode, 0);
1180
1181         brelse(di_bh);
1182
1183         return ret;
1184 }
1185
1186 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1187                                            handle_t *handle,
1188                                            struct ocfs2_xattr_value_root *xv,
1189                                            const void *value,
1190                                            int value_len)
1191 {
1192         int ret = 0, i, cp_len;
1193         u16 blocksize = inode->i_sb->s_blocksize;
1194         u32 p_cluster, num_clusters;
1195         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1196         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1197         u64 blkno;
1198         struct buffer_head *bh = NULL;
1199
1200         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1201
1202         while (cpos < clusters) {
1203                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1204                                                &num_clusters, &xv->xr_list);
1205                 if (ret) {
1206                         mlog_errno(ret);
1207                         goto out;
1208                 }
1209
1210                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1211
1212                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1213                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1214                                                &bh, NULL);
1215                         if (ret) {
1216                                 mlog_errno(ret);
1217                                 goto out;
1218                         }
1219
1220                         ret = ocfs2_journal_access(handle,
1221                                                    INODE_CACHE(inode),
1222                                                    bh,
1223                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1224                         if (ret < 0) {
1225                                 mlog_errno(ret);
1226                                 goto out;
1227                         }
1228
1229                         cp_len = value_len > blocksize ? blocksize : value_len;
1230                         memcpy(bh->b_data, value, cp_len);
1231                         value_len -= cp_len;
1232                         value += cp_len;
1233                         if (cp_len < blocksize)
1234                                 memset(bh->b_data + cp_len, 0,
1235                                        blocksize - cp_len);
1236
1237                         ret = ocfs2_journal_dirty(handle, bh);
1238                         if (ret < 0) {
1239                                 mlog_errno(ret);
1240                                 goto out;
1241                         }
1242                         brelse(bh);
1243                         bh = NULL;
1244
1245                         /*
1246                          * XXX: do we need to empty all the following
1247                          * blocks in this cluster?
1248                          */
1249                         if (!value_len)
1250                                 break;
1251                 }
1252                 cpos += num_clusters;
1253         }
1254 out:
1255         brelse(bh);
1256
1257         return ret;
1258 }
1259
1260 static int ocfs2_xattr_cleanup(struct inode *inode,
1261                                handle_t *handle,
1262                                struct ocfs2_xattr_info *xi,
1263                                struct ocfs2_xattr_search *xs,
1264                                struct ocfs2_xattr_value_buf *vb,
1265                                size_t offs)
1266 {
1267         int ret = 0;
1268         size_t name_len = strlen(xi->name);
1269         void *val = xs->base + offs;
1270         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1271
1272         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1273                             OCFS2_JOURNAL_ACCESS_WRITE);
1274         if (ret) {
1275                 mlog_errno(ret);
1276                 goto out;
1277         }
1278         /* Decrease xattr count */
1279         le16_add_cpu(&xs->header->xh_count, -1);
1280         /* Remove the xattr entry and tree root which has already be set*/
1281         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1282         memset(val, 0, size);
1283
1284         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1285         if (ret < 0)
1286                 mlog_errno(ret);
1287 out:
1288         return ret;
1289 }
1290
1291 static int ocfs2_xattr_update_entry(struct inode *inode,
1292                                     handle_t *handle,
1293                                     struct ocfs2_xattr_info *xi,
1294                                     struct ocfs2_xattr_search *xs,
1295                                     struct ocfs2_xattr_value_buf *vb,
1296                                     size_t offs)
1297 {
1298         int ret;
1299
1300         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1301                             OCFS2_JOURNAL_ACCESS_WRITE);
1302         if (ret) {
1303                 mlog_errno(ret);
1304                 goto out;
1305         }
1306
1307         xs->here->xe_name_offset = cpu_to_le16(offs);
1308         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1309         if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1310                 ocfs2_xattr_set_local(xs->here, 1);
1311         else
1312                 ocfs2_xattr_set_local(xs->here, 0);
1313         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1314
1315         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1316         if (ret < 0)
1317                 mlog_errno(ret);
1318 out:
1319         return ret;
1320 }
1321
1322 /*
1323  * ocfs2_xattr_set_value_outside()
1324  *
1325  * Set large size value in B tree.
1326  */
1327 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1328                                          struct ocfs2_xattr_info *xi,
1329                                          struct ocfs2_xattr_search *xs,
1330                                          struct ocfs2_xattr_set_ctxt *ctxt,
1331                                          struct ocfs2_xattr_value_buf *vb,
1332                                          size_t offs)
1333 {
1334         size_t name_len = strlen(xi->name);
1335         void *val = xs->base + offs;
1336         struct ocfs2_xattr_value_root *xv = NULL;
1337         size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1338         int ret = 0;
1339
1340         memset(val, 0, size);
1341         memcpy(val, xi->name, name_len);
1342         xv = (struct ocfs2_xattr_value_root *)
1343                 (val + OCFS2_XATTR_SIZE(name_len));
1344         xv->xr_clusters = 0;
1345         xv->xr_last_eb_blk = 0;
1346         xv->xr_list.l_tree_depth = 0;
1347         xv->xr_list.l_count = cpu_to_le16(1);
1348         xv->xr_list.l_next_free_rec = 0;
1349         vb->vb_xv = xv;
1350
1351         ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1352         if (ret < 0) {
1353                 mlog_errno(ret);
1354                 return ret;
1355         }
1356         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1357         if (ret < 0) {
1358                 mlog_errno(ret);
1359                 return ret;
1360         }
1361         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv,
1362                                               xi->value, xi->value_len);
1363         if (ret < 0)
1364                 mlog_errno(ret);
1365
1366         return ret;
1367 }
1368
1369 /*
1370  * ocfs2_xattr_set_entry_local()
1371  *
1372  * Set, replace or remove extended attribute in local.
1373  */
1374 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1375                                         struct ocfs2_xattr_info *xi,
1376                                         struct ocfs2_xattr_search *xs,
1377                                         struct ocfs2_xattr_entry *last,
1378                                         size_t min_offs)
1379 {
1380         size_t name_len = strlen(xi->name);
1381         int i;
1382
1383         if (xi->value && xs->not_found) {
1384                 /* Insert the new xattr entry. */
1385                 le16_add_cpu(&xs->header->xh_count, 1);
1386                 ocfs2_xattr_set_type(last, xi->name_index);
1387                 ocfs2_xattr_set_local(last, 1);
1388                 last->xe_name_len = name_len;
1389         } else {
1390                 void *first_val;
1391                 void *val;
1392                 size_t offs, size;
1393
1394                 first_val = xs->base + min_offs;
1395                 offs = le16_to_cpu(xs->here->xe_name_offset);
1396                 val = xs->base + offs;
1397
1398                 if (le64_to_cpu(xs->here->xe_value_size) >
1399                     OCFS2_XATTR_INLINE_SIZE)
1400                         size = OCFS2_XATTR_SIZE(name_len) +
1401                                 OCFS2_XATTR_ROOT_SIZE;
1402                 else
1403                         size = OCFS2_XATTR_SIZE(name_len) +
1404                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1405
1406                 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1407                                 OCFS2_XATTR_SIZE(xi->value_len)) {
1408                         /* The old and the new value have the
1409                            same size. Just replace the value. */
1410                         ocfs2_xattr_set_local(xs->here, 1);
1411                         xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1412                         /* Clear value bytes. */
1413                         memset(val + OCFS2_XATTR_SIZE(name_len),
1414                                0,
1415                                OCFS2_XATTR_SIZE(xi->value_len));
1416                         memcpy(val + OCFS2_XATTR_SIZE(name_len),
1417                                xi->value,
1418                                xi->value_len);
1419                         return;
1420                 }
1421                 /* Remove the old name+value. */
1422                 memmove(first_val + size, first_val, val - first_val);
1423                 memset(first_val, 0, size);
1424                 xs->here->xe_name_hash = 0;
1425                 xs->here->xe_name_offset = 0;
1426                 ocfs2_xattr_set_local(xs->here, 1);
1427                 xs->here->xe_value_size = 0;
1428
1429                 min_offs += size;
1430
1431                 /* Adjust all value offsets. */
1432                 last = xs->header->xh_entries;
1433                 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1434                         size_t o = le16_to_cpu(last->xe_name_offset);
1435
1436                         if (o < offs)
1437                                 last->xe_name_offset = cpu_to_le16(o + size);
1438                         last += 1;
1439                 }
1440
1441                 if (!xi->value) {
1442                         /* Remove the old entry. */
1443                         last -= 1;
1444                         memmove(xs->here, xs->here + 1,
1445                                 (void *)last - (void *)xs->here);
1446                         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1447                         le16_add_cpu(&xs->header->xh_count, -1);
1448                 }
1449         }
1450         if (xi->value) {
1451                 /* Insert the new name+value. */
1452                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1453                                 OCFS2_XATTR_SIZE(xi->value_len);
1454                 void *val = xs->base + min_offs - size;
1455
1456                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1457                 memset(val, 0, size);
1458                 memcpy(val, xi->name, name_len);
1459                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1460                        xi->value,
1461                        xi->value_len);
1462                 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1463                 ocfs2_xattr_set_local(xs->here, 1);
1464                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1465         }
1466
1467         return;
1468 }
1469
1470 /*
1471  * ocfs2_xattr_set_entry()
1472  *
1473  * Set extended attribute entry into inode or block.
1474  *
1475  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1476  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1477  * then set value in B tree with set_value_outside().
1478  */
1479 static int ocfs2_xattr_set_entry(struct inode *inode,
1480                                  struct ocfs2_xattr_info *xi,
1481                                  struct ocfs2_xattr_search *xs,
1482                                  struct ocfs2_xattr_set_ctxt *ctxt,
1483                                  int flag)
1484 {
1485         struct ocfs2_xattr_entry *last;
1486         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1487         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1488         size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1489         size_t size_l = 0;
1490         handle_t *handle = ctxt->handle;
1491         int free, i, ret;
1492         struct ocfs2_xattr_info xi_l = {
1493                 .name_index = xi->name_index,
1494                 .name = xi->name,
1495                 .value = xi->value,
1496                 .value_len = xi->value_len,
1497         };
1498         struct ocfs2_xattr_value_buf vb = {
1499                 .vb_bh = xs->xattr_bh,
1500                 .vb_access = ocfs2_journal_access_di,
1501         };
1502
1503         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1504                 BUG_ON(xs->xattr_bh == xs->inode_bh);
1505                 vb.vb_access = ocfs2_journal_access_xb;
1506         } else
1507                 BUG_ON(xs->xattr_bh != xs->inode_bh);
1508
1509         /* Compute min_offs, last and free space. */
1510         last = xs->header->xh_entries;
1511
1512         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1513                 size_t offs = le16_to_cpu(last->xe_name_offset);
1514                 if (offs < min_offs)
1515                         min_offs = offs;
1516                 last += 1;
1517         }
1518
1519         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1520         if (free < 0)
1521                 return -EIO;
1522
1523         if (!xs->not_found) {
1524                 size_t size = 0;
1525                 if (ocfs2_xattr_is_local(xs->here))
1526                         size = OCFS2_XATTR_SIZE(name_len) +
1527                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1528                 else
1529                         size = OCFS2_XATTR_SIZE(name_len) +
1530                                 OCFS2_XATTR_ROOT_SIZE;
1531                 free += (size + sizeof(struct ocfs2_xattr_entry));
1532         }
1533         /* Check free space in inode or block */
1534         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1535                 if (free < sizeof(struct ocfs2_xattr_entry) +
1536                            OCFS2_XATTR_SIZE(name_len) +
1537                            OCFS2_XATTR_ROOT_SIZE) {
1538                         ret = -ENOSPC;
1539                         goto out;
1540                 }
1541                 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1542                 xi_l.value = (void *)&def_xv;
1543                 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1544         } else if (xi->value) {
1545                 if (free < sizeof(struct ocfs2_xattr_entry) +
1546                            OCFS2_XATTR_SIZE(name_len) +
1547                            OCFS2_XATTR_SIZE(xi->value_len)) {
1548                         ret = -ENOSPC;
1549                         goto out;
1550                 }
1551         }
1552
1553         if (!xs->not_found) {
1554                 /* For existing extended attribute */
1555                 size_t size = OCFS2_XATTR_SIZE(name_len) +
1556                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1557                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1558                 void *val = xs->base + offs;
1559
1560                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1561                         /* Replace existing local xattr with tree root */
1562                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1563                                                             ctxt, &vb, offs);
1564                         if (ret < 0)
1565                                 mlog_errno(ret);
1566                         goto out;
1567                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1568                         /* For existing xattr which has value outside */
1569                         vb.vb_xv = (struct ocfs2_xattr_value_root *)
1570                                 (val + OCFS2_XATTR_SIZE(name_len));
1571
1572                         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1573                                 /*
1574                                  * If new value need set outside also,
1575                                  * first truncate old value to new value,
1576                                  * then set new value with set_value_outside().
1577                                  */
1578                                 ret = ocfs2_xattr_value_truncate(inode,
1579                                                                  &vb,
1580                                                                  xi->value_len,
1581                                                                  ctxt);
1582                                 if (ret < 0) {
1583                                         mlog_errno(ret);
1584                                         goto out;
1585                                 }
1586
1587                                 ret = ocfs2_xattr_update_entry(inode,
1588                                                                handle,
1589                                                                xi,
1590                                                                xs,
1591                                                                &vb,
1592                                                                offs);
1593                                 if (ret < 0) {
1594                                         mlog_errno(ret);
1595                                         goto out;
1596                                 }
1597
1598                                 ret = __ocfs2_xattr_set_value_outside(inode,
1599                                                                 handle,
1600                                                                 vb.vb_xv,
1601                                                                 xi->value,
1602                                                                 xi->value_len);
1603                                 if (ret < 0)
1604                                         mlog_errno(ret);
1605                                 goto out;
1606                         } else {
1607                                 /*
1608                                  * If new value need set in local,
1609                                  * just truncate old value to zero.
1610                                  */
1611                                  ret = ocfs2_xattr_value_truncate(inode,
1612                                                                   &vb,
1613                                                                   0,
1614                                                                   ctxt);
1615                                 if (ret < 0)
1616                                         mlog_errno(ret);
1617                         }
1618                 }
1619         }
1620
1621         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1622                                       OCFS2_JOURNAL_ACCESS_WRITE);
1623         if (ret) {
1624                 mlog_errno(ret);
1625                 goto out;
1626         }
1627
1628         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1629                 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1630                                    OCFS2_JOURNAL_ACCESS_WRITE);
1631                 if (ret) {
1632                         mlog_errno(ret);
1633                         goto out;
1634                 }
1635         }
1636
1637         /*
1638          * Set value in local, include set tree root in local.
1639          * This is the first step for value size >INLINE_SIZE.
1640          */
1641         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1642
1643         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1644                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1645                 if (ret < 0) {
1646                         mlog_errno(ret);
1647                         goto out;
1648                 }
1649         }
1650
1651         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1652             (flag & OCFS2_INLINE_XATTR_FL)) {
1653                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1654                 unsigned int xattrsize = osb->s_xattr_inline_size;
1655
1656                 /*
1657                  * Adjust extent record count or inline data size
1658                  * to reserve space for extended attribute.
1659                  */
1660                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1661                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1662                         le16_add_cpu(&idata->id_count, -xattrsize);
1663                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1664                         struct ocfs2_extent_list *el = &di->id2.i_list;
1665                         le16_add_cpu(&el->l_count, -(xattrsize /
1666                                         sizeof(struct ocfs2_extent_rec)));
1667                 }
1668                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1669         }
1670         /* Update xattr flag */
1671         spin_lock(&oi->ip_lock);
1672         oi->ip_dyn_features |= flag;
1673         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1674         spin_unlock(&oi->ip_lock);
1675
1676         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1677         if (ret < 0)
1678                 mlog_errno(ret);
1679
1680         if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1681                 /*
1682                  * Set value outside in B tree.
1683                  * This is the second step for value size > INLINE_SIZE.
1684                  */
1685                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1686                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1687                                                     &vb, offs);
1688                 if (ret < 0) {
1689                         int ret2;
1690
1691                         mlog_errno(ret);
1692                         /*
1693                          * If set value outside failed, we have to clean
1694                          * the junk tree root we have already set in local.
1695                          */
1696                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1697                                                    xi, xs, &vb, offs);
1698                         if (ret2 < 0)
1699                                 mlog_errno(ret2);
1700                 }
1701         }
1702 out:
1703         return ret;
1704 }
1705
1706 static int ocfs2_remove_value_outside(struct inode*inode,
1707                                       struct ocfs2_xattr_value_buf *vb,
1708                                       struct ocfs2_xattr_header *header)
1709 {
1710         int ret = 0, i;
1711         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1712         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1713
1714         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1715
1716         ctxt.handle = ocfs2_start_trans(osb,
1717                                         ocfs2_remove_extent_credits(osb->sb));
1718         if (IS_ERR(ctxt.handle)) {
1719                 ret = PTR_ERR(ctxt.handle);
1720                 mlog_errno(ret);
1721                 goto out;
1722         }
1723
1724         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1725                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1726
1727                 if (!ocfs2_xattr_is_local(entry)) {
1728                         void *val;
1729
1730                         val = (void *)header +
1731                                 le16_to_cpu(entry->xe_name_offset);
1732                         vb->vb_xv = (struct ocfs2_xattr_value_root *)
1733                                 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1734                         ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1735                         if (ret < 0) {
1736                                 mlog_errno(ret);
1737                                 break;
1738                         }
1739                 }
1740         }
1741
1742         ocfs2_commit_trans(osb, ctxt.handle);
1743         ocfs2_schedule_truncate_log_flush(osb, 1);
1744         ocfs2_run_deallocs(osb, &ctxt.dealloc);
1745 out:
1746         return ret;
1747 }
1748
1749 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1750                                     struct buffer_head *di_bh)
1751 {
1752
1753         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1754         struct ocfs2_xattr_header *header;
1755         int ret;
1756         struct ocfs2_xattr_value_buf vb = {
1757                 .vb_bh = di_bh,
1758                 .vb_access = ocfs2_journal_access_di,
1759         };
1760
1761         header = (struct ocfs2_xattr_header *)
1762                  ((void *)di + inode->i_sb->s_blocksize -
1763                  le16_to_cpu(di->i_xattr_inline_size));
1764
1765         ret = ocfs2_remove_value_outside(inode, &vb, header);
1766
1767         return ret;
1768 }
1769
1770 static int ocfs2_xattr_block_remove(struct inode *inode,
1771                                     struct buffer_head *blk_bh)
1772 {
1773         struct ocfs2_xattr_block *xb;
1774         int ret = 0;
1775         struct ocfs2_xattr_value_buf vb = {
1776                 .vb_bh = blk_bh,
1777                 .vb_access = ocfs2_journal_access_xb,
1778         };
1779
1780         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1781         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1782                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1783                 ret = ocfs2_remove_value_outside(inode, &vb, header);
1784         } else
1785                 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1786
1787         return ret;
1788 }
1789
1790 static int ocfs2_xattr_free_block(struct inode *inode,
1791                                   u64 block)
1792 {
1793         struct inode *xb_alloc_inode;
1794         struct buffer_head *xb_alloc_bh = NULL;
1795         struct buffer_head *blk_bh = NULL;
1796         struct ocfs2_xattr_block *xb;
1797         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1798         handle_t *handle;
1799         int ret = 0;
1800         u64 blk, bg_blkno;
1801         u16 bit;
1802
1803         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1804         if (ret < 0) {
1805                 mlog_errno(ret);
1806                 goto out;
1807         }
1808
1809         ret = ocfs2_xattr_block_remove(inode, blk_bh);
1810         if (ret < 0) {
1811                 mlog_errno(ret);
1812                 goto out;
1813         }
1814
1815         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1816         blk = le64_to_cpu(xb->xb_blkno);
1817         bit = le16_to_cpu(xb->xb_suballoc_bit);
1818         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1819
1820         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1821                                 EXTENT_ALLOC_SYSTEM_INODE,
1822                                 le16_to_cpu(xb->xb_suballoc_slot));
1823         if (!xb_alloc_inode) {
1824                 ret = -ENOMEM;
1825                 mlog_errno(ret);
1826                 goto out;
1827         }
1828         mutex_lock(&xb_alloc_inode->i_mutex);
1829
1830         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1831         if (ret < 0) {
1832                 mlog_errno(ret);
1833                 goto out_mutex;
1834         }
1835
1836         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1837         if (IS_ERR(handle)) {
1838                 ret = PTR_ERR(handle);
1839                 mlog_errno(ret);
1840                 goto out_unlock;
1841         }
1842
1843         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1844                                        bit, bg_blkno, 1);
1845         if (ret < 0)
1846                 mlog_errno(ret);
1847
1848         ocfs2_commit_trans(osb, handle);
1849 out_unlock:
1850         ocfs2_inode_unlock(xb_alloc_inode, 1);
1851         brelse(xb_alloc_bh);
1852 out_mutex:
1853         mutex_unlock(&xb_alloc_inode->i_mutex);
1854         iput(xb_alloc_inode);
1855 out:
1856         brelse(blk_bh);
1857         return ret;
1858 }
1859
1860 /*
1861  * ocfs2_xattr_remove()
1862  *
1863  * Free extended attribute resources associated with this inode.
1864  */
1865 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1866 {
1867         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1868         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1869         handle_t *handle;
1870         int ret;
1871
1872         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1873                 return 0;
1874
1875         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1876                 return 0;
1877
1878         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1879                 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1880                 if (ret < 0) {
1881                         mlog_errno(ret);
1882                         goto out;
1883                 }
1884         }
1885
1886         if (di->i_xattr_loc) {
1887                 ret = ocfs2_xattr_free_block(inode,
1888                                              le64_to_cpu(di->i_xattr_loc));
1889                 if (ret < 0) {
1890                         mlog_errno(ret);
1891                         goto out;
1892                 }
1893         }
1894
1895         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1896                                    OCFS2_INODE_UPDATE_CREDITS);
1897         if (IS_ERR(handle)) {
1898                 ret = PTR_ERR(handle);
1899                 mlog_errno(ret);
1900                 goto out;
1901         }
1902         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1903                                       OCFS2_JOURNAL_ACCESS_WRITE);
1904         if (ret) {
1905                 mlog_errno(ret);
1906                 goto out_commit;
1907         }
1908
1909         di->i_xattr_loc = 0;
1910
1911         spin_lock(&oi->ip_lock);
1912         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1913         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1914         spin_unlock(&oi->ip_lock);
1915
1916         ret = ocfs2_journal_dirty(handle, di_bh);
1917         if (ret < 0)
1918                 mlog_errno(ret);
1919 out_commit:
1920         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1921 out:
1922         return ret;
1923 }
1924
1925 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1926                                         struct ocfs2_dinode *di)
1927 {
1928         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1929         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1930         int free;
1931
1932         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1933                 return 0;
1934
1935         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1936                 struct ocfs2_inline_data *idata = &di->id2.i_data;
1937                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1938         } else if (ocfs2_inode_is_fast_symlink(inode)) {
1939                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1940                         le64_to_cpu(di->i_size);
1941         } else {
1942                 struct ocfs2_extent_list *el = &di->id2.i_list;
1943                 free = (le16_to_cpu(el->l_count) -
1944                         le16_to_cpu(el->l_next_free_rec)) *
1945                         sizeof(struct ocfs2_extent_rec);
1946         }
1947         if (free >= xattrsize)
1948                 return 1;
1949
1950         return 0;
1951 }
1952
1953 /*
1954  * ocfs2_xattr_ibody_find()
1955  *
1956  * Find extended attribute in inode block and
1957  * fill search info into struct ocfs2_xattr_search.
1958  */
1959 static int ocfs2_xattr_ibody_find(struct inode *inode,
1960                                   int name_index,
1961                                   const char *name,
1962                                   struct ocfs2_xattr_search *xs)
1963 {
1964         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1965         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1966         int ret;
1967         int has_space = 0;
1968
1969         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1970                 return 0;
1971
1972         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1973                 down_read(&oi->ip_alloc_sem);
1974                 has_space = ocfs2_xattr_has_space_inline(inode, di);
1975                 up_read(&oi->ip_alloc_sem);
1976                 if (!has_space)
1977                         return 0;
1978         }
1979
1980         xs->xattr_bh = xs->inode_bh;
1981         xs->end = (void *)di + inode->i_sb->s_blocksize;
1982         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1983                 xs->header = (struct ocfs2_xattr_header *)
1984                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1985         else
1986                 xs->header = (struct ocfs2_xattr_header *)
1987                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1988         xs->base = (void *)xs->header;
1989         xs->here = xs->header->xh_entries;
1990
1991         /* Find the named attribute. */
1992         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1993                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1994                 if (ret && ret != -ENODATA)
1995                         return ret;
1996                 xs->not_found = ret;
1997         }
1998
1999         return 0;
2000 }
2001
2002 /*
2003  * ocfs2_xattr_ibody_set()
2004  *
2005  * Set, replace or remove an extended attribute into inode block.
2006  *
2007  */
2008 static int ocfs2_xattr_ibody_set(struct inode *inode,
2009                                  struct ocfs2_xattr_info *xi,
2010                                  struct ocfs2_xattr_search *xs,
2011                                  struct ocfs2_xattr_set_ctxt *ctxt)
2012 {
2013         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2014         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2015         int ret;
2016
2017         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2018                 return -ENOSPC;
2019
2020         down_write(&oi->ip_alloc_sem);
2021         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2022                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2023                         ret = -ENOSPC;
2024                         goto out;
2025                 }
2026         }
2027
2028         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2029                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2030 out:
2031         up_write(&oi->ip_alloc_sem);
2032
2033         return ret;
2034 }
2035
2036 /*
2037  * ocfs2_xattr_block_find()
2038  *
2039  * Find extended attribute in external block and
2040  * fill search info into struct ocfs2_xattr_search.
2041  */
2042 static int ocfs2_xattr_block_find(struct inode *inode,
2043                                   int name_index,
2044                                   const char *name,
2045                                   struct ocfs2_xattr_search *xs)
2046 {
2047         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2048         struct buffer_head *blk_bh = NULL;
2049         struct ocfs2_xattr_block *xb;
2050         int ret = 0;
2051
2052         if (!di->i_xattr_loc)
2053                 return ret;
2054
2055         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2056                                      &blk_bh);
2057         if (ret < 0) {
2058                 mlog_errno(ret);
2059                 return ret;
2060         }
2061
2062         xs->xattr_bh = blk_bh;
2063         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2064
2065         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2066                 xs->header = &xb->xb_attrs.xb_header;
2067                 xs->base = (void *)xs->header;
2068                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2069                 xs->here = xs->header->xh_entries;
2070
2071                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2072         } else
2073                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2074                                                    name_index,
2075                                                    name, xs);
2076
2077         if (ret && ret != -ENODATA) {
2078                 xs->xattr_bh = NULL;
2079                 goto cleanup;
2080         }
2081         xs->not_found = ret;
2082         return 0;
2083 cleanup:
2084         brelse(blk_bh);
2085
2086         return ret;
2087 }
2088
2089 /*
2090  * ocfs2_xattr_block_set()
2091  *
2092  * Set, replace or remove an extended attribute into external block.
2093  *
2094  */
2095 static int ocfs2_xattr_block_set(struct inode *inode,
2096                                  struct ocfs2_xattr_info *xi,
2097                                  struct ocfs2_xattr_search *xs,
2098                                  struct ocfs2_xattr_set_ctxt *ctxt)
2099 {
2100         struct buffer_head *new_bh = NULL;
2101         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2102         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
2103         handle_t *handle = ctxt->handle;
2104         struct ocfs2_xattr_block *xblk = NULL;
2105         u16 suballoc_bit_start;
2106         u32 num_got;
2107         u64 first_blkno;
2108         int ret;
2109
2110         if (!xs->xattr_bh) {
2111                 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
2112                                               xs->inode_bh,
2113                                               OCFS2_JOURNAL_ACCESS_CREATE);
2114                 if (ret < 0) {
2115                         mlog_errno(ret);
2116                         goto end;
2117                 }
2118
2119                 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
2120                                            &suballoc_bit_start, &num_got,
2121                                            &first_blkno);
2122                 if (ret < 0) {
2123                         mlog_errno(ret);
2124                         goto end;
2125                 }
2126
2127                 new_bh = sb_getblk(inode->i_sb, first_blkno);
2128                 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2129
2130                 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2131                                               new_bh,
2132                                               OCFS2_JOURNAL_ACCESS_CREATE);
2133                 if (ret < 0) {
2134                         mlog_errno(ret);
2135                         goto end;
2136                 }
2137
2138                 /* Initialize ocfs2_xattr_block */
2139                 xs->xattr_bh = new_bh;
2140                 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2141                 memset(xblk, 0, inode->i_sb->s_blocksize);
2142                 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2143                 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2144                 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2145                 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2146                 xblk->xb_blkno = cpu_to_le64(first_blkno);
2147
2148                 xs->header = &xblk->xb_attrs.xb_header;
2149                 xs->base = (void *)xs->header;
2150                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2151                 xs->here = xs->header->xh_entries;
2152
2153                 ret = ocfs2_journal_dirty(handle, new_bh);
2154                 if (ret < 0) {
2155                         mlog_errno(ret);
2156                         goto end;
2157                 }
2158                 di->i_xattr_loc = cpu_to_le64(first_blkno);
2159                 ocfs2_journal_dirty(handle, xs->inode_bh);
2160         } else
2161                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2162
2163         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2164                 /* Set extended attribute into external block */
2165                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2166                                             OCFS2_HAS_XATTR_FL);
2167                 if (!ret || ret != -ENOSPC)
2168                         goto end;
2169
2170                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2171                 if (ret)
2172                         goto end;
2173         }
2174
2175         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2176
2177 end:
2178
2179         return ret;
2180 }
2181
2182 /* Check whether the new xattr can be inserted into the inode. */
2183 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2184                                        struct ocfs2_xattr_info *xi,
2185                                        struct ocfs2_xattr_search *xs)
2186 {
2187         u64 value_size;
2188         struct ocfs2_xattr_entry *last;
2189         int free, i;
2190         size_t min_offs = xs->end - xs->base;
2191
2192         if (!xs->header)
2193                 return 0;
2194
2195         last = xs->header->xh_entries;
2196
2197         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2198                 size_t offs = le16_to_cpu(last->xe_name_offset);
2199                 if (offs < min_offs)
2200                         min_offs = offs;
2201                 last += 1;
2202         }
2203
2204         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2205         if (free < 0)
2206                 return 0;
2207
2208         BUG_ON(!xs->not_found);
2209
2210         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2211                 value_size = OCFS2_XATTR_ROOT_SIZE;
2212         else
2213                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2214
2215         if (free >= sizeof(struct ocfs2_xattr_entry) +
2216                    OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2217                 return 1;
2218
2219         return 0;
2220 }
2221
2222 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2223                                      struct ocfs2_dinode *di,
2224                                      struct ocfs2_xattr_info *xi,
2225                                      struct ocfs2_xattr_search *xis,
2226                                      struct ocfs2_xattr_search *xbs,
2227                                      int *clusters_need,
2228                                      int *meta_need,
2229                                      int *credits_need)
2230 {
2231         int ret = 0, old_in_xb = 0;
2232         int clusters_add = 0, meta_add = 0, credits = 0;
2233         struct buffer_head *bh = NULL;
2234         struct ocfs2_xattr_block *xb = NULL;
2235         struct ocfs2_xattr_entry *xe = NULL;
2236         struct ocfs2_xattr_value_root *xv = NULL;
2237         char *base = NULL;
2238         int name_offset, name_len = 0;
2239         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2240                                                     xi->value_len);
2241         u64 value_size;
2242
2243         /*
2244          * Calculate the clusters we need to write.
2245          * No matter whether we replace an old one or add a new one,
2246          * we need this for writing.
2247          */
2248         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2249                 credits += new_clusters *
2250                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2251
2252         if (xis->not_found && xbs->not_found) {
2253                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2254
2255                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2256                         clusters_add += new_clusters;
2257                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2258                                                         &def_xv.xv.xr_list,
2259                                                         new_clusters);
2260                 }
2261
2262                 goto meta_guess;
2263         }
2264
2265         if (!xis->not_found) {
2266                 xe = xis->here;
2267                 name_offset = le16_to_cpu(xe->xe_name_offset);
2268                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2269                 base = xis->base;
2270                 credits += OCFS2_INODE_UPDATE_CREDITS;
2271         } else {
2272                 int i, block_off = 0;
2273                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2274                 xe = xbs->here;
2275                 name_offset = le16_to_cpu(xe->xe_name_offset);
2276                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2277                 i = xbs->here - xbs->header->xh_entries;
2278                 old_in_xb = 1;
2279
2280                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2281                         ret = ocfs2_xattr_bucket_get_name_value(inode,
2282                                                         bucket_xh(xbs->bucket),
2283                                                         i, &block_off,
2284                                                         &name_offset);
2285                         base = bucket_block(xbs->bucket, block_off);
2286                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2287                 } else {
2288                         base = xbs->base;
2289                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2290                 }
2291         }
2292
2293         /*
2294          * delete a xattr doesn't need metadata and cluster allocation.
2295          * so just calculate the credits and return.
2296          *
2297          * The credits for removing the value tree will be extended
2298          * by ocfs2_remove_extent itself.
2299          */
2300         if (!xi->value) {
2301                 if (!ocfs2_xattr_is_local(xe))
2302                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2303
2304                 goto out;
2305         }
2306
2307         /* do cluster allocation guess first. */
2308         value_size = le64_to_cpu(xe->xe_value_size);
2309
2310         if (old_in_xb) {
2311                 /*
2312                  * In xattr set, we always try to set the xe in inode first,
2313                  * so if it can be inserted into inode successfully, the old
2314                  * one will be removed from the xattr block, and this xattr
2315                  * will be inserted into inode as a new xattr in inode.
2316                  */
2317                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2318                         clusters_add += new_clusters;
2319                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
2320                                     OCFS2_INODE_UPDATE_CREDITS;
2321                         if (!ocfs2_xattr_is_local(xe))
2322                                 credits += ocfs2_calc_extend_credits(
2323                                                         inode->i_sb,
2324                                                         &def_xv.xv.xr_list,
2325                                                         new_clusters);
2326                         goto out;
2327                 }
2328         }
2329
2330         if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2331                 /* the new values will be stored outside. */
2332                 u32 old_clusters = 0;
2333
2334                 if (!ocfs2_xattr_is_local(xe)) {
2335                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2336                                                                  value_size);
2337                         xv = (struct ocfs2_xattr_value_root *)
2338                              (base + name_offset + name_len);
2339                         value_size = OCFS2_XATTR_ROOT_SIZE;
2340                 } else
2341                         xv = &def_xv.xv;
2342
2343                 if (old_clusters >= new_clusters) {
2344                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2345                         goto out;
2346                 } else {
2347                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2348                         clusters_add += new_clusters - old_clusters;
2349                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2350                                                              &xv->xr_list,
2351                                                              new_clusters -
2352                                                              old_clusters);
2353                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2354                                 goto out;
2355                 }
2356         } else {
2357                 /*
2358                  * Now the new value will be stored inside. So if the new
2359                  * value is smaller than the size of value root or the old
2360                  * value, we don't need any allocation, otherwise we have
2361                  * to guess metadata allocation.
2362                  */
2363                 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2364                     (!ocfs2_xattr_is_local(xe) &&
2365                      OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2366                         goto out;
2367         }
2368
2369 meta_guess:
2370         /* calculate metadata allocation. */
2371         if (di->i_xattr_loc) {
2372                 if (!xbs->xattr_bh) {
2373                         ret = ocfs2_read_xattr_block(inode,
2374                                                      le64_to_cpu(di->i_xattr_loc),
2375                                                      &bh);
2376                         if (ret) {
2377                                 mlog_errno(ret);
2378                                 goto out;
2379                         }
2380
2381                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2382                 } else
2383                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2384
2385                 /*
2386                  * If there is already an xattr tree, good, we can calculate
2387                  * like other b-trees. Otherwise we may have the chance of
2388                  * create a tree, the credit calculation is borrowed from
2389                  * ocfs2_calc_extend_credits with root_el = NULL. And the
2390                  * new tree will be cluster based, so no meta is needed.
2391                  */
2392                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2393                         struct ocfs2_extent_list *el =
2394                                  &xb->xb_attrs.xb_root.xt_list;
2395                         meta_add += ocfs2_extend_meta_needed(el);
2396                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2397                                                              el, 1);
2398                 } else
2399                         credits += OCFS2_SUBALLOC_ALLOC + 1;
2400
2401                 /*
2402                  * This cluster will be used either for new bucket or for
2403                  * new xattr block.
2404                  * If the cluster size is the same as the bucket size, one
2405                  * more is needed since we may need to extend the bucket
2406                  * also.
2407                  */
2408                 clusters_add += 1;
2409                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2410                 if (OCFS2_XATTR_BUCKET_SIZE ==
2411                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2412                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2413                         clusters_add += 1;
2414                 }
2415         } else {
2416                 meta_add += 1;
2417                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2418         }
2419 out:
2420         if (clusters_need)
2421                 *clusters_need = clusters_add;
2422         if (meta_need)
2423                 *meta_need = meta_add;
2424         if (credits_need)
2425                 *credits_need = credits;
2426         brelse(bh);
2427         return ret;
2428 }
2429
2430 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2431                                      struct ocfs2_dinode *di,
2432                                      struct ocfs2_xattr_info *xi,
2433                                      struct ocfs2_xattr_search *xis,
2434                                      struct ocfs2_xattr_search *xbs,
2435                                      struct ocfs2_xattr_set_ctxt *ctxt,
2436                                      int *credits)
2437 {
2438         int clusters_add, meta_add, ret;
2439         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2440
2441         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2442
2443         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2444
2445         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2446                                         &clusters_add, &meta_add, credits);
2447         if (ret) {
2448                 mlog_errno(ret);
2449                 return ret;
2450         }
2451
2452         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2453              "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2454
2455         if (meta_add) {
2456                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2457                                                         &ctxt->meta_ac);
2458                 if (ret) {
2459                         mlog_errno(ret);
2460                         goto out;
2461                 }
2462         }
2463
2464         if (clusters_add) {
2465                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2466                 if (ret)
2467                         mlog_errno(ret);
2468         }
2469 out:
2470         if (ret) {
2471                 if (ctxt->meta_ac) {
2472                         ocfs2_free_alloc_context(ctxt->meta_ac);
2473                         ctxt->meta_ac = NULL;
2474                 }
2475
2476                 /*
2477                  * We cannot have an error and a non null ctxt->data_ac.
2478                  */
2479         }
2480
2481         return ret;
2482 }
2483
2484 static int __ocfs2_xattr_set_handle(struct inode *inode,
2485                                     struct ocfs2_dinode *di,
2486                                     struct ocfs2_xattr_info *xi,
2487                                     struct ocfs2_xattr_search *xis,
2488                                     struct ocfs2_xattr_search *xbs,
2489                                     struct ocfs2_xattr_set_ctxt *ctxt)
2490 {
2491         int ret = 0, credits, old_found;
2492
2493         if (!xi->value) {
2494                 /* Remove existing extended attribute */
2495                 if (!xis->not_found)
2496                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2497                 else if (!xbs->not_found)
2498                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2499         } else {
2500                 /* We always try to set extended attribute into inode first*/
2501                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2502                 if (!ret && !xbs->not_found) {
2503                         /*
2504                          * If succeed and that extended attribute existing in
2505                          * external block, then we will remove it.
2506                          */
2507                         xi->value = NULL;
2508                         xi->value_len = 0;
2509
2510                         old_found = xis->not_found;
2511                         xis->not_found = -ENODATA;
2512                         ret = ocfs2_calc_xattr_set_need(inode,
2513                                                         di,
2514                                                         xi,
2515                                                         xis,
2516                                                         xbs,
2517                                                         NULL,
2518                                                         NULL,
2519                                                         &credits);
2520                         xis->not_found = old_found;
2521                         if (ret) {
2522                                 mlog_errno(ret);
2523                                 goto out;
2524                         }
2525
2526                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2527                                         ctxt->handle->h_buffer_credits);
2528                         if (ret) {
2529                                 mlog_errno(ret);
2530                                 goto out;
2531                         }
2532                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2533                 } else if (ret == -ENOSPC) {
2534                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2535                                 ret = ocfs2_xattr_block_find(inode,
2536                                                              xi->name_index,
2537                                                              xi->name, xbs);
2538                                 if (ret)
2539                                         goto out;
2540
2541                                 old_found = xis->not_found;
2542                                 xis->not_found = -ENODATA;
2543                                 ret = ocfs2_calc_xattr_set_need(inode,
2544                                                                 di,
2545                                                                 xi,
2546                                                                 xis,
2547                                                                 xbs,
2548                                                                 NULL,
2549                                                                 NULL,
2550                                                                 &credits);
2551                                 xis->not_found = old_found;
2552                                 if (ret) {
2553                                         mlog_errno(ret);
2554                                         goto out;
2555                                 }
2556
2557                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2558                                         ctxt->handle->h_buffer_credits);
2559                                 if (ret) {
2560                                         mlog_errno(ret);
2561                                         goto out;
2562                                 }
2563                         }
2564                         /*
2565                          * If no space in inode, we will set extended attribute
2566                          * into external block.
2567                          */
2568                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2569                         if (ret)
2570                                 goto out;
2571                         if (!xis->not_found) {
2572                                 /*
2573                                  * If succeed and that extended attribute
2574                                  * existing in inode, we will remove it.
2575                                  */
2576                                 xi->value = NULL;
2577                                 xi->value_len = 0;
2578                                 xbs->not_found = -ENODATA;
2579                                 ret = ocfs2_calc_xattr_set_need(inode,
2580                                                                 di,
2581                                                                 xi,
2582                                                                 xis,
2583                                                                 xbs,
2584                                                                 NULL,
2585                                                                 NULL,
2586                                                                 &credits);
2587                                 if (ret) {
2588                                         mlog_errno(ret);
2589                                         goto out;
2590                                 }
2591
2592                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2593                                                 ctxt->handle->h_buffer_credits);
2594                                 if (ret) {
2595                                         mlog_errno(ret);
2596                                         goto out;
2597                                 }
2598                                 ret = ocfs2_xattr_ibody_set(inode, xi,
2599                                                             xis, ctxt);
2600                         }
2601                 }
2602         }
2603
2604         if (!ret) {
2605                 /* Update inode ctime. */
2606                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2607                                               xis->inode_bh,
2608                                               OCFS2_JOURNAL_ACCESS_WRITE);
2609                 if (ret) {
2610                         mlog_errno(ret);
2611                         goto out;
2612                 }
2613
2614                 inode->i_ctime = CURRENT_TIME;
2615                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2616                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2617                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2618         }
2619 out:
2620         return ret;
2621 }
2622
2623 /*
2624  * This function only called duing creating inode
2625  * for init security/acl xattrs of the new inode.
2626  * All transanction credits have been reserved in mknod.
2627  */
2628 int ocfs2_xattr_set_handle(handle_t *handle,
2629                            struct inode *inode,
2630                            struct buffer_head *di_bh,
2631                            int name_index,
2632                            const char *name,
2633                            const void *value,
2634                            size_t value_len,
2635                            int flags,
2636                            struct ocfs2_alloc_context *meta_ac,
2637                            struct ocfs2_alloc_context *data_ac)
2638 {
2639         struct ocfs2_dinode *di;
2640         int ret;
2641
2642         struct ocfs2_xattr_info xi = {
2643                 .name_index = name_index,
2644                 .name = name,
2645                 .value = value,
2646                 .value_len = value_len,
2647         };
2648
2649         struct ocfs2_xattr_search xis = {
2650                 .not_found = -ENODATA,
2651         };
2652
2653         struct ocfs2_xattr_search xbs = {
2654                 .not_found = -ENODATA,
2655         };
2656
2657         struct ocfs2_xattr_set_ctxt ctxt = {
2658                 .handle = handle,
2659                 .meta_ac = meta_ac,
2660                 .data_ac = data_ac,
2661         };
2662
2663         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2664                 return -EOPNOTSUPP;
2665
2666         /*
2667          * In extreme situation, may need xattr bucket when
2668          * block size is too small. And we have already reserved
2669          * the credits for bucket in mknod.
2670          */
2671         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2672                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2673                 if (!xbs.bucket) {
2674                         mlog_errno(-ENOMEM);
2675                         return -ENOMEM;
2676                 }
2677         }
2678
2679         xis.inode_bh = xbs.inode_bh = di_bh;
2680         di = (struct ocfs2_dinode *)di_bh->b_data;
2681
2682         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2683
2684         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2685         if (ret)
2686                 goto cleanup;
2687         if (xis.not_found) {
2688                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2689                 if (ret)
2690                         goto cleanup;
2691         }
2692
2693         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2694
2695 cleanup:
2696         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2697         brelse(xbs.xattr_bh);
2698         ocfs2_xattr_bucket_free(xbs.bucket);
2699
2700         return ret;
2701 }
2702
2703 /*
2704  * ocfs2_xattr_set()
2705  *
2706  * Set, replace or remove an extended attribute for this inode.
2707  * value is NULL to remove an existing extended attribute, else either
2708  * create or replace an extended attribute.
2709  */
2710 int ocfs2_xattr_set(struct inode *inode,
2711                     int name_index,
2712                     const char *name,
2713                     const void *value,
2714                     size_t value_len,
2715                     int flags)
2716 {
2717         struct buffer_head *di_bh = NULL;
2718         struct ocfs2_dinode *di;
2719         int ret, credits;
2720         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2721         struct inode *tl_inode = osb->osb_tl_inode;
2722         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2723
2724         struct ocfs2_xattr_info xi = {
2725                 .name_index = name_index,
2726                 .name = name,
2727                 .value = value,
2728                 .value_len = value_len,
2729         };
2730
2731         struct ocfs2_xattr_search xis = {
2732                 .not_found = -ENODATA,
2733         };
2734
2735         struct ocfs2_xattr_search xbs = {
2736                 .not_found = -ENODATA,
2737         };
2738
2739         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2740                 return -EOPNOTSUPP;
2741
2742         /*
2743          * Only xbs will be used on indexed trees.  xis doesn't need a
2744          * bucket.
2745          */
2746         xbs.bucket = ocfs2_xattr_bucket_new(inode);
2747         if (!xbs.bucket) {
2748                 mlog_errno(-ENOMEM);
2749                 return -ENOMEM;
2750         }
2751
2752         ret = ocfs2_inode_lock(inode, &di_bh, 1);
2753         if (ret < 0) {
2754                 mlog_errno(ret);
2755                 goto cleanup_nolock;
2756         }
2757         xis.inode_bh = xbs.inode_bh = di_bh;
2758         di = (struct ocfs2_dinode *)di_bh->b_data;
2759
2760         down_write(&OCFS2_I(inode)->ip_xattr_sem);
2761         /*
2762          * Scan inode and external block to find the same name
2763          * extended attribute and collect search infomation.
2764          */
2765         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2766         if (ret)
2767                 goto cleanup;
2768         if (xis.not_found) {
2769                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2770                 if (ret)
2771                         goto cleanup;
2772         }
2773
2774         if (xis.not_found && xbs.not_found) {
2775                 ret = -ENODATA;
2776                 if (flags & XATTR_REPLACE)
2777                         goto cleanup;
2778                 ret = 0;
2779                 if (!value)
2780                         goto cleanup;
2781         } else {
2782                 ret = -EEXIST;
2783                 if (flags & XATTR_CREATE)
2784                         goto cleanup;
2785         }
2786
2787
2788         mutex_lock(&tl_inode->i_mutex);
2789
2790         if (ocfs2_truncate_log_needs_flush(osb)) {
2791                 ret = __ocfs2_flush_truncate_log(osb);
2792                 if (ret < 0) {
2793                         mutex_unlock(&tl_inode->i_mutex);
2794                         mlog_errno(ret);
2795                         goto cleanup;
2796                 }
2797         }
2798         mutex_unlock(&tl_inode->i_mutex);
2799
2800         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2801                                         &xbs, &ctxt, &credits);
2802         if (ret) {
2803                 mlog_errno(ret);
2804                 goto cleanup;
2805         }
2806
2807         /* we need to update inode's ctime field, so add credit for it. */
2808         credits += OCFS2_INODE_UPDATE_CREDITS;
2809         ctxt.handle = ocfs2_start_trans(osb, credits);
2810         if (IS_ERR(ctxt.handle)) {
2811                 ret = PTR_ERR(ctxt.handle);
2812                 mlog_errno(ret);
2813                 goto cleanup;
2814         }
2815
2816         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2817
2818         ocfs2_commit_trans(osb, ctxt.handle);
2819
2820         if (ctxt.data_ac)
2821                 ocfs2_free_alloc_context(ctxt.data_ac);
2822         if (ctxt.meta_ac)
2823                 ocfs2_free_alloc_context(ctxt.meta_ac);
2824         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2825                 ocfs2_schedule_truncate_log_flush(osb, 1);
2826         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2827 cleanup:
2828         up_write(&OCFS2_I(inode)->ip_xattr_sem);
2829         ocfs2_inode_unlock(inode, 1);
2830 cleanup_nolock:
2831         brelse(di_bh);
2832         brelse(xbs.xattr_bh);
2833         ocfs2_xattr_bucket_free(xbs.bucket);
2834
2835         return ret;
2836 }
2837
2838 /*
2839  * Find the xattr extent rec which may contains name_hash.
2840  * e_cpos will be the first name hash of the xattr rec.
2841  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2842  */
2843 static int ocfs2_xattr_get_rec(struct inode *inode,
2844                                u32 name_hash,
2845                                u64 *p_blkno,
2846                                u32 *e_cpos,
2847                                u32 *num_clusters,
2848                                struct ocfs2_extent_list *el)
2849 {
2850         int ret = 0, i;
2851         struct buffer_head *eb_bh = NULL;
2852         struct ocfs2_extent_block *eb;
2853         struct ocfs2_extent_rec *rec = NULL;
2854         u64 e_blkno = 0;
2855
2856         if (el->l_tree_depth) {
2857                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
2858                                       &eb_bh);
2859                 if (ret) {
2860                         mlog_errno(ret);
2861                         goto out;
2862                 }
2863
2864                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2865                 el = &eb->h_list;
2866
2867                 if (el->l_tree_depth) {
2868                         ocfs2_error(inode->i_sb,
2869                                     "Inode %lu has non zero tree depth in "
2870                                     "xattr tree block %llu\n", inode->i_ino,
2871                                     (unsigned long long)eb_bh->b_blocknr);
2872                         ret = -EROFS;
2873                         goto out;
2874                 }
2875         }
2876
2877         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2878                 rec = &el->l_recs[i];
2879
2880                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2881                         e_blkno = le64_to_cpu(rec->e_blkno);
2882                         break;
2883                 }
2884         }
2885
2886         if (!e_blkno) {
2887                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2888                             "record (%u, %u, 0) in xattr", inode->i_ino,
2889                             le32_to_cpu(rec->e_cpos),
2890                             ocfs2_rec_clusters(el, rec));
2891                 ret = -EROFS;
2892                 goto out;
2893         }
2894
2895         *p_blkno = le64_to_cpu(rec->e_blkno);
2896         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2897         if (e_cpos)
2898                 *e_cpos = le32_to_cpu(rec->e_cpos);
2899 out:
2900         brelse(eb_bh);
2901         return ret;
2902 }
2903
2904 typedef int (xattr_bucket_func)(struct inode *inode,
2905                                 struct ocfs2_xattr_bucket *bucket,
2906                                 void *para);
2907
2908 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2909                                    struct ocfs2_xattr_bucket *bucket,
2910                                    int name_index,
2911                                    const char *name,
2912                                    u32 name_hash,
2913                                    u16 *xe_index,
2914                                    int *found)
2915 {
2916         int i, ret = 0, cmp = 1, block_off, new_offset;
2917         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2918         size_t name_len = strlen(name);
2919         struct ocfs2_xattr_entry *xe = NULL;
2920         char *xe_name;
2921
2922         /*
2923          * We don't use binary search in the bucket because there
2924          * may be multiple entries with the same name hash.
2925          */
2926         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2927                 xe = &xh->xh_entries[i];
2928
2929                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2930                         continue;
2931                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2932                         break;
2933
2934                 cmp = name_index - ocfs2_xattr_get_type(xe);
2935                 if (!cmp)
2936                         cmp = name_len - xe->xe_name_len;
2937                 if (cmp)
2938                         continue;
2939
2940                 ret = ocfs2_xattr_bucket_get_name_value(inode,
2941                                                         xh,
2942                                                         i,
2943                                                         &block_off,
2944                                                         &new_offset);
2945                 if (ret) {
2946                         mlog_errno(ret);
2947                         break;
2948                 }
2949
2950
2951                 xe_name = bucket_block(bucket, block_off) + new_offset;
2952                 if (!memcmp(name, xe_name, name_len)) {
2953                         *xe_index = i;
2954                         *found = 1;
2955                         ret = 0;
2956                         break;
2957                 }
2958         }
2959
2960         return ret;
2961 }
2962
2963 /*
2964  * Find the specified xattr entry in a series of buckets.
2965  * This series start from p_blkno and last for num_clusters.
2966  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2967  * the num of the valid buckets.
2968  *
2969  * Return the buffer_head this xattr should reside in. And if the xattr's
2970  * hash is in the gap of 2 buckets, return the lower bucket.
2971  */
2972 static int ocfs2_xattr_bucket_find(struct inode *inode,
2973                                    int name_index,
2974                                    const char *name,
2975                                    u32 name_hash,
2976                                    u64 p_blkno,
2977                                    u32 first_hash,
2978                                    u32 num_clusters,
2979                                    struct ocfs2_xattr_search *xs)
2980 {
2981         int ret, found = 0;
2982         struct ocfs2_xattr_header *xh = NULL;
2983         struct ocfs2_xattr_entry *xe = NULL;
2984         u16 index = 0;
2985         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2986         int low_bucket = 0, bucket, high_bucket;
2987         struct ocfs2_xattr_bucket *search;
2988         u32 last_hash;
2989         u64 blkno, lower_blkno = 0;
2990
2991         search = ocfs2_xattr_bucket_new(inode);
2992         if (!search) {
2993                 ret = -ENOMEM;
2994                 mlog_errno(ret);
2995                 goto out;
2996         }
2997
2998         ret = ocfs2_read_xattr_bucket(search, p_blkno);
2999         if (ret) {
3000                 mlog_errno(ret);
3001                 goto out;
3002         }
3003
3004         xh = bucket_xh(search);
3005         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3006         while (low_bucket <= high_bucket) {
3007                 ocfs2_xattr_bucket_relse(search);
3008
3009                 bucket = (low_bucket + high_bucket) / 2;
3010                 blkno = p_blkno + bucket * blk_per_bucket;
3011                 ret = ocfs2_read_xattr_bucket(search, blkno);
3012                 if (ret) {
3013                         mlog_errno(ret);
3014                         goto out;
3015                 }
3016
3017                 xh = bucket_xh(search);
3018                 xe = &xh->xh_entries[0];
3019                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3020                         high_bucket = bucket - 1;
3021                         continue;
3022                 }
3023
3024                 /*
3025                  * Check whether the hash of the last entry in our
3026                  * bucket is larger than the search one. for an empty
3027                  * bucket, the last one is also the first one.
3028                  */
3029                 if (xh->xh_count)
3030                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3031
3032                 last_hash = le32_to_cpu(xe->xe_name_hash);
3033
3034                 /* record lower_blkno which may be the insert place. */
3035                 lower_blkno = blkno;
3036
3037                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3038                         low_bucket = bucket + 1;
3039                         continue;
3040                 }
3041
3042                 /* the searched xattr should reside in this bucket if exists. */
3043                 ret = ocfs2_find_xe_in_bucket(inode, search,
3044                                               name_index, name, name_hash,
3045                                               &index, &found);
3046                 if (ret) {
3047                         mlog_errno(ret);
3048                         goto out;
3049                 }
3050                 break;
3051         }
3052
3053         /*
3054          * Record the bucket we have found.
3055          * When the xattr's hash value is in the gap of 2 buckets, we will
3056          * always set it to the previous bucket.
3057          */
3058         if (!lower_blkno)
3059                 lower_blkno = p_blkno;
3060
3061         /* This should be in cache - we just read it during the search */
3062         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3063         if (ret) {
3064                 mlog_errno(ret);
3065                 goto out;
3066         }
3067
3068         xs->header = bucket_xh(xs->bucket);
3069         xs->base = bucket_block(xs->bucket, 0);
3070         xs->end = xs->base + inode->i_sb->s_blocksize;
3071
3072         if (found) {
3073                 xs->here = &xs->header->xh_entries[index];
3074                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3075                      (unsigned long long)bucket_blkno(xs->bucket), index);
3076         } else
3077                 ret = -ENODATA;
3078
3079 out:
3080         ocfs2_xattr_bucket_free(search);
3081         return ret;
3082 }
3083
3084 static int ocfs2_xattr_index_block_find(struct inode *inode,
3085                                         struct buffer_head *root_bh,
3086                                         int name_index,
3087                                         const char *name,
3088                                         struct ocfs2_xattr_search *xs)
3089 {
3090         int ret;
3091         struct ocfs2_xattr_block *xb =
3092                         (struct ocfs2_xattr_block *)root_bh->b_data;
3093         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3094         struct ocfs2_extent_list *el = &xb_root->xt_list;
3095         u64 p_blkno = 0;
3096         u32 first_hash, num_clusters = 0;
3097         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3098
3099         if (le16_to_cpu(el->l_next_free_rec) == 0)
3100                 return -ENODATA;
3101
3102         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3103              name, name_hash, name_index);
3104
3105         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3106                                   &num_clusters, el);
3107         if (ret) {
3108                 mlog_errno(ret);
3109                 goto out;
3110         }
3111
3112         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3113
3114         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3115              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3116              first_hash);
3117
3118         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3119                                       p_blkno, first_hash, num_clusters, xs);
3120
3121 out:
3122         return ret;
3123 }
3124
3125 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3126                                        u64 blkno,
3127                                        u32 clusters,
3128                                        xattr_bucket_func *func,
3129                                        void *para)
3130 {
3131         int i, ret = 0;
3132         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3133         u32 num_buckets = clusters * bpc;
3134         struct ocfs2_xattr_bucket *bucket;
3135
3136         bucket = ocfs2_xattr_bucket_new(inode);
3137         if (!bucket) {
3138                 mlog_errno(-ENOMEM);
3139                 return -ENOMEM;
3140         }
3141
3142         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3143              clusters, (unsigned long long)blkno);
3144
3145         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3146                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3147                 if (ret) {
3148                         mlog_errno(ret);
3149                         break;
3150                 }
3151
3152                 /*
3153                  * The real bucket num in this series of blocks is stored
3154                  * in the 1st bucket.
3155                  */
3156                 if (i == 0)
3157                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3158
3159                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3160                      (unsigned long long)blkno,
3161                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3162                 if (func) {
3163                         ret = func(inode, bucket, para);
3164                         if (ret && ret != -ERANGE)
3165                                 mlog_errno(ret);
3166                         /* Fall through to bucket_relse() */
3167                 }
3168
3169                 ocfs2_xattr_bucket_relse(bucket);
3170                 if (ret)
3171                         break;
3172         }
3173
3174         ocfs2_xattr_bucket_free(bucket);
3175         return ret;
3176 }
3177
/*
 * State handed (through the opaque "para" pointer) to
 * ocfs2_list_xattr_bucket() while the buckets of an xattr tree are walked.
 */
3178 struct ocfs2_xattr_tree_list {
	/* Caller's output buffer, forwarded to ocfs2_xattr_list_entry(). */
3179         char *buffer;
	/* Size of buffer, forwarded to ocfs2_xattr_list_entry(). */
3180         size_t buffer_size;
	/*
	 * Running total updated by ocfs2_xattr_list_entry(); its final value
	 * is what ocfs2_xattr_tree_list_index_block() returns on success.
	 * NOTE(review): presumably the number of bytes listed so far --
	 * confirm against ocfs2_xattr_list_entry().
	 */
3181         size_t result;
3182 };
3183
3184 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
3185                                              struct ocfs2_xattr_header *xh,
3186                                              int index,
3187                                              int *block_off,
3188                                              int *new_offset)
3189 {
3190         u16 name_offset;
3191
3192         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3193                 return -EINVAL;
3194
3195         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3196
3197         *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
3198         *new_offset = name_offset % inode->i_sb->s_blocksize;
3199
3200         return 0;
3201 }
3202
3203 static int ocfs2_list_xattr_bucket(struct inode *inode,
3204                                    struct ocfs2_xattr_bucket *bucket,
3205                                    void *para)
3206 {
3207         int ret = 0, type;
3208         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3209         int i, block_off, new_offset;
3210         const char *prefix, *name;
3211
3212         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3213                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3214                 type = ocfs2_xattr_get_type(entry);
3215                 prefix = ocfs2_xattr_prefix(type);
3216
3217                 if (prefix) {
3218                         ret = ocfs2_xattr_bucket_get_name_value(inode,
3219                                                                 bucket_xh(bucket),
3220                                                                 i,
3221                                                                 &block_off,
3222                                                                 &new_offset);
3223                         if (ret)
3224                                 break;
3225
3226                         name = (const char *)bucket_block(bucket, block_off) +
3227                                 new_offset;
3228                         ret = ocfs2_xattr_list_entry(xl->buffer,
3229                                                      xl->buffer_size,
3230                                                      &xl->result,
3231                                                      prefix, name,
3232                                                      entry->xe_name_len);
3233                         if (ret)
3234                                 break;
3235                 }
3236         }
3237
3238         return ret;
3239 }
3240
3241 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3242                                              struct ocfs2_xattr_tree_root *xt,
3243                                              char *buffer,
3244                                              size_t buffer_size)
3245 {
3246         struct ocfs2_extent_list *el = &xt->xt_list;
3247         int ret = 0;
3248         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3249         u64 p_blkno = 0;
3250         struct ocfs2_xattr_tree_list xl = {
3251                 .buffer = buffer,
3252                 .buffer_size = buffer_size,
3253                 .result = 0,
3254         };
3255
3256         if (le16_to_cpu(el->l_next_free_rec) == 0)
3257                 return 0;
3258
3259         while (name_hash > 0) {
3260                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3261                                           &e_cpos, &num_clusters, el);
3262                 if (ret) {
3263                         mlog_errno(ret);
3264                         goto out;
3265                 }
3266
3267                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
3268                                                   ocfs2_list_xattr_bucket,
3269                                                   &xl);
3270                 if (ret) {
3271                         if (ret != -ERANGE)
3272                                 mlog_errno(ret);
3273                         goto out;
3274                 }
3275
3276                 if (e_cpos == 0)
3277                         break;
3278
3279                 name_hash = e_cpos - 1;
3280         }
3281
3282         ret = xl.result;
3283 out:
3284         return ret;
3285 }
3286
3287 static int cmp_xe(const void *a, const void *b)
3288 {
3289         const struct ocfs2_xattr_entry *l = a, *r = b;
3290         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3291         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3292
3293         if (l_hash > r_hash)
3294                 return 1;
3295         if (l_hash < r_hash)
3296                 return -1;
3297         return 0;
3298 }
3299
3300 static void swap_xe(void *a, void *b, int size)
3301 {
3302         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3303
3304         tmp = *l;
3305         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3306         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3307 }
3308
3309 /*
3310  * When the ocfs2_xattr_block is filled up, new bucket will be created
3311  * and all the xattr entries will be moved to the new bucket.
3312  * The header goes at the start of the bucket, and the names+values are
3313  * filled from the end.  This is why *target starts as the last buffer.
3314  * Note: we need to sort the entries since they are not saved in order
3315  * in the ocfs2_xattr_block.
3316  */
3317 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3318                                            struct buffer_head *xb_bh,
3319                                            struct ocfs2_xattr_bucket *bucket)
3320 {
3321         int i, blocksize = inode->i_sb->s_blocksize;
3322         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3323         u16 offset, size, off_change;
3324         struct ocfs2_xattr_entry *xe;
3325         struct ocfs2_xattr_block *xb =
3326                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
3327         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3328         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3329         u16 count = le16_to_cpu(xb_xh->xh_count);
3330         char *src = xb_bh->b_data;
3331         char *target = bucket_block(bucket, blks - 1);
3332
3333         mlog(0, "cp xattr from block %llu to bucket %llu\n",
3334              (unsigned long long)xb_bh->b_blocknr,
3335              (unsigned long long)bucket_blkno(bucket));
3336
3337         for (i = 0; i < blks; i++)
3338                 memset(bucket_block(bucket, i), 0, blocksize);
3339
3340         /*
3341          * Since the xe_name_offset is based on ocfs2_xattr_header,
3342          * there is a offset change corresponding to the change of
3343          * ocfs2_xattr_header's position.
3344          */
3345         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3346         xe = &xb_xh->xh_entries[count - 1];
3347         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3348         size = blocksize - offset;
3349
3350         /* copy all the names and values. */
3351         memcpy(target + offset, src + offset, size);
3352
3353         /* Init new header now. */
3354         xh->xh_count = xb_xh->xh_count;
3355         xh->xh_num_buckets = cpu_to_le16(1);
3356         xh->xh_name_value_len = cpu_to_le16(size);
3357         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3358
3359         /* copy all the entries. */
3360         target = bucket_block(bucket, 0);
3361         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3362         size = count * sizeof(struct ocfs2_xattr_entry);
3363         memcpy(target + offset, (char *)xb_xh + offset, size);
3364
3365         /* Change the xe offset for all the xe because of the move. */
3366         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3367                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3368         for (i = 0; i < count; i++)
3369                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3370
3371         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3372              offset, size, off_change);
3373
3374         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3375              cmp_xe, swap_xe);
3376 }
3377
3378 /*
3379  * After we move xattr from block to index btree, we have to
3380  * update ocfs2_xattr_search to the new xe and base.
3381  *
3382  * When the entry is in xattr block, xattr_bh indicates the storage place.
3383  * While if the entry is in index b-tree, "bucket" indicates the
3384  * real place of the xattr.
3385  */
3386 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3387                                             struct ocfs2_xattr_search *xs,
3388                                             struct buffer_head *old_bh)
3389 {
3390         char *buf = old_bh->b_data;
3391         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3392         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3393         int i;
3394
3395         xs->header = bucket_xh(xs->bucket);
3396         xs->base = bucket_block(xs->bucket, 0);
3397         xs->end = xs->base + inode->i_sb->s_blocksize;
3398
3399         if (xs->not_found)
3400                 return;
3401
3402         i = xs->here - old_xh->xh_entries;
3403         xs->here = &xs->header->xh_entries[i];
3404 }
3405
3406 static int ocfs2_xattr_create_index_block(struct inode *inode,
3407                                           struct ocfs2_xattr_search *xs,
3408                                           struct ocfs2_xattr_set_ctxt *ctxt)
3409 {
3410         int ret;
3411         u32 bit_off, len;
3412         u64 blkno;
3413         handle_t *handle = ctxt->handle;
3414         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3415         struct ocfs2_inode_info *oi = OCFS2_I(inode);
3416         struct buffer_head *xb_bh = xs->xattr_bh;
3417         struct ocfs2_xattr_block *xb =
3418                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3419         struct ocfs2_xattr_tree_root *xr;
3420         u16 xb_flags = le16_to_cpu(xb->xb_flags);
3421
3422         mlog(0, "create xattr index block for %llu\n",
3423              (unsigned long long)xb_bh->b_blocknr);
3424
3425         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3426         BUG_ON(!xs->bucket);
3427
3428         /*
3429          * XXX:
3430          * We can use this lock for now, and maybe move to a dedicated mutex
3431          * if performance becomes a problem later.
3432          */
3433         down_write(&oi->ip_alloc_sem);
3434
3435         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3436                                       OCFS2_JOURNAL_ACCESS_WRITE);
3437         if (ret) {
3438                 mlog_errno(ret);
3439                 goto out;
3440         }
3441
3442         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3443                                      1, 1, &bit_off, &len);
3444         if (ret) {
3445                 mlog_errno(ret);
3446                 goto out;
3447         }
3448
3449         /*
3450          * The bucket may spread in many blocks, and
3451          * we will only touch the 1st block and the last block
3452          * in the whole bucket(one for entry and one for data).
3453          */
3454         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3455
3456         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3457              (unsigned long long)blkno);
3458
3459         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3460         if (ret) {
3461                 mlog_errno(ret);
3462                 goto out;
3463         }
3464
3465         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3466                                                 OCFS2_JOURNAL_ACCESS_CREATE);
3467         if (ret) {
3468                 mlog_errno(ret);
3469                 goto out;
3470         }
3471
3472         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3473         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3474
3475         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3476
3477         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3478         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3479                offsetof(struct ocfs2_xattr_block, xb_attrs));
3480
3481         xr = &xb->xb_attrs.xb_root;
3482         xr->xt_clusters = cpu_to_le32(1);
3483         xr->xt_last_eb_blk = 0;
3484         xr->xt_list.l_tree_depth = 0;
3485         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3486         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3487
3488         xr->xt_list.l_recs[0].e_cpos = 0;
3489         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3490         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3491
3492         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3493
3494         ocfs2_journal_dirty(handle, xb_bh);
3495
3496 out:
3497         up_write(&oi->ip_alloc_sem);
3498
3499         return ret;
3500 }
3501
3502 static int cmp_xe_offset(const void *a, const void *b)
3503 {
3504         const struct ocfs2_xattr_entry *l = a, *r = b;
3505         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3506         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3507
3508         if (l_name_offset < r_name_offset)
3509                 return 1;
3510         if (l_name_offset > r_name_offset)
3511                 return -1;
3512         return 0;
3513 }
3514
3515 /*
3516  * defrag a xattr bucket if we find that the bucket has some
3517  * holes beteen name/value pairs.
3518  * We will move all the name/value pairs to the end of the bucket
3519  * so that we can spare some space for insertion.
3520  */
3521 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3522                                      handle_t *handle,
3523                                      struct ocfs2_xattr_bucket *bucket)
3524 {
3525         int ret, i;
3526         size_t end, offset, len, value_len;
3527         struct ocfs2_xattr_header *xh;
3528         char *entries, *buf, *bucket_buf = NULL;
3529         u64 blkno = bucket_blkno(bucket);
3530         u16 xh_free_start;
3531         size_t blocksize = inode->i_sb->s_blocksize;
3532         struct ocfs2_xattr_entry *xe;
3533
3534         /*
3535          * In order to make the operation more efficient and generic,
3536          * we copy all the blocks into a contiguous memory and do the
3537          * defragment there, so if anything is error, we will not touch
3538          * the real block.
3539          */
3540         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3541         if (!bucket_buf) {
3542                 ret = -EIO;
3543                 goto out;
3544         }
3545
3546         buf = bucket_buf;
3547         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3548                 memcpy(buf, bucket_block(bucket, i), blocksize);
3549
3550         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3551                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3552         if (ret < 0) {
3553                 mlog_errno(ret);
3554                 goto out;
3555         }
3556
3557         xh = (struct ocfs2_xattr_header *)bucket_buf;
3558         entries = (char *)xh->xh_entries;
3559         xh_free_start = le16_to_cpu(xh->xh_free_start);
3560
3561         mlog(0, "adjust xattr bucket in %llu, count = %u, "
3562              "xh_free_start = %u, xh_name_value_len = %u.\n",
3563              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3564              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3565
3566         /*
3567          * sort all the entries by their offset.
3568          * the largest will be the first, so that we can
3569          * move them to the end one by one.
3570          */
3571         sort(entries, le16_to_cpu(xh->xh_count),
3572              sizeof(struct ocfs2_xattr_entry),
3573              cmp_xe_offset, swap_xe);
3574
3575         /* Move all name/values to the end of the bucket. */
3576         xe = xh->xh_entries;
3577         end = OCFS2_XATTR_BUCKET_SIZE;
3578         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3579                 offset = le16_to_cpu(xe->xe_name_offset);
3580                 if (ocfs2_xattr_is_local(xe))
3581                         value_len = OCFS2_XATTR_SIZE(
3582                                         le64_to_cpu(xe->xe_value_size));
3583                 else
3584                         value_len = OCFS2_XATTR_ROOT_SIZE;
3585                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3586
3587                 /*
3588                  * We must make sure that the name/value pair
3589                  * exist in the same block. So adjust end to
3590                  * the previous block end if needed.
3591                  */
3592                 if (((end - len) / blocksize !=
3593                         (end - 1) / blocksize))
3594                         end = end - end % blocksize;
3595
3596                 if (end > offset + len) {
3597                         memmove(bucket_buf + end - len,
3598                                 bucket_buf + offset, len);
3599                         xe->xe_name_offset = cpu_to_le16(end - len);
3600                 }
3601
3602                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3603                                 "bucket %llu\n", (unsigned long long)blkno);
3604
3605                 end -= len;
3606         }
3607
3608         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3609                         "bucket %llu\n", (unsigned long long)blkno);
3610
3611         if (xh_free_start == end)
3612                 goto out;
3613
3614         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3615         xh->xh_free_start = cpu_to_le16(end);
3616
3617         /* sort the entries by their name_hash. */
3618         sort(entries, le16_to_cpu(xh->xh_count),
3619              sizeof(struct ocfs2_xattr_entry),
3620              cmp_xe, swap_xe);
3621
3622         buf = bucket_buf;
3623         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3624                 memcpy(bucket_block(bucket, i), buf, blocksize);
3625         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3626
3627 out:
3628         kfree(bucket_buf);
3629         return ret;
3630 }
3631
3632 /*
3633  * prev_blkno points to the start of an existing extent.  new_blkno
3634  * points to a newly allocated extent.  Because we know each of our
3635  * clusters contains more than bucket, we can easily split one cluster
3636  * at a bucket boundary.  So we take the last cluster of the existing
3637  * extent and split it down the middle.  We move the last half of the
3638  * buckets in the last cluster of the existing extent over to the new
3639  * extent.
3640  *
3641  * first_bh is the buffer at prev_blkno so we can update the existing
3642  * extent's bucket count.  header_bh is the bucket were we were hoping
3643  * to insert our xattr.  If the bucket move places the target in the new
3644  * extent, we'll update first_bh and header_bh after modifying the old
3645  * extent.
3646  *
3647  * first_hash will be set as the 1st xe's name_hash in the new extent.
3648  */
3649 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3650                                                handle_t *handle,
3651                                                struct ocfs2_xattr_bucket *first,
3652                                                struct ocfs2_xattr_bucket *target,
3653                                                u64 new_blkno,
3654                                                u32 num_clusters,
3655                                                u32 *first_hash)
3656 {
3657         int ret;
3658         struct super_block *sb = inode->i_sb;
3659         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
3660         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
3661         int to_move = num_buckets / 2;
3662         u64 src_blkno;
3663         u64 last_cluster_blkno = bucket_blkno(first) +
3664                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
3665
3666         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
3667         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
3668
3669         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3670              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
3671
3672         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
3673                                      last_cluster_blkno, new_blkno,
3674                                      to_move, first_hash);
3675         if (ret) {
3676                 mlog_errno(ret);
3677                 goto out;
3678         }
3679
3680         /* This is the first bucket that got moved */
3681         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3682
3683         /*
3684          * If the target bucket was part of the moved buckets, we need to
3685          * update first and target.
3686          */
3687         if (bucket_blkno(target) >= src_blkno) {
3688                 /* Find the block for the new target bucket */
3689                 src_blkno = new_blkno +
3690                         (bucket_blkno(target) - src_blkno);
3691
3692                 ocfs2_xattr_bucket_relse(first);
3693                 ocfs2_xattr_bucket_relse(target);
3694
3695                 /*
3696                  * These shouldn't fail - the buffers are in the
3697                  * journal from ocfs2_cp_xattr_bucket().
3698                  */
3699                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
3700                 if (ret) {
3701                         mlog_errno(ret);
3702                         goto out;
3703                 }
3704                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
3705                 if (ret)
3706                         mlog_errno(ret);
3707
3708         }
3709
3710 out:
3711         return ret;
3712 }
3713
3714 /*
3715  * Find the suitable pos when we divide a bucket into 2.
3716  * We have to make sure the xattrs with the same hash value exist
3717  * in the same bucket.
3718  *
3719  * If this ocfs2_xattr_header covers more than one hash value, find a
3720  * place where the hash value changes.  Try to find the most even split.
3721  * The most common case is that all entries have different hash values,
3722  * and the first check we make will find a place to split.
3723  */
3724 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3725 {
3726         struct ocfs2_xattr_entry *entries = xh->xh_entries;
3727         int count = le16_to_cpu(xh->xh_count);
3728         int delta, middle = count / 2;
3729
3730         /*
3731          * We start at the middle.  Each step gets farther away in both
3732          * directions.  We therefore hit the change in hash value
3733          * nearest to the middle.  Note that this loop does not execute for
3734          * count < 2.
3735          */
3736         for (delta = 0; delta < middle; delta++) {
3737                 /* Let's check delta earlier than middle */
3738                 if (cmp_xe(&entries[middle - delta - 1],
3739                            &entries[middle - delta]))
3740                         return middle - delta;
3741
3742                 /* For even counts, don't walk off the end */
3743                 if ((middle + delta + 1) == count)
3744                         continue;
3745
3746                 /* Now try delta past middle */
3747                 if (cmp_xe(&entries[middle + delta],
3748                            &entries[middle + delta + 1]))
3749                         return middle + delta + 1;
3750         }
3751
3752         /* Every entry had the same hash */
3753         return count;
3754 }
3755
3756 /*
3757  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3758  * first_hash will record the 1st hash of the new bucket.
3759  *
3760  * Normally half of the xattrs will be moved.  But we have to make
3761  * sure that the xattrs with the same hash value are stored in the
3762  * same bucket. If all the xattrs in this bucket have the same hash
3763  * value, the new bucket will be initialized as an empty one and the
3764  * first_hash will be initialized as (hash_value+1).
3765  */
3766 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3767                                     handle_t *handle,
3768                                     u64 blk,
3769                                     u64 new_blk,
3770                                     u32 *first_hash,
3771                                     int new_bucket_head)
3772 {
3773         int ret, i;
3774         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3775         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3776         struct ocfs2_xattr_header *xh;
3777         struct ocfs2_xattr_entry *xe;
3778         int blocksize = inode->i_sb->s_blocksize;
3779
3780         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3781              (unsigned long long)blk, (unsigned long long)new_blk);
3782
3783         s_bucket = ocfs2_xattr_bucket_new(inode);
3784         t_bucket = ocfs2_xattr_bucket_new(inode);
3785         if (!s_bucket || !t_bucket) {
3786                 ret = -ENOMEM;
3787                 mlog_errno(ret);
3788                 goto out;
3789         }
3790
3791         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3792         if (ret) {
3793                 mlog_errno(ret);
3794                 goto out;
3795         }
3796
3797         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3798                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3799         if (ret) {
3800                 mlog_errno(ret);
3801                 goto out;
3802         }
3803
3804         /*
3805          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3806          * there's no need to read it.
3807          */
3808         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3809         if (ret) {
3810                 mlog_errno(ret);
3811                 goto out;
3812         }
3813
3814         /*
3815          * Hey, if we're overwriting t_bucket, what difference does
3816          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
3817          * same part of ocfs2_cp_xattr_bucket().
3818          */
3819         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3820                                                 new_bucket_head ?
3821                                                 OCFS2_JOURNAL_ACCESS_CREATE :
3822                                                 OCFS2_JOURNAL_ACCESS_WRITE);
3823         if (ret) {
3824                 mlog_errno(ret);
3825                 goto out;
3826         }
3827
3828         xh = bucket_xh(s_bucket);
3829         count = le16_to_cpu(xh->xh_count);
3830         start = ocfs2_xattr_find_divide_pos(xh);
3831
3832         if (start == count) {
3833                 xe = &xh->xh_entries[start-1];
3834
3835                 /*
3836                  * initialized a new empty bucket here.
3837                  * The hash value is set as one larger than
3838                  * that of the last entry in the previous bucket.
3839                  */
3840                 for (i = 0; i < t_bucket->bu_blocks; i++)
3841                         memset(bucket_block(t_bucket, i), 0, blocksize);
3842
3843                 xh = bucket_xh(t_bucket);
3844                 xh->xh_free_start = cpu_to_le16(blocksize);
3845                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3846                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3847
3848                 goto set_num_buckets;
3849         }
3850
3851         /* copy the whole bucket to the new first. */
3852         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3853
3854         /* update the new bucket. */
3855         xh = bucket_xh(t_bucket);
3856
3857         /*
3858          * Calculate the total name/value len and xh_free_start for
3859          * the old bucket first.
3860          */
3861         name_offset = OCFS2_XATTR_BUCKET_SIZE;
3862         name_value_len = 0;
3863         for (i = 0; i < start; i++) {
3864                 xe = &xh->xh_entries[i];
3865                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3866                 if (ocfs2_xattr_is_local(xe))
3867                         xe_len +=
3868                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3869                 else
3870                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3871                 name_value_len += xe_len;
3872                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3873                         name_offset = le16_to_cpu(xe->xe_name_offset);
3874         }
3875
3876         /*
3877          * Now begin the modification to the new bucket.
3878          *
3879          * In the new bucket, We just move the xattr entry to the beginning
3880          * and don't touch the name/value. So there will be some holes in the
3881          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3882          * called.
3883          */
3884         xe = &xh->xh_entries[start];
3885         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3886         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3887              (int)((char *)xe - (char *)xh),
3888              (int)((char *)xh->xh_entries - (char *)xh));
3889         memmove((char *)xh->xh_entries, (char *)xe, len);
3890         xe = &xh->xh_entries[count - start];
3891         len = sizeof(struct ocfs2_xattr_entry) * start;
3892         memset((char *)xe, 0, len);
3893
3894         le16_add_cpu(&xh->xh_count, -start);
3895         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3896
3897         /* Calculate xh_free_start for the new bucket. */
3898         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3899         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3900                 xe = &xh->xh_entries[i];
3901                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3902                 if (ocfs2_xattr_is_local(xe))
3903                         xe_len +=
3904                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3905                 else
3906                         xe_len += OCFS2_XATTR_ROOT_SIZE;
3907                 if (le16_to_cpu(xe->xe_name_offset) <
3908                     le16_to_cpu(xh->xh_free_start))
3909                         xh->xh_free_start = xe->xe_name_offset;
3910         }
3911
3912 set_num_buckets:
3913         /* set xh->xh_num_buckets for the new xh. */
3914         if (new_bucket_head)
3915                 xh->xh_num_buckets = cpu_to_le16(1);
3916         else
3917                 xh->xh_num_buckets = 0;
3918
3919         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3920
3921         /* store the first_hash of the new bucket. */
3922         if (first_hash)
3923                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3924
3925         /*
3926          * Now only update the 1st block of the old bucket.  If we
3927          * just added a new empty bucket, there is no need to modify
3928          * it.
3929          */
3930         if (start == count)
3931                 goto out;
3932
3933         xh = bucket_xh(s_bucket);
3934         memset(&xh->xh_entries[start], 0,
3935                sizeof(struct ocfs2_xattr_entry) * (count - start));
3936         xh->xh_count = cpu_to_le16(start);
3937         xh->xh_free_start = cpu_to_le16(name_offset);
3938         xh->xh_name_value_len = cpu_to_le16(name_value_len);
3939
3940         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3941
3942 out:
3943         ocfs2_xattr_bucket_free(s_bucket);
3944         ocfs2_xattr_bucket_free(t_bucket);
3945
3946         return ret;
3947 }
3948
3949 /*
3950  * Copy xattr from one bucket to another bucket.
3951  *
3952  * The caller must make sure that the journal transaction
3953  * has enough space for journaling.
3954  */
3955 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3956                                  handle_t *handle,
3957                                  u64 s_blkno,
3958                                  u64 t_blkno,
3959                                  int t_is_new)
3960 {
3961         int ret;
3962         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3963
3964         BUG_ON(s_blkno == t_blkno);
3965
3966         mlog(0, "cp bucket %llu to %llu, target is %d\n",
3967              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3968              t_is_new);
3969
3970         s_bucket = ocfs2_xattr_bucket_new(inode);
3971         t_bucket = ocfs2_xattr_bucket_new(inode);
3972         if (!s_bucket || !t_bucket) {
3973                 ret = -ENOMEM;
3974                 mlog_errno(ret);
3975                 goto out;
3976         }
3977
3978         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3979         if (ret)
3980                 goto out;
3981
3982         /*
3983          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
3984          * there's no need to read it.
3985          */
3986         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3987         if (ret)
3988                 goto out;
3989
3990         /*
3991          * Hey, if we're overwriting t_bucket, what difference does
3992          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
3993          * cluster to fill, we came here from
3994          * ocfs2_mv_xattr_buckets(), and it is really new -
3995          * ACCESS_CREATE is required.  But we also might have moved data
3996          * out of t_bucket before extending back into it.
3997          * ocfs2_add_new_xattr_bucket() can do this - its call to
3998          * ocfs2_add_new_xattr_cluster() may have created a new extent
3999          * and copied out the end of the old extent.  Then it re-extends
4000          * the old extent back to create space for new xattrs.  That's
4001          * how we get here, and the bucket isn't really new.
4002          */
4003         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4004                                                 t_is_new ?
4005                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4006                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4007         if (ret)
4008                 goto out;
4009
4010         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4011         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4012
4013 out:
4014         ocfs2_xattr_bucket_free(t_bucket);
4015         ocfs2_xattr_bucket_free(s_bucket);
4016
4017         return ret;
4018 }
4019
4020 /*
4021  * src_blk points to the start of an existing extent.  last_blk points to
4022  * last cluster in that extent.  to_blk points to a newly allocated
4023  * extent.  We copy the buckets from the cluster at last_blk to the new
4024  * extent.  If start_bucket is non-zero, we skip that many buckets before
4025  * we start copying.  The new extent's xh_num_buckets gets set to the
4026  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4027  * by the same amount.
4028  */
4029 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4030                                   u64 src_blk, u64 last_blk, u64 to_blk,
4031                                   unsigned int start_bucket,
4032                                   u32 *first_hash)
4033 {
4034         int i, ret, credits;
4035         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4036         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4037         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4038         struct ocfs2_xattr_bucket *old_first, *new_first;
4039
4040         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4041              (unsigned long long)last_blk, (unsigned long long)to_blk);
4042
4043         BUG_ON(start_bucket >= num_buckets);
4044         if (start_bucket) {
4045                 num_buckets -= start_bucket;
4046                 last_blk += (start_bucket * blks_per_bucket);
4047         }
4048
4049         /* The first bucket of the original extent */
4050         old_first = ocfs2_xattr_bucket_new(inode);
4051         /* The first bucket of the new extent */
4052         new_first = ocfs2_xattr_bucket_new(inode);
4053         if (!old_first || !new_first) {
4054                 ret = -ENOMEM;
4055                 mlog_errno(ret);
4056                 goto out;
4057         }
4058
4059         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4060         if (ret) {
4061                 mlog_errno(ret);
4062                 goto out;
4063         }
4064
4065         /*
4066          * We need to update the first bucket of the old extent and all
4067          * the buckets going to the new extent.
4068          */
4069         credits = ((num_buckets + 1) * blks_per_bucket) +
4070                 handle->h_buffer_credits;
4071         ret = ocfs2_extend_trans(handle, credits);
4072         if (ret) {
4073                 mlog_errno(ret);
4074                 goto out;
4075         }
4076
4077         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4078                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4079         if (ret) {
4080                 mlog_errno(ret);
4081                 goto out;
4082         }
4083
4084         for (i = 0; i < num_buckets; i++) {
4085                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4086                                             last_blk + (i * blks_per_bucket),
4087                                             to_blk + (i * blks_per_bucket),
4088                                             1);
4089                 if (ret) {
4090                         mlog_errno(ret);
4091                         goto out;
4092                 }
4093         }
4094
4095         /*
4096          * Get the new bucket ready before we dirty anything
4097          * (This actually shouldn't fail, because we already dirtied
4098          * it once in ocfs2_cp_xattr_bucket()).
4099          */
4100         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4101         if (ret) {
4102                 mlog_errno(ret);
4103                 goto out;
4104         }
4105         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4106                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4107         if (ret) {
4108                 mlog_errno(ret);
4109                 goto out;
4110         }
4111
4112         /* Now update the headers */
4113         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4114         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4115
4116         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4117         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4118
4119         if (first_hash)
4120                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4121
4122 out:
4123         ocfs2_xattr_bucket_free(new_first);
4124         ocfs2_xattr_bucket_free(old_first);
4125         return ret;
4126 }
4127
4128 /*
4129  * Move some xattrs in this cluster to the new cluster.
4130  * This function should only be called when bucket size == cluster size.
4131  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4132  */
4133 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4134                                       handle_t *handle,
4135                                       u64 prev_blk,
4136                                       u64 new_blk,
4137                                       u32 *first_hash)
4138 {
4139         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4140         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4141
4142         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4143
4144         ret = ocfs2_extend_trans(handle, credits);
4145         if (ret) {
4146                 mlog_errno(ret);
4147                 return ret;
4148         }
4149
4150         /* Move half of the xattr in start_blk to the next bucket. */
4151         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4152                                           new_blk, first_hash, 1);
4153 }
4154
4155 /*
4156  * Move some xattrs from the old cluster to the new one since they are not
4157  * contiguous in ocfs2 xattr tree.
4158  *
4159  * new_blk starts a new separate cluster, and we will move some xattrs from
4160  * prev_blk to it. v_start will be set as the first name hash value in this
4161  * new cluster so that it can be used as e_cpos during tree insertion and
4162  * don't collide with our original b-tree operations. first_bh and header_bh
4163  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4164  * to extend the insert bucket.
4165  *
4166  * The problem is how much xattr should we move to the new one and when should
4167  * we update first_bh and header_bh?
4168  * 1. If cluster size > bucket size, that means the previous cluster has more
4169  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4170  *    update the first_bh and header_bh if the insert bucket has been moved
4171  *    to the new cluster.
4172  * 2. If cluster_size == bucket_size:
4173  *    a) If the previous extent rec has more than one cluster and the insert
4174  *       place isn't in the last cluster, copy the entire last cluster to the
4175  *       new one. This time, we don't need to upate the first_bh and header_bh
4176  *       since they will not be moved into the new cluster.
4177  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4178  *       the new one. And we set the extend flag to zero if the insert place is
4179  *       moved into the new allocated cluster since no extend is needed.
4180  */
4181 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4182                                             handle_t *handle,
4183                                             struct ocfs2_xattr_bucket *first,
4184                                             struct ocfs2_xattr_bucket *target,
4185                                             u64 new_blk,
4186                                             u32 prev_clusters,
4187                                             u32 *v_start,
4188                                             int *extend)
4189 {
4190         int ret;
4191
4192         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4193              (unsigned long long)bucket_blkno(first), prev_clusters,
4194              (unsigned long long)new_blk);
4195
4196         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4197                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4198                                                           handle,
4199                                                           first, target,
4200                                                           new_blk,
4201                                                           prev_clusters,
4202                                                           v_start);
4203                 if (ret)
4204                         mlog_errno(ret);
4205         } else {
4206                 /* The start of the last cluster in the first extent */
4207                 u64 last_blk = bucket_blkno(first) +
4208                         ((prev_clusters - 1) *
4209                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4210
4211                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4212                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4213                                                      bucket_blkno(first),
4214                                                      last_blk, new_blk, 0,
4215                                                      v_start);
4216                         if (ret)
4217                                 mlog_errno(ret);
4218                 } else {
4219                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4220                                                          last_blk, new_blk,
4221                                                          v_start);
4222                         if (ret)
4223                                 mlog_errno(ret);
4224
4225                         if ((bucket_blkno(target) == last_blk) && extend)
4226                                 *extend = 0;
4227                 }
4228         }
4229
4230         return ret;
4231 }
4232
4233 /*
4234  * Add a new cluster for xattr storage.
4235  *
4236  * If the new cluster is contiguous with the previous one, it will be
4237  * appended to the same extent record, and num_clusters will be updated.
4238  * If not, we will insert a new extent for it and move some xattrs in
4239  * the last cluster into the new allocated one.
4240  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4241  * lose the benefits of hashing because we'll have to search large leaves.
4242  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4243  * if it's bigger).
4244  *
4245  * first_bh is the first block of the previous extent rec and header_bh
4246  * indicates the bucket we will insert the new xattrs. They will be updated
4247  * when the header_bh is moved into the new cluster.
4248  */
4249 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4250                                        struct buffer_head *root_bh,
4251                                        struct ocfs2_xattr_bucket *first,
4252                                        struct ocfs2_xattr_bucket *target,
4253                                        u32 *num_clusters,
4254                                        u32 prev_cpos,
4255                                        int *extend,
4256                                        struct ocfs2_xattr_set_ctxt *ctxt)
4257 {
4258         int ret;
4259         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4260         u32 prev_clusters = *num_clusters;
4261         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4262         u64 block;
4263         handle_t *handle = ctxt->handle;
4264         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4265         struct ocfs2_extent_tree et;
4266
4267         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4268              "previous xattr blkno = %llu\n",
4269              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4270              prev_cpos, (unsigned long long)bucket_blkno(first));
4271
4272         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4273
4274         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4275                                       OCFS2_JOURNAL_ACCESS_WRITE);
4276         if (ret < 0) {
4277                 mlog_errno(ret);
4278                 goto leave;
4279         }
4280
4281         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4282                                      clusters_to_add, &bit_off, &num_bits);
4283         if (ret < 0) {
4284                 if (ret != -ENOSPC)
4285                         mlog_errno(ret);
4286                 goto leave;
4287         }
4288
4289         BUG_ON(num_bits > clusters_to_add);
4290
4291         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4292         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4293              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4294
4295         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
4296             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4297              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4298                 /*
4299                  * If this cluster is contiguous with the old one and
4300                  * adding this new cluster, we don't surpass the limit of
4301                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4302                  * initialized and used like other buckets in the previous
4303                  * cluster.
4304                  * So add it as a contiguous one. The caller will handle
4305                  * its init process.
4306                  */
4307                 v_start = prev_cpos + prev_clusters;
4308                 *num_clusters = prev_clusters + num_bits;
4309                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4310                      num_bits);
4311         } else {
4312                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4313                                                        handle,
4314                                                        first,
4315                                                        target,
4316                                                        block,
4317                                                        prev_clusters,
4318                                                        &v_start,
4319                                                        extend);
4320                 if (ret) {
4321                         mlog_errno(ret);
4322                         goto leave;
4323                 }
4324         }
4325
4326         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4327              num_bits, (unsigned long long)block, v_start);
4328         ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
4329                                   num_bits, 0, ctxt->meta_ac);
4330         if (ret < 0) {
4331                 mlog_errno(ret);
4332                 goto leave;
4333         }
4334
4335         ret = ocfs2_journal_dirty(handle, root_bh);
4336         if (ret < 0)
4337                 mlog_errno(ret);
4338
4339 leave:
4340         return ret;
4341 }
4342
4343 /*
4344  * We are given an extent.  'first' is the bucket at the very front of
4345  * the extent.  The extent has space for an additional bucket past
4346  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
4347  * of the target bucket.  We wish to shift every bucket past the target
4348  * down one, filling in that additional space.  When we get back to the
4349  * target, we split the target between itself and the now-empty bucket
4350  * at target+1 (aka, target_blkno + blks_per_bucket).
4351  */
4352 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4353                                      handle_t *handle,
4354                                      struct ocfs2_xattr_bucket *first,
4355                                      u64 target_blk,
4356                                      u32 num_clusters)
4357 {
4358         int ret, credits;
4359         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4360         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4361         u64 end_blk;
4362         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4363
4364         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4365              "from %llu, len = %u\n", (unsigned long long)target_blk,
4366              (unsigned long long)bucket_blkno(first), num_clusters);
4367
4368         /* The extent must have room for an additional bucket */
4369         BUG_ON(new_bucket >=
4370                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4371
4372         /* end_blk points to the last existing bucket */
4373         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4374
4375         /*
4376          * end_blk is the start of the last existing bucket.
4377          * Thus, (end_blk - target_blk) covers the target bucket and
4378          * every bucket after it up to, but not including, the last
4379          * existing bucket.  Then we add the last existing bucket, the
4380          * new bucket, and the first bucket (3 * blk_per_bucket).
4381          */
4382         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4383                   handle->h_buffer_credits;
4384         ret = ocfs2_extend_trans(handle, credits);
4385         if (ret) {
4386                 mlog_errno(ret);
4387                 goto out;
4388         }
4389
4390         ret = ocfs2_xattr_bucket_journal_access(handle, first,
4391                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4392         if (ret) {
4393                 mlog_errno(ret);
4394                 goto out;
4395         }
4396
4397         while (end_blk != target_blk) {
4398                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4399                                             end_blk + blk_per_bucket, 0);
4400                 if (ret)
4401                         goto out;
4402                 end_blk -= blk_per_bucket;
4403         }
4404
4405         /* Move half of the xattr in target_blkno to the next bucket. */
4406         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4407                                         target_blk + blk_per_bucket, NULL, 0);
4408
4409         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4410         ocfs2_xattr_bucket_journal_dirty(handle, first);
4411
4412 out:
4413         return ret;
4414 }
4415
4416 /*
4417  * Add new xattr bucket in an extent record and adjust the buckets
4418  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
4419  * bucket we want to insert into.
4420  *
4421  * In the easy case, we will move all the buckets after target down by
4422  * one. Half of target's xattrs will be moved to the next bucket.
4423  *
4424  * If current cluster is full, we'll allocate a new one.  This may not
4425  * be contiguous.  The underlying calls will make sure that there is
4426  * space for the insert, shifting buckets around if necessary.
4427  * 'target' may be moved by those calls.
4428  */
4429 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4430                                       struct buffer_head *xb_bh,
4431                                       struct ocfs2_xattr_bucket *target,
4432                                       struct ocfs2_xattr_set_ctxt *ctxt)
4433 {
4434         struct ocfs2_xattr_block *xb =
4435                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4436         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4437         struct ocfs2_extent_list *el = &xb_root->xt_list;
4438         u32 name_hash =
4439                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
4440         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4441         int ret, num_buckets, extend = 1;
4442         u64 p_blkno;
4443         u32 e_cpos, num_clusters;
4444         /* The bucket at the front of the extent */
4445         struct ocfs2_xattr_bucket *first;
4446
4447         mlog(0, "Add new xattr bucket starting from %llu\n",
4448              (unsigned long long)bucket_blkno(target));
4449
4450         /* The first bucket of the original extent */
4451         first = ocfs2_xattr_bucket_new(inode);
4452         if (!first) {
4453                 ret = -ENOMEM;
4454                 mlog_errno(ret);
4455                 goto out;
4456         }
4457
4458         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4459                                   &num_clusters, el);
4460         if (ret) {
4461                 mlog_errno(ret);
4462                 goto out;
4463         }
4464
4465         ret = ocfs2_read_xattr_bucket(first, p_blkno);
4466         if (ret) {
4467                 mlog_errno(ret);
4468                 goto out;
4469         }
4470
4471         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4472         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
4473                 /*
4474                  * This can move first+target if the target bucket moves
4475                  * to the new extent.
4476                  */
4477                 ret = ocfs2_add_new_xattr_cluster(inode,
4478                                                   xb_bh,
4479                                                   first,
4480                                                   target,
4481                                                   &num_clusters,
4482                                                   e_cpos,
4483                                                   &extend,
4484                                                   ctxt);
4485                 if (ret) {
4486                         mlog_errno(ret);
4487                         goto out;
4488                 }
4489         }
4490
4491         if (extend) {
4492                 ret = ocfs2_extend_xattr_bucket(inode,
4493                                                 ctxt->handle,
4494                                                 first,
4495                                                 bucket_blkno(target),
4496                                                 num_clusters);
4497                 if (ret)
4498                         mlog_errno(ret);
4499         }
4500
4501 out:
4502         ocfs2_xattr_bucket_free(first);
4503
4504         return ret;
4505 }
4506
4507 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4508                                         struct ocfs2_xattr_bucket *bucket,
4509                                         int offs)
4510 {
4511         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4512
4513         offs = offs % inode->i_sb->s_blocksize;
4514         return bucket_block(bucket, block_off) + offs;
4515 }
4516
4517 /*
4518  * Handle the normal xattr set, including replace, delete and new.
4519  *
4520  * Note: "local" indicates the real data's locality. So we can't
4521  * just its bucket locality by its length.
4522  */
4523 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4524                                          struct ocfs2_xattr_info *xi,
4525                                          struct ocfs2_xattr_search *xs,
4526                                          u32 name_hash,
4527                                          int local)
4528 {
4529         struct ocfs2_xattr_entry *last, *xe;
4530         int name_len = strlen(xi->name);
4531         struct ocfs2_xattr_header *xh = xs->header;
4532         u16 count = le16_to_cpu(xh->xh_count), start;
4533         size_t blocksize = inode->i_sb->s_blocksize;
4534         char *val;
4535         size_t offs, size, new_size;
4536
4537         last = &xh->xh_entries[count];
4538         if (!xs->not_found) {
4539                 xe = xs->here;
4540                 offs = le16_to_cpu(xe->xe_name_offset);
4541                 if (ocfs2_xattr_is_local(xe))
4542                         size = OCFS2_XATTR_SIZE(name_len) +
4543                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4544                 else
4545                         size = OCFS2_XATTR_SIZE(name_len) +
4546                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4547
4548                 /*
4549                  * If the new value will be stored outside, xi->value has been
4550                  * initalized as an empty ocfs2_xattr_value_root, and the same
4551                  * goes with xi->value_len, so we can set new_size safely here.
4552                  * See ocfs2_xattr_set_in_bucket.
4553                  */
4554                 new_size = OCFS2_XATTR_SIZE(name_len) +
4555                            OCFS2_XATTR_SIZE(xi->value_len);
4556
4557                 le16_add_cpu(&xh->xh_name_value_len, -size);
4558                 if (xi->value) {
4559                         if (new_size > size)
4560                                 goto set_new_name_value;
4561
4562                         /* Now replace the old value with new one. */
4563                         if (local)
4564                                 xe->xe_value_size = cpu_to_le64(xi->value_len);
4565                         else
4566                                 xe->xe_value_size = 0;
4567
4568                         val = ocfs2_xattr_bucket_get_val(inode,
4569                                                          xs->bucket, offs);
4570                         memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4571                                size - OCFS2_XATTR_SIZE(name_len));
4572                         if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4573                                 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4574                                        xi->value, xi->value_len);
4575
4576                         le16_add_cpu(&xh->xh_name_value_len, new_size);
4577                         ocfs2_xattr_set_local(xe, local);
4578                         return;
4579                 } else {
4580                         /*
4581                          * Remove the old entry if there is more than one.
4582                          * We don't remove the last entry so that we can
4583                          * use it to indicate the hash value of the empty
4584                          * bucket.
4585                          */
4586                         last -= 1;
4587                         le16_add_cpu(&xh->xh_count, -1);
4588                         if (xh->xh_count) {
4589                                 memmove(xe, xe + 1,
4590                                         (void *)last - (void *)xe);
4591                                 memset(last, 0,
4592                                        sizeof(struct ocfs2_xattr_entry));
4593                         } else
4594                                 xh->xh_free_start =
4595                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4596
4597                         return;
4598                 }
4599         } else {
4600                 /* find a new entry for insert. */
4601                 int low = 0, high = count - 1, tmp;
4602                 struct ocfs2_xattr_entry *tmp_xe;
4603
4604                 while (low <= high && count) {
4605                         tmp = (low + high) / 2;
4606                         tmp_xe = &xh->xh_entries[tmp];
4607
4608                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4609                                 low = tmp + 1;
4610                         else if (name_hash <
4611                                  le32_to_cpu(tmp_xe->xe_name_hash))
4612                                 high = tmp - 1;
4613                         else {
4614                                 low = tmp;
4615                                 break;
4616                         }
4617                 }
4618
4619                 xe = &xh->xh_entries[low];
4620                 if (low != count)
4621                         memmove(xe + 1, xe, (void *)last - (void *)xe);
4622
4623                 le16_add_cpu(&xh->xh_count, 1);
4624                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4625                 xe->xe_name_hash = cpu_to_le32(name_hash);
4626                 xe->xe_name_len = name_len;
4627                 ocfs2_xattr_set_type(xe, xi->name_index);
4628         }
4629
4630 set_new_name_value:
4631         /* Insert the new name+value. */
4632         size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4633
4634         /*
4635          * We must make sure that the name/value pair
4636          * exists in the same block.
4637          */
4638         offs = le16_to_cpu(xh->xh_free_start);
4639         start = offs - size;
4640
4641         if (start >> inode->i_sb->s_blocksize_bits !=
4642             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4643                 offs = offs - offs % blocksize;
4644                 xh->xh_free_start = cpu_to_le16(offs);
4645         }
4646
4647         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4648         xe->xe_name_offset = cpu_to_le16(offs - size);
4649
4650         memset(val, 0, size);
4651         memcpy(val, xi->name, name_len);
4652         memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4653
4654         xe->xe_value_size = cpu_to_le64(xi->value_len);
4655         ocfs2_xattr_set_local(xe, local);
4656         xs->here = xe;
4657         le16_add_cpu(&xh->xh_free_start, -size);
4658         le16_add_cpu(&xh->xh_name_value_len, size);
4659
4660         return;
4661 }
4662
4663 /*
4664  * Set the xattr entry in the specified bucket.
4665  * The bucket is indicated by xs->bucket and it should have the enough
4666  * space for the xattr insertion.
4667  */
4668 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4669                                            handle_t *handle,
4670                                            struct ocfs2_xattr_info *xi,
4671                                            struct ocfs2_xattr_search *xs,
4672                                            u32 name_hash,
4673                                            int local)
4674 {
4675         int ret;
4676         u64 blkno;
4677
4678         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4679              (unsigned long)xi->value_len, xi->name_index,
4680              (unsigned long long)bucket_blkno(xs->bucket));
4681
4682         if (!xs->bucket->bu_bhs[1]) {
4683                 blkno = bucket_blkno(xs->bucket);
4684                 ocfs2_xattr_bucket_relse(xs->bucket);
4685                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4686                 if (ret) {
4687                         mlog_errno(ret);
4688                         goto out;
4689                 }
4690         }
4691
4692         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4693                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4694         if (ret < 0) {
4695                 mlog_errno(ret);
4696                 goto out;
4697         }
4698
4699         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4700         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4701
4702 out:
4703         return ret;
4704 }
4705
4706 /*
4707  * Truncate the specified xe_off entry in xattr bucket.
4708  * bucket is indicated by header_bh and len is the new length.
4709  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4710  *
4711  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4712  */
4713 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4714                                              struct ocfs2_xattr_bucket *bucket,
4715                                              int xe_off,
4716                                              int len,
4717                                              struct ocfs2_xattr_set_ctxt *ctxt)
4718 {
4719         int ret, offset;
4720         u64 value_blk;
4721         struct ocfs2_xattr_entry *xe;
4722         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4723         size_t blocksize = inode->i_sb->s_blocksize;
4724         struct ocfs2_xattr_value_buf vb = {
4725                 .vb_access = ocfs2_journal_access,
4726         };
4727
4728         xe = &xh->xh_entries[xe_off];
4729
4730         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4731
4732         offset = le16_to_cpu(xe->xe_name_offset) +
4733                  OCFS2_XATTR_SIZE(xe->xe_name_len);
4734
4735         value_blk = offset / blocksize;
4736
4737         /* We don't allow ocfs2_xattr_value to be stored in different block. */
4738         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4739
4740         vb.vb_bh = bucket->bu_bhs[value_blk];
4741         BUG_ON(!vb.vb_bh);
4742
4743         vb.vb_xv = (struct ocfs2_xattr_value_root *)
4744                 (vb.vb_bh->b_data + offset % blocksize);
4745
4746         /*
4747          * From here on out we have to dirty the bucket.  The generic
4748          * value calls only modify one of the bucket's bhs, but we need
4749          * to send the bucket at once.  So if they error, they *could* have
4750          * modified something.  We have to assume they did, and dirty
4751          * the whole bucket.  This leaves us in a consistent state.
4752          */
4753         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4754              xe_off, (unsigned long long)bucket_blkno(bucket), len);
4755         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4756         if (ret) {
4757                 mlog_errno(ret);
4758                 goto out;
4759         }
4760
4761         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4762                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4763         if (ret) {
4764                 mlog_errno(ret);
4765                 goto out;
4766         }
4767
4768         xe->xe_value_size = cpu_to_le64(len);
4769
4770         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4771
4772 out:
4773         return ret;
4774 }
4775
4776 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4777                                         struct ocfs2_xattr_search *xs,
4778                                         int len,
4779                                         struct ocfs2_xattr_set_ctxt *ctxt)
4780 {
4781         int ret, offset;
4782         struct ocfs2_xattr_entry *xe = xs->here;
4783         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4784
4785         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4786
4787         offset = xe - xh->xh_entries;
4788         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
4789                                                 offset, len, ctxt);
4790         if (ret)
4791                 mlog_errno(ret);
4792
4793         return ret;
4794 }
4795
4796 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4797                                                 handle_t *handle,
4798                                                 struct ocfs2_xattr_search *xs,
4799                                                 char *val,
4800                                                 int value_len)
4801 {
4802         int ret, offset, block_off;
4803         struct ocfs2_xattr_value_root *xv;
4804         struct ocfs2_xattr_entry *xe = xs->here;
4805         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4806         void *base;
4807
4808         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4809
4810         ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
4811                                                 xe - xh->xh_entries,
4812                                                 &block_off,
4813                                                 &offset);
4814         if (ret) {
4815                 mlog_errno(ret);
4816                 goto out;
4817         }
4818
4819         base = bucket_block(xs->bucket, block_off);
4820         xv = (struct ocfs2_xattr_value_root *)(base + offset +
4821                  OCFS2_XATTR_SIZE(xe->xe_name_len));
4822
4823         ret = __ocfs2_xattr_set_value_outside(inode, handle,
4824                                               xv, val, value_len);
4825         if (ret)
4826                 mlog_errno(ret);
4827 out:
4828         return ret;
4829 }
4830
4831 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4832                                   struct buffer_head *root_bh,
4833                                   u64 blkno,
4834                                   u32 cpos,
4835                                   u32 len)
4836 {
4837         int ret;
4838         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4839         struct inode *tl_inode = osb->osb_tl_inode;
4840         handle_t *handle;
4841         struct ocfs2_xattr_block *xb =
4842                         (struct ocfs2_xattr_block *)root_bh->b_data;
4843         struct ocfs2_alloc_context *meta_ac = NULL;
4844         struct ocfs2_cached_dealloc_ctxt dealloc;
4845         struct ocfs2_extent_tree et;
4846
4847         ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4848
4849         ocfs2_init_dealloc_ctxt(&dealloc);
4850
4851         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4852              cpos, len, (unsigned long long)blkno);
4853
4854         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
4855                                                len);
4856
4857         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4858         if (ret) {
4859                 mlog_errno(ret);
4860                 return ret;
4861         }
4862
4863         mutex_lock(&tl_inode->i_mutex);
4864
4865         if (ocfs2_truncate_log_needs_flush(osb)) {
4866                 ret = __ocfs2_flush_truncate_log(osb);
4867                 if (ret < 0) {
4868                         mlog_errno(ret);
4869                         goto out;
4870                 }
4871         }
4872
4873         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
4874         if (IS_ERR(handle)) {
4875                 ret = -ENOMEM;
4876                 mlog_errno(ret);
4877                 goto out;
4878         }
4879
4880         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4881                                       OCFS2_JOURNAL_ACCESS_WRITE);
4882         if (ret) {
4883                 mlog_errno(ret);
4884                 goto out_commit;
4885         }
4886
4887         ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4888                                   &dealloc);
4889         if (ret) {
4890                 mlog_errno(ret);
4891                 goto out_commit;
4892         }
4893
4894         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4895
4896         ret = ocfs2_journal_dirty(handle, root_bh);
4897         if (ret) {
4898                 mlog_errno(ret);
4899                 goto out_commit;
4900         }
4901
4902         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4903         if (ret)
4904                 mlog_errno(ret);
4905
4906 out_commit:
4907         ocfs2_commit_trans(osb, handle);
4908 out:
4909         ocfs2_schedule_truncate_log_flush(osb, 1);
4910
4911         mutex_unlock(&tl_inode->i_mutex);
4912
4913         if (meta_ac)
4914                 ocfs2_free_alloc_context(meta_ac);
4915
4916         ocfs2_run_deallocs(osb, &dealloc);
4917
4918         return ret;
4919 }
4920
4921 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4922                                          handle_t *handle,
4923                                          struct ocfs2_xattr_search *xs)
4924 {
4925         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4926         struct ocfs2_xattr_entry *last = &xh->xh_entries[
4927                                                 le16_to_cpu(xh->xh_count) - 1];
4928         int ret = 0;
4929
4930         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4931                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4932         if (ret) {
4933                 mlog_errno(ret);
4934                 return;
4935         }
4936
4937         /* Remove the old entry. */
4938         memmove(xs->here, xs->here + 1,
4939                 (void *)last - (void *)xs->here);
4940         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4941         le16_add_cpu(&xh->xh_count, -1);
4942
4943         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4944 }
4945
4946 /*
4947  * Set the xattr name/value in the bucket specified in xs.
4948  *
4949  * As the new value in xi may be stored in the bucket or in an outside cluster,
4950  * we divide the whole process into 3 steps:
4951  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4952  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4953  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4954  * 4. If the clusters for the new outside value can't be allocated, we need
4955  *    to free the xattr we allocated in set.
4956  */
4957 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4958                                      struct ocfs2_xattr_info *xi,
4959                                      struct ocfs2_xattr_search *xs,
4960                                      struct ocfs2_xattr_set_ctxt *ctxt)
4961 {
4962         int ret, local = 1;
4963         size_t value_len;
4964         char *val = (char *)xi->value;
4965         struct ocfs2_xattr_entry *xe = xs->here;
4966         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4967                                               strlen(xi->name));
4968
4969         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4970                 /*
4971                  * We need to truncate the xattr storage first.
4972                  *
4973                  * If both the old and new value are stored to
4974                  * outside block, we only need to truncate
4975                  * the storage and then set the value outside.
4976                  *
4977                  * If the new value should be stored within block,
4978                  * we should free all the outside block first and
4979                  * the modification to the xattr block will be done
4980                  * by following steps.
4981                  */
4982                 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4983                         value_len = xi->value_len;
4984                 else
4985                         value_len = 0;
4986
4987                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4988                                                            value_len,
4989                                                            ctxt);
4990                 if (ret)
4991                         goto out;
4992
4993                 if (value_len)
4994                         goto set_value_outside;
4995         }
4996
4997         value_len = xi->value_len;
4998         /* So we have to handle the inside block change now. */
4999         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5000                 /*
5001                  * If the new value will be stored outside of block,
5002                  * initalize a new empty value root and insert it first.
5003                  */
5004                 local = 0;
5005                 xi->value = &def_xv;
5006                 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
5007         }
5008
5009         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5010                                               name_hash, local);
5011         if (ret) {
5012                 mlog_errno(ret);
5013                 goto out;
5014         }
5015
5016         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5017                 goto out;
5018
5019         /* allocate the space now for the outside block storage. */
5020         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5021                                                    value_len, ctxt);
5022         if (ret) {
5023                 mlog_errno(ret);
5024
5025                 if (xs->not_found) {
5026                         /*
5027                          * We can't allocate enough clusters for outside
5028                          * storage and we have allocated xattr already,
5029                          * so need to remove it.
5030                          */
5031                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5032                 }
5033                 goto out;
5034         }
5035
5036 set_value_outside:
5037         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5038                                                    xs, val, value_len);
5039 out:
5040         return ret;
5041 }
5042
5043 /*
5044  * check whether the xattr bucket is filled up with the same hash value.
5045  * If we want to insert the xattr with the same hash, return -ENOSPC.
5046  * If we want to insert a xattr with different hash value, go ahead
5047  * and ocfs2_divide_xattr_bucket will handle this.
5048  */
5049 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5050                                               struct ocfs2_xattr_bucket *bucket,
5051                                               const char *name)
5052 {
5053         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5054         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5055
5056         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5057                 return 0;
5058
5059         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5060             xh->xh_entries[0].xe_name_hash) {
5061                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5062                      "hash = %u\n",
5063                      (unsigned long long)bucket_blkno(bucket),
5064                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5065                 return -ENOSPC;
5066         }
5067
5068         return 0;
5069 }
5070
5071 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5072                                              struct ocfs2_xattr_info *xi,
5073                                              struct ocfs2_xattr_search *xs,
5074                                              struct ocfs2_xattr_set_ctxt *ctxt)
5075 {
5076         struct ocfs2_xattr_header *xh;
5077         struct ocfs2_xattr_entry *xe;
5078         u16 count, header_size, xh_free_start;
5079         int free, max_free, need, old;
5080         size_t value_size = 0, name_len = strlen(xi->name);
5081         size_t blocksize = inode->i_sb->s_blocksize;
5082         int ret, allocation = 0;
5083
5084         mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5085
5086 try_again:
5087         xh = xs->header;
5088         count = le16_to_cpu(xh->xh_count);
5089         xh_free_start = le16_to_cpu(xh->xh_free_start);
5090         header_size = sizeof(struct ocfs2_xattr_header) +
5091                         count * sizeof(struct ocfs2_xattr_entry);
5092         max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5093                 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5094
5095         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5096                         "of %u which exceed block size\n",
5097                         (unsigned long long)bucket_blkno(xs->bucket),
5098                         header_size);
5099
5100         if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5101                 value_size = OCFS2_XATTR_ROOT_SIZE;
5102         else if (xi->value)
5103                 value_size = OCFS2_XATTR_SIZE(xi->value_len);
5104
5105         if (xs->not_found)
5106                 need = sizeof(struct ocfs2_xattr_entry) +
5107                         OCFS2_XATTR_SIZE(name_len) + value_size;
5108         else {
5109                 need = value_size + OCFS2_XATTR_SIZE(name_len);
5110
5111                 /*
5112                  * We only replace the old value if the new length is smaller
5113                  * than the old one. Otherwise we will allocate new space in the
5114                  * bucket to store it.
5115                  */
5116                 xe = xs->here;
5117                 if (ocfs2_xattr_is_local(xe))
5118                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5119                 else
5120                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5121
5122                 if (old >= value_size)
5123                         need = 0;
5124         }
5125
5126         free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5127         /*
5128          * We need to make sure the new name/value pair
5129          * can exist in the same block.
5130          */
5131         if (xh_free_start % blocksize < need)
5132                 free -= xh_free_start % blocksize;
5133
5134         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5135              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5136              " %u\n", xs->not_found,
5137              (unsigned long long)bucket_blkno(xs->bucket),
5138              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5139              le16_to_cpu(xh->xh_name_value_len));
5140
5141         if (free < need ||
5142             (xs->not_found &&
5143              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5144                 if (need <= max_free &&
5145                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5146                         /*
5147                          * We can create the space by defragment. Since only the
5148                          * name/value will be moved, the xe shouldn't be changed
5149                          * in xs.
5150                          */
5151                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5152                                                         xs->bucket);
5153                         if (ret) {
5154                                 mlog_errno(ret);
5155                                 goto out;
5156                         }
5157
5158                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5159                         free = xh_free_start - header_size
5160                                 - OCFS2_XATTR_HEADER_GAP;
5161                         if (xh_free_start % blocksize < need)
5162                                 free -= xh_free_start % blocksize;
5163
5164                         if (free >= need)
5165                                 goto xattr_set;
5166
5167                         mlog(0, "Can't get enough space for xattr insert by "
5168                              "defragment. Need %u bytes, but we have %d, so "
5169                              "allocate new bucket for it.\n", need, free);
5170                 }
5171
5172                 /*
5173                  * We have to add new buckets or clusters and one
5174                  * allocation should leave us enough space for insert.
5175                  */
5176                 BUG_ON(allocation);
5177
5178                 /*
5179                  * We do not allow for overlapping ranges between buckets. And
5180                  * the maximum number of collisions we will allow for then is
5181                  * one bucket's worth, so check it here whether we need to
5182                  * add a new bucket for the insert.
5183                  */
5184                 ret = ocfs2_check_xattr_bucket_collision(inode,
5185                                                          xs->bucket,
5186                                                          xi->name);
5187                 if (ret) {
5188                         mlog_errno(ret);
5189                         goto out;
5190                 }
5191
5192                 ret = ocfs2_add_new_xattr_bucket(inode,
5193                                                  xs->xattr_bh,
5194                                                  xs->bucket,
5195                                                  ctxt);
5196                 if (ret) {
5197                         mlog_errno(ret);
5198                         goto out;
5199                 }
5200
5201                 /*
5202                  * ocfs2_add_new_xattr_bucket() will have updated
5203                  * xs->bucket if it moved, but it will not have updated
5204                  * any of the other search fields.  Thus, we drop it and
5205                  * re-search.  Everything should be cached, so it'll be
5206                  * quick.
5207                  */
5208                 ocfs2_xattr_bucket_relse(xs->bucket);
5209                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5210                                                    xi->name_index,
5211                                                    xi->name, xs);
5212                 if (ret && ret != -ENODATA)
5213                         goto out;
5214                 xs->not_found = ret;
5215                 allocation = 1;
5216                 goto try_again;
5217         }
5218
5219 xattr_set:
5220         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5221 out:
5222         mlog_exit(ret);
5223         return ret;
5224 }
5225
5226 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5227                                         struct ocfs2_xattr_bucket *bucket,
5228                                         void *para)
5229 {
5230         int ret = 0;
5231         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5232         u16 i;
5233         struct ocfs2_xattr_entry *xe;
5234         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5235         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5236         int credits = ocfs2_remove_extent_credits(osb->sb) +
5237                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5238
5239
5240         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5241
5242         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5243                 xe = &xh->xh_entries[i];
5244                 if (ocfs2_xattr_is_local(xe))
5245                         continue;
5246
5247                 ctxt.handle = ocfs2_start_trans(osb, credits);
5248                 if (IS_ERR(ctxt.handle)) {
5249                         ret = PTR_ERR(ctxt.handle);
5250                         mlog_errno(ret);
5251                         break;
5252                 }
5253
5254                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5255                                                         i, 0, &ctxt);
5256
5257                 ocfs2_commit_trans(osb, ctxt.handle);
5258                 if (ret) {
5259                         mlog_errno(ret);
5260                         break;
5261                 }
5262         }
5263
5264         ocfs2_schedule_truncate_log_flush(osb, 1);
5265         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5266         return ret;
5267 }
5268
5269 static int ocfs2_delete_xattr_index_block(struct inode *inode,
5270                                           struct buffer_head *xb_bh)
5271 {
5272         struct ocfs2_xattr_block *xb =
5273                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5274         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5275         int ret = 0;
5276         u32 name_hash = UINT_MAX, e_cpos, num_clusters;
5277         u64 p_blkno;
5278
5279         if (le16_to_cpu(el->l_next_free_rec) == 0)
5280                 return 0;
5281
5282         while (name_hash > 0) {
5283                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
5284                                           &e_cpos, &num_clusters, el);
5285                 if (ret) {
5286                         mlog_errno(ret);
5287                         goto out;
5288                 }
5289
5290                 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
5291                                                   ocfs2_delete_xattr_in_bucket,
5292                                                   NULL);
5293                 if (ret) {
5294                         mlog_errno(ret);
5295                         goto out;
5296                 }
5297
5298                 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
5299                                              p_blkno, e_cpos, num_clusters);
5300                 if (ret) {
5301                         mlog_errno(ret);
5302                         break;
5303                 }
5304
5305                 if (e_cpos == 0)
5306                         break;
5307
5308                 name_hash = e_cpos - 1;
5309         }
5310
5311 out:
5312         return ret;
5313 }
5314
5315 /*
5316  * 'security' attributes support
5317  */
5318 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5319                                         size_t list_size, const char *name,
5320                                         size_t name_len)
5321 {
5322         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5323         const size_t total_len = prefix_len + name_len + 1;
5324
5325         if (list && total_len <= list_size) {
5326                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5327                 memcpy(list + prefix_len, name, name_len);
5328                 list[prefix_len + name_len] = '\0';
5329         }
5330         return total_len;
5331 }
5332
5333 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5334                                     void *buffer, size_t size)
5335 {
5336         if (strcmp(name, "") == 0)
5337                 return -EINVAL;
5338         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5339                                buffer, size);
5340 }
5341
5342 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5343                                     const void *value, size_t size, int flags)
5344 {
5345         if (strcmp(name, "") == 0)
5346                 return -EINVAL;
5347
5348         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5349                                size, flags);
5350 }
5351
5352 int ocfs2_init_security_get(struct inode *inode,
5353                             struct inode *dir,
5354                             struct ocfs2_security_xattr_info *si)
5355 {
5356         /* check whether ocfs2 support feature xattr */
5357         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
5358                 return -EOPNOTSUPP;
5359         return security_inode_init_security(inode, dir, &si->name, &si->value,
5360                                             &si->value_len);
5361 }
5362
5363 int ocfs2_init_security_set(handle_t *handle,
5364                             struct inode *inode,
5365                             struct buffer_head *di_bh,
5366                             struct ocfs2_security_xattr_info *si,
5367                             struct ocfs2_alloc_context *xattr_ac,
5368                             struct ocfs2_alloc_context *data_ac)
5369 {
5370         return ocfs2_xattr_set_handle(handle, inode, di_bh,
5371                                      OCFS2_XATTR_INDEX_SECURITY,
5372                                      si->name, si->value, si->value_len, 0,
5373                                      xattr_ac, data_ac);
5374 }
5375
5376 struct xattr_handler ocfs2_xattr_security_handler = {
5377         .prefix = XATTR_SECURITY_PREFIX,
5378         .list   = ocfs2_xattr_security_list,
5379         .get    = ocfs2_xattr_security_get,
5380         .set    = ocfs2_xattr_security_set,
5381 };
5382
5383 /*
5384  * 'trusted' attributes support
5385  */
5386 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
5387                                        size_t list_size, const char *name,
5388                                        size_t name_len)
5389 {
5390         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
5391         const size_t total_len = prefix_len + name_len + 1;
5392
5393         if (list && total_len <= list_size) {
5394                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
5395                 memcpy(list + prefix_len, name, name_len);
5396                 list[prefix_len + name_len] = '\0';
5397         }
5398         return total_len;
5399 }
5400
5401 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
5402                                    void *buffer, size_t size)
5403 {
5404         if (strcmp(name, "") == 0)
5405                 return -EINVAL;
5406         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
5407                                buffer, size);
5408 }
5409
5410 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
5411                                    const void *value, size_t size, int flags)
5412 {
5413         if (strcmp(name, "") == 0)
5414                 return -EINVAL;
5415
5416         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
5417                                size, flags);
5418 }
5419
5420 struct xattr_handler ocfs2_xattr_trusted_handler = {
5421         .prefix = XATTR_TRUSTED_PREFIX,
5422         .list   = ocfs2_xattr_trusted_list,
5423         .get    = ocfs2_xattr_trusted_get,
5424         .set    = ocfs2_xattr_trusted_set,
5425 };
5426
5427 /*
5428  * 'user' attributes support
5429  */
5430 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
5431                                     size_t list_size, const char *name,
5432                                     size_t name_len)
5433 {
5434         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
5435         const size_t total_len = prefix_len + name_len + 1;
5436         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5437
5438         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5439                 return 0;
5440
5441         if (list && total_len <= list_size) {
5442                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
5443                 memcpy(list + prefix_len, name, name_len);
5444                 list[prefix_len + name_len] = '\0';
5445         }
5446         return total_len;
5447 }
5448
5449 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
5450                                 void *buffer, size_t size)
5451 {
5452         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5453
5454         if (strcmp(name, "") == 0)
5455                 return -EINVAL;
5456         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5457                 return -EOPNOTSUPP;
5458         return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
5459                                buffer, size);
5460 }
5461
5462 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
5463                                 const void *value, size_t size, int flags)
5464 {
5465         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5466
5467         if (strcmp(name, "") == 0)
5468                 return -EINVAL;
5469         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5470                 return -EOPNOTSUPP;
5471
5472         return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
5473                                size, flags);
5474 }
5475
5476 struct xattr_handler ocfs2_xattr_user_handler = {
5477         .prefix = XATTR_USER_PREFIX,
5478         .list   = ocfs2_xattr_user_list,
5479         .get    = ocfs2_xattr_user_get,
5480         .set    = ocfs2_xattr_user_set,
5481 };