855827320ff64794a33e19b6cd0f4879bdaf2f74
[safe/jmp/linux-2.6] / fs / xfs / quota / xfs_qm.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_btree.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_itable.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_bmap.h"
44 #include "xfs_rw.h"
45 #include "xfs_attr.h"
46 #include "xfs_buf_item.h"
47 #include "xfs_trans_space.h"
48 #include "xfs_utils.h"
49 #include "xfs_qm.h"
50 #include "xfs_trace.h"
51
52 /*
53  * The global quota manager. There is only one of these for the entire
54  * system, _not_ one per file system. XQM keeps track of the overall
55  * quota functionality, including maintaining the freelist and hash
56  * tables of dquots.
57  */
58 struct mutex    xfs_Gqm_lock;
59 struct xfs_qm   *xfs_Gqm;
60 uint            ndquot;
61
62 kmem_zone_t     *qm_dqzone;
63 kmem_zone_t     *qm_dqtrxzone;
64
65 static cred_t   xfs_zerocr;
66
67 STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
68 STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
69
70 STATIC void     xfs_qm_freelist_init(xfs_frlist_t *);
71 STATIC void     xfs_qm_freelist_destroy(xfs_frlist_t *);
72
73 STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
74 STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
75 STATIC int      xfs_qm_shake(int, gfp_t);
76
77 static struct shrinker xfs_qm_shaker = {
78         .shrink = xfs_qm_shake,
79         .seeks = DEFAULT_SEEKS,
80 };
81
82 #ifdef DEBUG
83 extern struct mutex     qcheck_lock;
84 #endif
85
#ifdef QUOTADEBUG
/*
 * Debug helper: print one line per dquot on the given mount's dquot list
 * (id, type string, block count, inode count and reference count).
 *
 * The list head is qi_dqlist and dquots are linked through q_mplist, the
 * same pair every other walker in this file uses.  No locking is done
 * here; NOTE(review): callers presumably hold qi_dqlist_lock - confirm.
 */
static void
xfs_qm_dquot_list_print(
	struct xfs_mount *mp)
{
	xfs_dquot_t	*dqp;
	int		i = 0;

	list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
		cmn_err(CE_DEBUG, "   %d. \"%d (%s)\"   "
				  "bcnt = %lld, icnt = %lld, refs = %d",
			i++, be32_to_cpu(dqp->q_core.d_id),
			DQFLAGTO_TYPESTR(dqp),
			(long long)be64_to_cpu(dqp->q_core.d_bcount),
			(long long)be64_to_cpu(dqp->q_core.d_icount),
			dqp->q_nrefs);
	}
}
#else
/* Non-debug builds: nothing to print. */
static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
#endif
107
108 /*
109  * Initialize the XQM structure.
110  * Note that there is not one quota manager per file system.
111  */
112 STATIC struct xfs_qm *
113 xfs_Gqm_init(void)
114 {
115         xfs_dqhash_t    *udqhash, *gdqhash;
116         xfs_qm_t        *xqm;
117         size_t          hsize;
118         uint            i;
119
120         /*
121          * Initialize the dquot hash tables.
122          */
123         udqhash = kmem_zalloc_greedy(&hsize,
124                                      XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
125                                      XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
126         if (!udqhash)
127                 goto out;
128
129         gdqhash = kmem_zalloc_large(hsize);
130         if (!gdqhash)
131                 goto out_free_udqhash;
132
133         hsize /= sizeof(xfs_dqhash_t);
134         ndquot = hsize << 8;
135
136         xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
137         xqm->qm_dqhashmask = hsize - 1;
138         xqm->qm_usr_dqhtable = udqhash;
139         xqm->qm_grp_dqhtable = gdqhash;
140         ASSERT(xqm->qm_usr_dqhtable != NULL);
141         ASSERT(xqm->qm_grp_dqhtable != NULL);
142
143         for (i = 0; i < hsize; i++) {
144                 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
145                 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
146         }
147
148         /*
149          * Freelist of all dquots of all file systems
150          */
151         xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
152
153         /*
154          * dquot zone. we register our own low-memory callback.
155          */
156         if (!qm_dqzone) {
157                 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
158                                                 "xfs_dquots");
159                 qm_dqzone = xqm->qm_dqzone;
160         } else
161                 xqm->qm_dqzone = qm_dqzone;
162
163         register_shrinker(&xfs_qm_shaker);
164
165         /*
166          * The t_dqinfo portion of transactions.
167          */
168         if (!qm_dqtrxzone) {
169                 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
170                                                    "xfs_dqtrx");
171                 qm_dqtrxzone = xqm->qm_dqtrxzone;
172         } else
173                 xqm->qm_dqtrxzone = qm_dqtrxzone;
174
175         atomic_set(&xqm->qm_totaldquots, 0);
176         xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
177         xqm->qm_nrefs = 0;
178 #ifdef DEBUG
179         mutex_init(&qcheck_lock);
180 #endif
181         return xqm;
182
183  out_free_udqhash:
184         kmem_free_large(udqhash);
185  out:
186         return NULL;
187 }
188
189 /*
190  * Destroy the global quota manager when its reference count goes to zero.
191  */
192 STATIC void
193 xfs_qm_destroy(
194         struct xfs_qm   *xqm)
195 {
196         int             hsize, i;
197
198         ASSERT(xqm != NULL);
199         ASSERT(xqm->qm_nrefs == 0);
200         unregister_shrinker(&xfs_qm_shaker);
201         hsize = xqm->qm_dqhashmask + 1;
202         for (i = 0; i < hsize; i++) {
203                 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
204                 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
205         }
206         kmem_free_large(xqm->qm_usr_dqhtable);
207         kmem_free_large(xqm->qm_grp_dqhtable);
208         xqm->qm_usr_dqhtable = NULL;
209         xqm->qm_grp_dqhtable = NULL;
210         xqm->qm_dqhashmask = 0;
211         xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
212 #ifdef DEBUG
213         mutex_destroy(&qcheck_lock);
214 #endif
215         kmem_free(xqm);
216 }
217
218 /*
219  * Called at mount time to let XQM know that another file system is
220  * starting quotas. This isn't crucial information as the individual mount
221  * structures are pretty independent, but it helps the XQM keep a
222  * global view of what's going on.
223  */
224 /* ARGSUSED */
225 STATIC int
226 xfs_qm_hold_quotafs_ref(
227         struct xfs_mount *mp)
228 {
229         /*
230          * Need to lock the xfs_Gqm structure for things like this. For example,
231          * the structure could disappear between the entry to this routine and
232          * a HOLD operation if not locked.
233          */
234         mutex_lock(&xfs_Gqm_lock);
235
236         if (!xfs_Gqm) {
237                 xfs_Gqm = xfs_Gqm_init();
238                 if (!xfs_Gqm)
239                         return ENOMEM;
240         }
241
242         /*
243          * We can keep a list of all filesystems with quotas mounted for
244          * debugging and statistical purposes, but ...
245          * Just take a reference and get out.
246          */
247         xfs_Gqm->qm_nrefs++;
248         mutex_unlock(&xfs_Gqm_lock);
249
250         return 0;
251 }
252
253
254 /*
255  * Release the reference that a filesystem took at mount time,
256  * so that we know when we need to destroy the entire quota manager.
257  */
258 /* ARGSUSED */
259 STATIC void
260 xfs_qm_rele_quotafs_ref(
261         struct xfs_mount *mp)
262 {
263         xfs_dquot_t     *dqp, *nextdqp;
264
265         ASSERT(xfs_Gqm);
266         ASSERT(xfs_Gqm->qm_nrefs > 0);
267
268         /*
269          * Go thru the freelist and destroy all inactive dquots.
270          */
271         xfs_qm_freelist_lock(xfs_Gqm);
272
273         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
274              dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
275                 xfs_dqlock(dqp);
276                 nextdqp = dqp->dq_flnext;
277                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
278                         ASSERT(dqp->q_mount == NULL);
279                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
280                         ASSERT(dqp->HL_PREVP == NULL);
281                         ASSERT(list_empty(&dqp->q_mplist));
282                         XQM_FREELIST_REMOVE(dqp);
283                         xfs_dqunlock(dqp);
284                         xfs_qm_dqdestroy(dqp);
285                 } else {
286                         xfs_dqunlock(dqp);
287                 }
288                 dqp = nextdqp;
289         }
290         xfs_qm_freelist_unlock(xfs_Gqm);
291
292         /*
293          * Destroy the entire XQM. If somebody mounts with quotaon, this'll
294          * be restarted.
295          */
296         mutex_lock(&xfs_Gqm_lock);
297         if (--xfs_Gqm->qm_nrefs == 0) {
298                 xfs_qm_destroy(xfs_Gqm);
299                 xfs_Gqm = NULL;
300         }
301         mutex_unlock(&xfs_Gqm_lock);
302 }
303
304 /*
305  * Just destroy the quotainfo structure.
306  */
307 void
308 xfs_qm_unmount(
309         struct xfs_mount        *mp)
310 {
311         if (mp->m_quotainfo) {
312                 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
313                 xfs_qm_destroy_quotainfo(mp);
314         }
315 }
316
317
318 /*
319  * This is called from xfs_mountfs to start quotas and initialize all
320  * necessary data structures like quotainfo.  This is also responsible for
321  * running a quotacheck as necessary.  We are guaranteed that the superblock
322  * is consistently read in at this point.
323  *
324  * If we fail here, the mount will continue with quota turned off. We don't
325  * need to inidicate success or failure at all.
326  */
327 void
328 xfs_qm_mount_quotas(
329         xfs_mount_t     *mp)
330 {
331         int             error = 0;
332         uint            sbf;
333
334         /*
335          * If quotas on realtime volumes is not supported, we disable
336          * quotas immediately.
337          */
338         if (mp->m_sb.sb_rextents) {
339                 cmn_err(CE_NOTE,
340                         "Cannot turn on quotas for realtime filesystem %s",
341                         mp->m_fsname);
342                 mp->m_qflags = 0;
343                 goto write_changes;
344         }
345
346         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
347
348         /*
349          * Allocate the quotainfo structure inside the mount struct, and
350          * create quotainode(s), and change/rev superblock if necessary.
351          */
352         error = xfs_qm_init_quotainfo(mp);
353         if (error) {
354                 /*
355                  * We must turn off quotas.
356                  */
357                 ASSERT(mp->m_quotainfo == NULL);
358                 mp->m_qflags = 0;
359                 goto write_changes;
360         }
361         /*
362          * If any of the quotas are not consistent, do a quotacheck.
363          */
364         if (XFS_QM_NEED_QUOTACHECK(mp)) {
365                 error = xfs_qm_quotacheck(mp);
366                 if (error) {
367                         /* Quotacheck failed and disabled quotas. */
368                         return;
369                 }
370         }
371         /* 
372          * If one type of quotas is off, then it will lose its
373          * quotachecked status, since we won't be doing accounting for
374          * that type anymore.
375          */
376         if (!XFS_IS_UQUOTA_ON(mp))
377                 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
378         if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
379                 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
380
381  write_changes:
382         /*
383          * We actually don't have to acquire the m_sb_lock at all.
384          * This can only be called from mount, and that's single threaded. XXX
385          */
386         spin_lock(&mp->m_sb_lock);
387         sbf = mp->m_sb.sb_qflags;
388         mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
389         spin_unlock(&mp->m_sb_lock);
390
391         if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
392                 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
393                         /*
394                          * We could only have been turning quotas off.
395                          * We aren't in very good shape actually because
396                          * the incore structures are convinced that quotas are
397                          * off, but the on disk superblock doesn't know that !
398                          */
399                         ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
400                         xfs_fs_cmn_err(CE_ALERT, mp,
401                                 "XFS mount_quotas: Superblock update failed!");
402                 }
403         }
404
405         if (error) {
406                 xfs_fs_cmn_err(CE_WARN, mp,
407                         "Failed to initialize disk quotas.");
408                 return;
409         }
410
411 #ifdef QUOTADEBUG
412         if (XFS_IS_QUOTA_ON(mp))
413                 xfs_qm_internalqcheck(mp);
414 #endif
415 }
416
417 /*
418  * Called from the vfsops layer.
419  */
420 void
421 xfs_qm_unmount_quotas(
422         xfs_mount_t     *mp)
423 {
424         /*
425          * Release the dquots that root inode, et al might be holding,
426          * before we flush quotas and blow away the quotainfo structure.
427          */
428         ASSERT(mp->m_rootip);
429         xfs_qm_dqdetach(mp->m_rootip);
430         if (mp->m_rbmip)
431                 xfs_qm_dqdetach(mp->m_rbmip);
432         if (mp->m_rsumip)
433                 xfs_qm_dqdetach(mp->m_rsumip);
434
435         /*
436          * Release the quota inodes.
437          */
438         if (mp->m_quotainfo) {
439                 if (mp->m_quotainfo->qi_uquotaip) {
440                         IRELE(mp->m_quotainfo->qi_uquotaip);
441                         mp->m_quotainfo->qi_uquotaip = NULL;
442                 }
443                 if (mp->m_quotainfo->qi_gquotaip) {
444                         IRELE(mp->m_quotainfo->qi_gquotaip);
445                         mp->m_quotainfo->qi_gquotaip = NULL;
446                 }
447         }
448 }
449
450 /*
451  * Flush all dquots of the given file system to disk. The dquots are
452  * _not_ purged from memory here, just their data written to disk.
453  */
454 STATIC int
455 xfs_qm_dqflush_all(
456         xfs_mount_t     *mp,
457         int             sync_mode)
458 {
459         int             recl;
460         xfs_dquot_t     *dqp;
461         int             niters;
462         int             error;
463
464         if (mp->m_quotainfo == NULL)
465                 return 0;
466         niters = 0;
467 again:
468         mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
469         list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
470                 xfs_dqlock(dqp);
471                 if (! XFS_DQ_IS_DIRTY(dqp)) {
472                         xfs_dqunlock(dqp);
473                         continue;
474                 }
475
476                 /* XXX a sentinel would be better */
477                 recl = mp->m_quotainfo->qi_dqreclaims;
478                 if (!xfs_dqflock_nowait(dqp)) {
479                         /*
480                          * If we can't grab the flush lock then check
481                          * to see if the dquot has been flushed delayed
482                          * write.  If so, grab its buffer and send it
483                          * out immediately.  We'll be able to acquire
484                          * the flush lock when the I/O completes.
485                          */
486                         xfs_qm_dqflock_pushbuf_wait(dqp);
487                 }
488                 /*
489                  * Let go of the mplist lock. We don't want to hold it
490                  * across a disk write.
491                  */
492                 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
493                 error = xfs_qm_dqflush(dqp, sync_mode);
494                 xfs_dqunlock(dqp);
495                 if (error)
496                         return error;
497
498                 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
499                 if (recl != mp->m_quotainfo->qi_dqreclaims) {
500                         mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
501                         /* XXX restart limit */
502                         goto again;
503                 }
504         }
505
506         mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
507         /* return ! busy */
508         return 0;
509 }
510 /*
511  * Release the group dquot pointers the user dquots may be
512  * carrying around as a hint. mplist is locked on entry and exit.
513  */
514 STATIC void
515 xfs_qm_detach_gdquots(
516         xfs_mount_t     *mp)
517 {
518         xfs_dquot_t     *dqp, *gdqp;
519         int             nrecl;
520
521  again:
522         ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
523         list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
524                 xfs_dqlock(dqp);
525                 if ((gdqp = dqp->q_gdquot)) {
526                         xfs_dqlock(gdqp);
527                         dqp->q_gdquot = NULL;
528                 }
529                 xfs_dqunlock(dqp);
530
531                 if (gdqp) {
532                         /*
533                          * Can't hold the mplist lock across a dqput.
534                          * XXXmust convert to marker based iterations here.
535                          */
536                         nrecl = mp->m_quotainfo->qi_dqreclaims;
537                         mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
538                         xfs_qm_dqput(gdqp);
539
540                         mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
541                         if (nrecl != mp->m_quotainfo->qi_dqreclaims)
542                                 goto again;
543                 }
544         }
545 }
546
547 /*
548  * Go through all the incore dquots of this file system and take them
549  * off the mplist and hashlist, if the dquot type matches the dqtype
550  * parameter. This is used when turning off quota accounting for
551  * users and/or groups, as well as when the filesystem is unmounting.
552  */
553 STATIC int
554 xfs_qm_dqpurge_int(
555         xfs_mount_t     *mp,
556         uint            flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
557 {
558         xfs_dquot_t     *dqp, *n;
559         uint            dqtype;
560         int             nrecl;
561         int             nmisses;
562
563         if (mp->m_quotainfo == NULL)
564                 return 0;
565
566         dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
567         dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
568         dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
569
570         mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
571
572         /*
573          * In the first pass through all incore dquots of this filesystem,
574          * we release the group dquot pointers the user dquots may be
575          * carrying around as a hint. We need to do this irrespective of
576          * what's being turned off.
577          */
578         xfs_qm_detach_gdquots(mp);
579
580       again:
581         nmisses = 0;
582         ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
583         /*
584          * Try to get rid of all of the unwanted dquots. The idea is to
585          * get them off mplist and hashlist, but leave them on freelist.
586          */
587         list_for_each_entry_safe(dqp, n, &mp->m_quotainfo->qi_dqlist, q_mplist) {
588                 /*
589                  * It's OK to look at the type without taking dqlock here.
590                  * We're holding the mplist lock here, and that's needed for
591                  * a dqreclaim.
592                  */
593                 if ((dqp->dq_flags & dqtype) == 0)
594                         continue;
595
596                 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
597                         nrecl = mp->m_quotainfo->qi_dqreclaims;
598                         mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
599                         mutex_lock(&dqp->q_hash->qh_lock);
600                         mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
601
602                         /*
603                          * XXXTheoretically, we can get into a very long
604                          * ping pong game here.
605                          * No one can be adding dquots to the mplist at
606                          * this point, but somebody might be taking things off.
607                          */
608                         if (nrecl != mp->m_quotainfo->qi_dqreclaims) {
609                                 mutex_unlock(&dqp->q_hash->qh_lock);
610                                 goto again;
611                         }
612                 }
613
614                 /*
615                  * Take the dquot off the mplist and hashlist. It may remain on
616                  * freelist in INACTIVE state.
617                  */
618                 nmisses += xfs_qm_dqpurge(dqp);
619         }
620         mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
621         return nmisses;
622 }
623
624 int
625 xfs_qm_dqpurge_all(
626         xfs_mount_t     *mp,
627         uint            flags)
628 {
629         int             ndquots;
630
631         /*
632          * Purge the dquot cache.
633          * None of the dquots should really be busy at this point.
634          */
635         if (mp->m_quotainfo) {
636                 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
637                         delay(ndquots * 10);
638                 }
639         }
640         return 0;
641 }
642
643 STATIC int
644 xfs_qm_dqattach_one(
645         xfs_inode_t     *ip,
646         xfs_dqid_t      id,
647         uint            type,
648         uint            doalloc,
649         xfs_dquot_t     *udqhint, /* hint */
650         xfs_dquot_t     **IO_idqpp)
651 {
652         xfs_dquot_t     *dqp;
653         int             error;
654
655         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
656         error = 0;
657
658         /*
659          * See if we already have it in the inode itself. IO_idqpp is
660          * &i_udquot or &i_gdquot. This made the code look weird, but
661          * made the logic a lot simpler.
662          */
663         dqp = *IO_idqpp;
664         if (dqp) {
665                 trace_xfs_dqattach_found(dqp);
666                 return 0;
667         }
668
669         /*
670          * udqhint is the i_udquot field in inode, and is non-NULL only
671          * when the type arg is group/project. Its purpose is to save a
672          * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
673          * the user dquot.
674          */
675         if (udqhint) {
676                 ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
677                 xfs_dqlock(udqhint);
678
679                 /*
680                  * No need to take dqlock to look at the id.
681                  *
682                  * The ID can't change until it gets reclaimed, and it won't
683                  * be reclaimed as long as we have a ref from inode and we
684                  * hold the ilock.
685                  */
686                 dqp = udqhint->q_gdquot;
687                 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
688                         xfs_dqlock(dqp);
689                         XFS_DQHOLD(dqp);
690                         ASSERT(*IO_idqpp == NULL);
691                         *IO_idqpp = dqp;
692
693                         xfs_dqunlock(dqp);
694                         xfs_dqunlock(udqhint);
695                         return 0;
696                 }
697
698                 /*
699                  * We can't hold a dquot lock when we call the dqget code.
700                  * We'll deadlock in no time, because of (not conforming to)
701                  * lock ordering - the inodelock comes before any dquot lock,
702                  * and we may drop and reacquire the ilock in xfs_qm_dqget().
703                  */
704                 xfs_dqunlock(udqhint);
705         }
706
707         /*
708          * Find the dquot from somewhere. This bumps the
709          * reference count of dquot and returns it locked.
710          * This can return ENOENT if dquot didn't exist on
711          * disk and we didn't ask it to allocate;
712          * ESRCH if quotas got turned off suddenly.
713          */
714         error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
715         if (error)
716                 return error;
717
718         trace_xfs_dqattach_get(dqp);
719
720         /*
721          * dqget may have dropped and re-acquired the ilock, but it guarantees
722          * that the dquot returned is the one that should go in the inode.
723          */
724         *IO_idqpp = dqp;
725         xfs_dqunlock(dqp);
726         return 0;
727 }
728
729
730 /*
731  * Given a udquot and gdquot, attach a ptr to the group dquot in the
732  * udquot as a hint for future lookups. The idea sounds simple, but the
733  * execution isn't, because the udquot might have a group dquot attached
734  * already and getting rid of that gets us into lock ordering constraints.
735  * The process is complicated more by the fact that the dquots may or may not
736  * be locked on entry.
737  */
738 STATIC void
739 xfs_qm_dqattach_grouphint(
740         xfs_dquot_t     *udq,
741         xfs_dquot_t     *gdq)
742 {
743         xfs_dquot_t     *tmp;
744
745         xfs_dqlock(udq);
746
747         if ((tmp = udq->q_gdquot)) {
748                 if (tmp == gdq) {
749                         xfs_dqunlock(udq);
750                         return;
751                 }
752
753                 udq->q_gdquot = NULL;
754                 /*
755                  * We can't keep any dqlocks when calling dqrele,
756                  * because the freelist lock comes before dqlocks.
757                  */
758                 xfs_dqunlock(udq);
759                 /*
760                  * we took a hard reference once upon a time in dqget,
761                  * so give it back when the udquot no longer points at it
762                  * dqput() does the unlocking of the dquot.
763                  */
764                 xfs_qm_dqrele(tmp);
765
766                 xfs_dqlock(udq);
767                 xfs_dqlock(gdq);
768
769         } else {
770                 ASSERT(XFS_DQ_IS_LOCKED(udq));
771                 xfs_dqlock(gdq);
772         }
773
774         ASSERT(XFS_DQ_IS_LOCKED(udq));
775         ASSERT(XFS_DQ_IS_LOCKED(gdq));
776         /*
777          * Somebody could have attached a gdquot here,
778          * when we dropped the uqlock. If so, just do nothing.
779          */
780         if (udq->q_gdquot == NULL) {
781                 XFS_DQHOLD(gdq);
782                 udq->q_gdquot = gdq;
783         }
784
785         xfs_dqunlock(gdq);
786         xfs_dqunlock(udq);
787 }
788
789
790 /*
791  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
792  * into account.
793  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
794  * Inode may get unlocked and relocked in here, and the caller must deal with
795  * the consequences.
796  */
797 int
798 xfs_qm_dqattach_locked(
799         xfs_inode_t     *ip,
800         uint            flags)
801 {
802         xfs_mount_t     *mp = ip->i_mount;
803         uint            nquotas = 0;
804         int             error = 0;
805
806         if (!XFS_IS_QUOTA_RUNNING(mp) ||
807             !XFS_IS_QUOTA_ON(mp) ||
808             !XFS_NOT_DQATTACHED(mp, ip) ||
809             ip->i_ino == mp->m_sb.sb_uquotino ||
810             ip->i_ino == mp->m_sb.sb_gquotino)
811                 return 0;
812
813         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
814
815         if (XFS_IS_UQUOTA_ON(mp)) {
816                 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
817                                                 flags & XFS_QMOPT_DQALLOC,
818                                                 NULL, &ip->i_udquot);
819                 if (error)
820                         goto done;
821                 nquotas++;
822         }
823
824         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
825         if (XFS_IS_OQUOTA_ON(mp)) {
826                 error = XFS_IS_GQUOTA_ON(mp) ?
827                         xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
828                                                 flags & XFS_QMOPT_DQALLOC,
829                                                 ip->i_udquot, &ip->i_gdquot) :
830                         xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
831                                                 flags & XFS_QMOPT_DQALLOC,
832                                                 ip->i_udquot, &ip->i_gdquot);
833                 /*
834                  * Don't worry about the udquot that we may have
835                  * attached above. It'll get detached, if not already.
836                  */
837                 if (error)
838                         goto done;
839                 nquotas++;
840         }
841
842         /*
843          * Attach this group quota to the user quota as a hint.
844          * This WON'T, in general, result in a thrash.
845          */
846         if (nquotas == 2) {
847                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
848                 ASSERT(ip->i_udquot);
849                 ASSERT(ip->i_gdquot);
850
851                 /*
852                  * We may or may not have the i_udquot locked at this point,
853                  * but this check is OK since we don't depend on the i_gdquot to
854                  * be accurate 100% all the time. It is just a hint, and this
855                  * will succeed in general.
856                  */
857                 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
858                         goto done;
859                 /*
860                  * Attach i_gdquot to the gdquot hint inside the i_udquot.
861                  */
862                 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
863         }
864
865  done:
866 #ifdef QUOTADEBUG
867         if (! error) {
868                 if (XFS_IS_UQUOTA_ON(mp))
869                         ASSERT(ip->i_udquot);
870                 if (XFS_IS_OQUOTA_ON(mp))
871                         ASSERT(ip->i_gdquot);
872         }
873         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
874 #endif
875         return error;
876 }
877
878 int
879 xfs_qm_dqattach(
880         struct xfs_inode        *ip,
881         uint                    flags)
882 {
883         int                     error;
884
885         xfs_ilock(ip, XFS_ILOCK_EXCL);
886         error = xfs_qm_dqattach_locked(ip, flags);
887         xfs_iunlock(ip, XFS_ILOCK_EXCL);
888
889         return error;
890 }
891
892 /*
893  * Release dquots (and their references) if any.
894  * The inode should be locked EXCL except when this's called by
895  * xfs_ireclaim.
896  */
897 void
898 xfs_qm_dqdetach(
899         xfs_inode_t     *ip)
900 {
901         if (!(ip->i_udquot || ip->i_gdquot))
902                 return;
903
904         trace_xfs_dquot_dqdetach(ip);
905
906         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
907         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
908         if (ip->i_udquot) {
909                 xfs_qm_dqrele(ip->i_udquot);
910                 ip->i_udquot = NULL;
911         }
912         if (ip->i_gdquot) {
913                 xfs_qm_dqrele(ip->i_gdquot);
914                 ip->i_gdquot = NULL;
915         }
916 }
917
918 int
919 xfs_qm_sync(
920         xfs_mount_t     *mp,
921         int             flags)
922 {
923         int             recl, restarts;
924         xfs_dquot_t     *dqp;
925         int             error;
926
927         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
928                 return 0;
929
930         restarts = 0;
931
932   again:
933         mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
934         /*
935          * dqpurge_all() also takes the mplist lock and iterate thru all dquots
936          * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
937          * when we have the mplist lock, we know that dquots will be consistent
938          * as long as we have it locked.
939          */
940         if (!XFS_IS_QUOTA_ON(mp)) {
941                 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
942                 return 0;
943         }
944         ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
945         list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
946                 /*
947                  * If this is vfs_sync calling, then skip the dquots that
948                  * don't 'seem' to be dirty. ie. don't acquire dqlock.
949                  * This is very similar to what xfs_sync does with inodes.
950                  */
951                 if (flags & SYNC_TRYLOCK) {
952                         if (!XFS_DQ_IS_DIRTY(dqp))
953                                 continue;
954                         if (!xfs_qm_dqlock_nowait(dqp))
955                                 continue;
956                 } else {
957                         xfs_dqlock(dqp);
958                 }
959
960                 /*
961                  * Now, find out for sure if this dquot is dirty or not.
962                  */
963                 if (! XFS_DQ_IS_DIRTY(dqp)) {
964                         xfs_dqunlock(dqp);
965                         continue;
966                 }
967
968                 /* XXX a sentinel would be better */
969                 recl = mp->m_quotainfo->qi_dqreclaims;
970                 if (!xfs_dqflock_nowait(dqp)) {
971                         if (flags & SYNC_TRYLOCK) {
972                                 xfs_dqunlock(dqp);
973                                 continue;
974                         }
975                         /*
976                          * If we can't grab the flush lock then if the caller
977                          * really wanted us to give this our best shot, so
978                          * see if we can give a push to the buffer before we wait
979                          * on the flush lock. At this point, we know that
980                          * even though the dquot is being flushed,
981                          * it has (new) dirty data.
982                          */
983                         xfs_qm_dqflock_pushbuf_wait(dqp);
984                 }
985                 /*
986                  * Let go of the mplist lock. We don't want to hold it
987                  * across a disk write
988                  */
989                 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
990                 error = xfs_qm_dqflush(dqp, flags);
991                 xfs_dqunlock(dqp);
992                 if (error && XFS_FORCED_SHUTDOWN(mp))
993                         return 0;       /* Need to prevent umount failure */
994                 else if (error)
995                         return error;
996
997                 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
998                 if (recl != mp->m_quotainfo->qi_dqreclaims) {
999                         if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1000                                 break;
1001
1002                         mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1003                         goto again;
1004                 }
1005         }
1006
1007         mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1008         return 0;
1009 }
1010
/*
 * The hash chains and the mplist use the same xfs_dqhash structure as
 * their list head, but we can take the mplist qh_lock and one of the
 * hash qh_locks at the same time without any problem as they aren't
 * related.
 *
 * This key is handed to lockdep_set_class() for the per-mount dquot list
 * lock in xfs_qm_init_quotainfo(), giving that lock its own lock class.
 */
static struct lock_class_key xfs_quota_mplist_class;
1018
1019 /*
1020  * This initializes all the quota information that's kept in the
1021  * mount structure
1022  */
1023 STATIC int
1024 xfs_qm_init_quotainfo(
1025         xfs_mount_t     *mp)
1026 {
1027         xfs_quotainfo_t *qinf;
1028         int             error;
1029         xfs_dquot_t     *dqp;
1030
1031         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1032
1033         /*
1034          * Tell XQM that we exist as soon as possible.
1035          */
1036         if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1037                 return error;
1038         }
1039
1040         qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
1041
1042         /*
1043          * See if quotainodes are setup, and if not, allocate them,
1044          * and change the superblock accordingly.
1045          */
1046         if ((error = xfs_qm_init_quotainos(mp))) {
1047                 kmem_free(qinf);
1048                 mp->m_quotainfo = NULL;
1049                 return error;
1050         }
1051
1052         INIT_LIST_HEAD(&qinf->qi_dqlist);
1053         mutex_init(&qinf->qi_dqlist_lock);
1054         lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
1055
1056         qinf->qi_dqreclaims = 0;
1057
1058         /* mutex used to serialize quotaoffs */
1059         mutex_init(&qinf->qi_quotaofflock);
1060
1061         /* Precalc some constants */
1062         qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1063         ASSERT(qinf->qi_dqchunklen);
1064         qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
1065         do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
1066
1067         mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1068
1069         /*
1070          * We try to get the limits from the superuser's limits fields.
1071          * This is quite hacky, but it is standard quota practice.
1072          * We look at the USR dquot with id == 0 first, but if user quotas
1073          * are not enabled we goto the GRP dquot with id == 0.
1074          * We don't really care to keep separate default limits for user
1075          * and group quotas, at least not at this point.
1076          */
1077         error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1078                              XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
1079                              (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1080                                 XFS_DQ_PROJ),
1081                              XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1082                              &dqp);
1083         if (! error) {
1084                 xfs_disk_dquot_t        *ddqp = &dqp->q_core;
1085
1086                 /*
1087                  * The warnings and timers set the grace period given to
1088                  * a user or group before he or she can not perform any
1089                  * more writing. If it is zero, a default is used.
1090                  */
1091                 qinf->qi_btimelimit = ddqp->d_btimer ?
1092                         be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
1093                 qinf->qi_itimelimit = ddqp->d_itimer ?
1094                         be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
1095                 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
1096                         be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
1097                 qinf->qi_bwarnlimit = ddqp->d_bwarns ?
1098                         be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
1099                 qinf->qi_iwarnlimit = ddqp->d_iwarns ?
1100                         be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
1101                 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
1102                         be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
1103                 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
1104                 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
1105                 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
1106                 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
1107                 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1108                 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1109  
1110                 /*
1111                  * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1112                  * we don't want this dquot cached. We haven't done a
1113                  * quotacheck yet, and quotacheck doesn't like incore dquots.
1114                  */
1115                 xfs_qm_dqdestroy(dqp);
1116         } else {
1117                 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1118                 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1119                 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1120                 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1121                 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1122                 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1123         }
1124
1125         return 0;
1126 }
1127
1128
1129 /*
1130  * Gets called when unmounting a filesystem or when all quotas get
1131  * turned off.
1132  * This purges the quota inodes, destroys locks and frees itself.
1133  */
1134 void
1135 xfs_qm_destroy_quotainfo(
1136         xfs_mount_t     *mp)
1137 {
1138         xfs_quotainfo_t *qi;
1139
1140         qi = mp->m_quotainfo;
1141         ASSERT(qi != NULL);
1142         ASSERT(xfs_Gqm != NULL);
1143
1144         /*
1145          * Release the reference that XQM kept, so that we know
1146          * when the XQM structure should be freed. We cannot assume
1147          * that xfs_Gqm is non-null after this point.
1148          */
1149         xfs_qm_rele_quotafs_ref(mp);
1150
1151         ASSERT(list_empty(&qi->qi_dqlist));
1152         mutex_destroy(&qi->qi_dqlist_lock);
1153
1154         if (qi->qi_uquotaip) {
1155                 IRELE(qi->qi_uquotaip);
1156                 qi->qi_uquotaip = NULL; /* paranoia */
1157         }
1158         if (qi->qi_gquotaip) {
1159                 IRELE(qi->qi_gquotaip);
1160                 qi->qi_gquotaip = NULL;
1161         }
1162         mutex_destroy(&qi->qi_quotaofflock);
1163         kmem_free(qi);
1164         mp->m_quotainfo = NULL;
1165 }
1166
1167
1168
1169 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1170
1171 /* ARGSUSED */
1172 STATIC void
1173 xfs_qm_list_init(
1174         xfs_dqlist_t    *list,
1175         char            *str,
1176         int             n)
1177 {
1178         mutex_init(&list->qh_lock);
1179         list->qh_next = NULL;
1180         list->qh_version = 0;
1181         list->qh_nelems = 0;
1182 }
1183
1184 STATIC void
1185 xfs_qm_list_destroy(
1186         xfs_dqlist_t    *list)
1187 {
1188         mutex_destroy(&(list->qh_lock));
1189 }
1190
1191
1192 /*
1193  * Stripped down version of dqattach. This doesn't attach, or even look at the
1194  * dquots attached to the inode. The rationale is that there won't be any
1195  * attached at the time this is called from quotacheck.
1196  */
1197 STATIC int
1198 xfs_qm_dqget_noattach(
1199         xfs_inode_t     *ip,
1200         xfs_dquot_t     **O_udqpp,
1201         xfs_dquot_t     **O_gdqpp)
1202 {
1203         int             error;
1204         xfs_mount_t     *mp;
1205         xfs_dquot_t     *udqp, *gdqp;
1206
1207         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1208         mp = ip->i_mount;
1209         udqp = NULL;
1210         gdqp = NULL;
1211
1212         if (XFS_IS_UQUOTA_ON(mp)) {
1213                 ASSERT(ip->i_udquot == NULL);
1214                 /*
1215                  * We want the dquot allocated if it doesn't exist.
1216                  */
1217                 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1218                                          XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1219                                          &udqp))) {
1220                         /*
1221                          * Shouldn't be able to turn off quotas here.
1222                          */
1223                         ASSERT(error != ESRCH);
1224                         ASSERT(error != ENOENT);
1225                         return error;
1226                 }
1227                 ASSERT(udqp);
1228         }
1229
1230         if (XFS_IS_OQUOTA_ON(mp)) {
1231                 ASSERT(ip->i_gdquot == NULL);
1232                 if (udqp)
1233                         xfs_dqunlock(udqp);
1234                 error = XFS_IS_GQUOTA_ON(mp) ?
1235                                 xfs_qm_dqget(mp, ip,
1236                                              ip->i_d.di_gid, XFS_DQ_GROUP,
1237                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1238                                              &gdqp) :
1239                                 xfs_qm_dqget(mp, ip,
1240                                              ip->i_d.di_projid, XFS_DQ_PROJ,
1241                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1242                                              &gdqp);
1243                 if (error) {
1244                         if (udqp)
1245                                 xfs_qm_dqrele(udqp);
1246                         ASSERT(error != ESRCH);
1247                         ASSERT(error != ENOENT);
1248                         return error;
1249                 }
1250                 ASSERT(gdqp);
1251
1252                 /* Reacquire the locks in the right order */
1253                 if (udqp) {
1254                         if (! xfs_qm_dqlock_nowait(udqp)) {
1255                                 xfs_dqunlock(gdqp);
1256                                 xfs_dqlock(udqp);
1257                                 xfs_dqlock(gdqp);
1258                         }
1259                 }
1260         }
1261
1262         *O_udqpp = udqp;
1263         *O_gdqpp = gdqp;
1264
1265 #ifdef QUOTADEBUG
1266         if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1267         if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1268 #endif
1269         return 0;
1270 }
1271
1272 /*
1273  * Create an inode and return with a reference already taken, but unlocked
1274  * This is how we create quota inodes
1275  */
1276 STATIC int
1277 xfs_qm_qino_alloc(
1278         xfs_mount_t     *mp,
1279         xfs_inode_t     **ip,
1280         __int64_t       sbfields,
1281         uint            flags)
1282 {
1283         xfs_trans_t     *tp;
1284         int             error;
1285         int             committed;
1286
1287         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1288         if ((error = xfs_trans_reserve(tp,
1289                                       XFS_QM_QINOCREATE_SPACE_RES(mp),
1290                                       XFS_CREATE_LOG_RES(mp), 0,
1291                                       XFS_TRANS_PERM_LOG_RES,
1292                                       XFS_CREATE_LOG_COUNT))) {
1293                 xfs_trans_cancel(tp, 0);
1294                 return error;
1295         }
1296
1297         if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
1298                                    &xfs_zerocr, 0, 1, ip, &committed))) {
1299                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1300                                  XFS_TRANS_ABORT);
1301                 return error;
1302         }
1303
1304         /*
1305          * Keep an extra reference to this quota inode. This inode is
1306          * locked exclusively and joined to the transaction already.
1307          */
1308         ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1309         IHOLD(*ip);
1310
1311         /*
1312          * Make the changes in the superblock, and log those too.
1313          * sbfields arg may contain fields other than *QUOTINO;
1314          * VERSIONNUM for example.
1315          */
1316         spin_lock(&mp->m_sb_lock);
1317         if (flags & XFS_QMOPT_SBVERSION) {
1318 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1319                 unsigned oldv = mp->m_sb.sb_versionnum;
1320 #endif
1321                 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1322                 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1323                                    XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1324                        (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1325                         XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1326
1327                 xfs_sb_version_addquota(&mp->m_sb);
1328                 mp->m_sb.sb_uquotino = NULLFSINO;
1329                 mp->m_sb.sb_gquotino = NULLFSINO;
1330
1331                 /* qflags will get updated _after_ quotacheck */
1332                 mp->m_sb.sb_qflags = 0;
1333 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1334                 cmn_err(CE_NOTE,
1335                         "Old superblock version %x, converting to %x.",
1336                         oldv, mp->m_sb.sb_versionnum);
1337 #endif
1338         }
1339         if (flags & XFS_QMOPT_UQUOTA)
1340                 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1341         else
1342                 mp->m_sb.sb_gquotino = (*ip)->i_ino;
1343         spin_unlock(&mp->m_sb_lock);
1344         xfs_mod_sb(tp, sbfields);
1345
1346         if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1347                 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1348                 return error;
1349         }
1350         return 0;
1351 }
1352
1353
1354 STATIC void
1355 xfs_qm_reset_dqcounts(
1356         xfs_mount_t     *mp,
1357         xfs_buf_t       *bp,
1358         xfs_dqid_t      id,
1359         uint            type)
1360 {
1361         xfs_disk_dquot_t        *ddq;
1362         int                     j;
1363
1364         trace_xfs_reset_dqcounts(bp, _RET_IP_);
1365
1366         /*
1367          * Reset all counters and timers. They'll be
1368          * started afresh by xfs_qm_quotacheck.
1369          */
1370 #ifdef DEBUG
1371         j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1372         do_div(j, sizeof(xfs_dqblk_t));
1373         ASSERT(XFS_QM_DQPERBLK(mp) == j);
1374 #endif
1375         ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1376         for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
1377                 /*
1378                  * Do a sanity check, and if needed, repair the dqblk. Don't
1379                  * output any warnings because it's perfectly possible to
1380                  * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1381                  */
1382                 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1383                                       "xfs_quotacheck");
1384                 ddq->d_bcount = 0;
1385                 ddq->d_icount = 0;
1386                 ddq->d_rtbcount = 0;
1387                 ddq->d_btimer = 0;
1388                 ddq->d_itimer = 0;
1389                 ddq->d_rtbtimer = 0;
1390                 ddq->d_bwarns = 0;
1391                 ddq->d_iwarns = 0;
1392                 ddq->d_rtbwarns = 0;
1393                 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1394         }
1395 }
1396
1397 STATIC int
1398 xfs_qm_dqiter_bufs(
1399         xfs_mount_t     *mp,
1400         xfs_dqid_t      firstid,
1401         xfs_fsblock_t   bno,
1402         xfs_filblks_t   blkcnt,
1403         uint            flags)
1404 {
1405         xfs_buf_t       *bp;
1406         int             error;
1407         int             notcommitted;
1408         int             incr;
1409         int             type;
1410
1411         ASSERT(blkcnt > 0);
1412         notcommitted = 0;
1413         incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1414                 XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1415         type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1416                 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1417         error = 0;
1418
1419         /*
1420          * Blkcnt arg can be a very big number, and might even be
1421          * larger than the log itself. So, we have to break it up into
1422          * manageable-sized transactions.
1423          * Note that we don't start a permanent transaction here; we might
1424          * not be able to get a log reservation for the whole thing up front,
1425          * and we don't really care to either, because we just discard
1426          * everything if we were to crash in the middle of this loop.
1427          */
1428         while (blkcnt--) {
1429                 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1430                               XFS_FSB_TO_DADDR(mp, bno),
1431                               (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
1432                 if (error)
1433                         break;
1434
1435                 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1436                 xfs_bdwrite(mp, bp);
1437                 /*
1438                  * goto the next block.
1439                  */
1440                 bno++;
1441                 firstid += XFS_QM_DQPERBLK(mp);
1442         }
1443         return error;
1444 }
1445
1446 /*
1447  * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1448  * caller supplied function for every chunk of dquots that we find.
1449  */
1450 STATIC int
1451 xfs_qm_dqiterate(
1452         xfs_mount_t     *mp,
1453         xfs_inode_t     *qip,
1454         uint            flags)
1455 {
1456         xfs_bmbt_irec_t         *map;
1457         int                     i, nmaps;       /* number of map entries */
1458         int                     error;          /* return value */
1459         xfs_fileoff_t           lblkno;
1460         xfs_filblks_t           maxlblkcnt;
1461         xfs_dqid_t              firstid;
1462         xfs_fsblock_t           rablkno;
1463         xfs_filblks_t           rablkcnt;
1464
1465         error = 0;
1466         /*
1467          * This looks racy, but we can't keep an inode lock across a
1468          * trans_reserve. But, this gets called during quotacheck, and that
1469          * happens only at mount time which is single threaded.
1470          */
1471         if (qip->i_d.di_nblocks == 0)
1472                 return 0;
1473
1474         map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1475
1476         lblkno = 0;
1477         maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1478         do {
1479                 nmaps = XFS_DQITER_MAP_SIZE;
1480                 /*
1481                  * We aren't changing the inode itself. Just changing
1482                  * some of its data. No new blocks are added here, and
1483                  * the inode is never added to the transaction.
1484                  */
1485                 xfs_ilock(qip, XFS_ILOCK_SHARED);
1486                 error = xfs_bmapi(NULL, qip, lblkno,
1487                                   maxlblkcnt - lblkno,
1488                                   XFS_BMAPI_METADATA,
1489                                   NULL,
1490                                   0, map, &nmaps, NULL, NULL);
1491                 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1492                 if (error)
1493                         break;
1494
1495                 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1496                 for (i = 0; i < nmaps; i++) {
1497                         ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1498                         ASSERT(map[i].br_blockcount);
1499
1500
1501                         lblkno += map[i].br_blockcount;
1502
1503                         if (map[i].br_startblock == HOLESTARTBLOCK)
1504                                 continue;
1505
1506                         firstid = (xfs_dqid_t) map[i].br_startoff *
1507                                 XFS_QM_DQPERBLK(mp);
1508                         /*
1509                          * Do a read-ahead on the next extent.
1510                          */
1511                         if ((i+1 < nmaps) &&
1512                             (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1513                                 rablkcnt =  map[i+1].br_blockcount;
1514                                 rablkno = map[i+1].br_startblock;
1515                                 while (rablkcnt--) {
1516                                         xfs_baread(mp->m_ddev_targp,
1517                                                XFS_FSB_TO_DADDR(mp, rablkno),
1518                                                (int)XFS_QI_DQCHUNKLEN(mp));
1519                                         rablkno++;
1520                                 }
1521                         }
1522                         /*
1523                          * Iterate thru all the blks in the extent and
1524                          * reset the counters of all the dquots inside them.
1525                          */
1526                         if ((error = xfs_qm_dqiter_bufs(mp,
1527                                                        firstid,
1528                                                        map[i].br_startblock,
1529                                                        map[i].br_blockcount,
1530                                                        flags))) {
1531                                 break;
1532                         }
1533                 }
1534
1535                 if (error)
1536                         break;
1537         } while (nmaps > 0);
1538
1539         kmem_free(map);
1540
1541         return error;
1542 }
1543
1544 /*
1545  * Called by dqusage_adjust in doing a quotacheck.
1546  * Given the inode, and a dquot (either USR or GRP, doesn't matter),
1547  * this updates its incore copy as well as the buffer copy. This is
1548  * so that once the quotacheck is done, we can just log all the buffers,
1549  * as opposed to logging numerous updates to individual dquots.
1550  */
1551 STATIC void
1552 xfs_qm_quotacheck_dqadjust(
1553         xfs_dquot_t             *dqp,
1554         xfs_qcnt_t              nblks,
1555         xfs_qcnt_t              rtblks)
1556 {
1557         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1558
1559         trace_xfs_dqadjust(dqp);
1560
1561         /*
1562          * Adjust the inode count and the block count to reflect this inode's
1563          * resource usage.
1564          */
1565         be64_add_cpu(&dqp->q_core.d_icount, 1);
1566         dqp->q_res_icount++;
1567         if (nblks) {
1568                 be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1569                 dqp->q_res_bcount += nblks;
1570         }
1571         if (rtblks) {
1572                 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1573                 dqp->q_res_rtbcount += rtblks;
1574         }
1575
1576         /*
1577          * Set default limits, adjust timers (since we changed usages)
1578          */
1579         if (! XFS_IS_SUSER_DQUOT(dqp)) {
1580                 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1581                 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1582         }
1583
1584         dqp->dq_flags |= XFS_DQ_DIRTY;
1585 }
1586
1587 STATIC int
1588 xfs_qm_get_rtblks(
1589         xfs_inode_t     *ip,
1590         xfs_qcnt_t      *O_rtblks)
1591 {
1592         xfs_filblks_t   rtblks;                 /* total rt blks */
1593         xfs_extnum_t    idx;                    /* extent record index */
1594         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1595         xfs_extnum_t    nextents;               /* number of extent entries */
1596         int             error;
1597
1598         ASSERT(XFS_IS_REALTIME_INODE(ip));
1599         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1600         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1601                 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1602                         return error;
1603         }
1604         rtblks = 0;
1605         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1606         for (idx = 0; idx < nextents; idx++)
1607                 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1608         *O_rtblks = (xfs_qcnt_t)rtblks;
1609         return 0;
1610 }
1611
1612 /*
1613  * callback routine supplied to bulkstat(). Given an inumber, find its
1614  * dquots and update them to account for resources taken by that inode.
1615  */
1616 /* ARGSUSED */
1617 STATIC int
1618 xfs_qm_dqusage_adjust(
1619         xfs_mount_t     *mp,            /* mount point for filesystem */
1620         xfs_ino_t       ino,            /* inode number to get data for */
1621         void            __user *buffer, /* not used */
1622         int             ubsize,         /* not used */
1623         void            *private_data,  /* not used */
1624         xfs_daddr_t     bno,            /* starting block of inode cluster */
1625         int             *ubused,        /* not used */
1626         void            *dip,           /* on-disk inode pointer (not used) */
1627         int             *res)           /* result code value */
1628 {
1629         xfs_inode_t     *ip;
1630         xfs_dquot_t     *udqp, *gdqp;
1631         xfs_qcnt_t      nblks, rtblks;
1632         int             error;
1633
1634         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1635
1636         /*
1637          * rootino must have its resources accounted for, not so with the quota
1638          * inodes.
1639          */
1640         if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1641                 *res = BULKSTAT_RV_NOTHING;
1642                 return XFS_ERROR(EINVAL);
1643         }
1644
1645         /*
1646          * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1647          * interface expects the inode to be exclusively locked because that's
1648          * the case in all other instances. It's OK that we do this because
1649          * quotacheck is done only at mount time.
1650          */
1651         if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1652                 *res = BULKSTAT_RV_NOTHING;
1653                 return error;
1654         }
1655
1656         /*
1657          * Obtain the locked dquots. In case of an error (eg. allocation
1658          * fails for ENOSPC), we return the negative of the error number
1659          * to bulkstat, so that it can get propagated to quotacheck() and
1660          * making us disable quotas for the file system.
1661          */
1662         if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1663                 xfs_iput(ip, XFS_ILOCK_EXCL);
1664                 *res = BULKSTAT_RV_GIVEUP;
1665                 return error;
1666         }
1667
1668         rtblks = 0;
1669         if (! XFS_IS_REALTIME_INODE(ip)) {
1670                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1671         } else {
1672                 /*
1673                  * Walk thru the extent list and count the realtime blocks.
1674                  */
1675                 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1676                         xfs_iput(ip, XFS_ILOCK_EXCL);
1677                         if (udqp)
1678                                 xfs_qm_dqput(udqp);
1679                         if (gdqp)
1680                                 xfs_qm_dqput(gdqp);
1681                         *res = BULKSTAT_RV_GIVEUP;
1682                         return error;
1683                 }
1684                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1685         }
1686         ASSERT(ip->i_delayed_blks == 0);
1687
1688         /*
1689          * We can't release the inode while holding its dquot locks.
1690          * The inode can go into inactive and might try to acquire the dquotlocks.
1691          * So, just unlock here and do a vn_rele at the end.
1692          */
1693         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1694
1695         /*
1696          * Add the (disk blocks and inode) resources occupied by this
1697          * inode to its dquots. We do this adjustment in the incore dquot,
1698          * and also copy the changes to its buffer.
1699          * We don't care about putting these changes in a transaction
1700          * envelope because if we crash in the middle of a 'quotacheck'
1701          * we have to start from the beginning anyway.
1702          * Once we're done, we'll log all the dquot bufs.
1703          *
1704          * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1705          * and quotaoffs don't race. (Quotachecks happen at mount time only).
1706          */
1707         if (XFS_IS_UQUOTA_ON(mp)) {
1708                 ASSERT(udqp);
1709                 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
1710                 xfs_qm_dqput(udqp);
1711         }
1712         if (XFS_IS_OQUOTA_ON(mp)) {
1713                 ASSERT(gdqp);
1714                 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
1715                 xfs_qm_dqput(gdqp);
1716         }
1717         /*
1718          * Now release the inode. This will send it to 'inactive', and
1719          * possibly even free blocks.
1720          */
1721         IRELE(ip);
1722
1723         /*
1724          * Goto next inode.
1725          */
1726         *res = BULKSTAT_RV_DIDONE;
1727         return 0;
1728 }
1729
1730 /*
1731  * Walk thru all the filesystem inodes and construct a consistent view of the
1732  * disk quota world. Returns 0, or the error after which quotas were disabled.
1733  */
1734 int
1735 xfs_qm_quotacheck(
1736         xfs_mount_t     *mp)
1737 {
1738         int             done, count, error;
1739         xfs_ino_t       lastino;        /* handed to xfs_bulkstat by reference */
1740         size_t          structsz;
1741         xfs_inode_t     *uip, *gip;     /* user and group/project quota inodes */
1742         uint            flags;          /* *_CHKD bits earned so far */
1743
1744         count = INT_MAX;
1745         structsz = 1;
1746         lastino = 0;
1747         flags = 0;
1748
1749         ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
1750         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1751
1752         /*
1753          * There should be no cached dquots. The (simplistic) quotacheck
1754          * algorithm doesn't like that.
1755          */
1756         ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1757
1758         cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1759
1760         /*
1761          * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1762          * their counters to zero. We need a clean slate.
1763          * We don't log our changes till later.
1764          */
1765         if ((uip = XFS_QI_UQIP(mp))) {
1766                 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
1767                         goto error_return;
1768                 flags |= XFS_UQUOTA_CHKD;
1769         }
1770
1771         if ((gip = XFS_QI_GQIP(mp))) {
1772                 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1773                                         XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
1774                         goto error_return;
1775                 flags |= XFS_OQUOTA_CHKD;
1776         }
1777
1778         do {
1779                 /*
1780                  * Iterate thru all the inodes in the file system,
1781                  * adjusting the corresponding dquot counters in core.
1782                  */
1783                 if ((error = xfs_bulkstat(mp, &lastino, &count,
1784                                      xfs_qm_dqusage_adjust, NULL,
1785                                      structsz, NULL, BULKSTAT_FG_IGET, &done)))
1786                         break;
1787
1788         } while (! done);
1789
1790         /*
1791          * We've made all the changes that we need to make incore.
1792          * Flush them down to disk buffers if everything was updated
1793          * successfully.
1794          */
1795         if (!error)
1796                 error = xfs_qm_dqflush_all(mp, 0);
1797
1798         /*
1799          * We can get this error if we couldn't do a dquot allocation inside
1800          * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1801          * dirty dquots that might be cached, we just want to get rid of them
1802          * and turn quotaoff. The dquots won't be attached to any of the inodes
1803          * at this point (because we intentionally didn't in dqget_noattach).
1804          */
1805         if (error) {
1806                 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1807                 goto error_return;
1808         }
1809
1810         /*
1811          * We didn't log anything, because if we crashed, we'll have to
1812          * start the quotacheck from scratch anyway. However, we must make
1813          * sure that our dquot changes are secure before we put the
1814          * quotacheck'd stamp on the superblock. So, here we do a synchronous
1815          * flush.
1816          */
1817         XFS_bflush(mp->m_ddev_targp);
1818
1819         /*
1820          * If one type of quotas is off, then it will lose its
1821          * quotachecked status, since we won't be doing accounting for
1822          * that type anymore.
1823          */
1824         mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1825         mp->m_qflags |= flags;  /* only the types whose dquots were reset above */
1826
1827         xfs_qm_dquot_list_print(mp);
1828
1829  error_return:     /* reached on success too; 'error' selects the branch */
1830         if (error) {
1831                 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1832                         "Disabling quotas.",
1833                         mp->m_fsname, error);
1834                 /*
1835                  * We must turn off quotas.
1836                  */
1837                 ASSERT(mp->m_quotainfo != NULL);
1838                 ASSERT(xfs_Gqm != NULL);
1839                 xfs_qm_destroy_quotainfo(mp);
1840                 if (xfs_mount_reset_sbqflags(mp)) {
1841                         cmn_err(CE_WARN, "XFS quotacheck %s: "
1842                                 "Failed to reset quota flags.", mp->m_fsname);
1843                 }
1844         } else {
1845                 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1846         }
1847         return (error);
1848 }
1849
1850 /*
1851  * This is called after the superblock has been read in and we're ready to
1852  * iget the quota inodes; quota inodes that don't exist yet are allocated.
1853  * Returns 0 with XFS_QI_UQIP/XFS_QI_GQIP set, or an error. On error, every
1854  * quota inode reference obtained through xfs_iget here has been released.
1855  */
1856 STATIC int
1857 xfs_qm_init_quotainos(
1858         xfs_mount_t     *mp)
1859 {
1860         xfs_inode_t     *uip, *gip;     /* user and group/project quota inodes */
1861         int             error;
1862         __int64_t       sbflags;        /* XFS_SB_* fields for qino_alloc */
1863         uint            flags;          /* XFS_QMOPT_* options for qino_alloc */
1864
1865         ASSERT(mp->m_quotainfo);
1866         uip = gip = NULL;
1867         sbflags = 0;
1868         flags = 0;
1869
1870         /* Get the uquota and gquota inodes */
1871         if (xfs_sb_version_hasquota(&mp->m_sb)) {
1872                 if (XFS_IS_UQUOTA_ON(mp) &&
1873                     mp->m_sb.sb_uquotino != NULLFSINO) {
1874                         ASSERT(mp->m_sb.sb_uquotino > 0);
1875                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1876                                              0, 0, &uip, 0)))
1877                                 return XFS_ERROR(error);
1878                 }
1879                 if (XFS_IS_OQUOTA_ON(mp) &&
1880                     mp->m_sb.sb_gquotino != NULLFSINO) {
1881                         ASSERT(mp->m_sb.sb_gquotino > 0);
1882                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1883                                              0, 0, &gip, 0))) {
1884                                 if (uip)
1885                                         IRELE(uip);
1886                                 return XFS_ERROR(error);
1887                         }
1888                 }
1889         } else {
1890                 flags |= XFS_QMOPT_SBVERSION;
1891                 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1892                             XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1893         }
1894
1895         /*
1896          * Create the two inodes, if they don't exist already. The changes
1897          * made above will get added to a transaction and logged in one of
1898          * the qino_alloc calls below.  If the device is readonly,
1899          * temporarily switch to read-write to do this.
1900          */
1901         if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1902                 error = xfs_qm_qino_alloc(mp, &uip, sbflags | XFS_SB_UQUOTINO,
1903                                           flags | XFS_QMOPT_UQUOTA);
1904                 if (error) {
1905                         if (gip)        /* may be held from the xfs_iget above */
1906                                 IRELE(gip);
1907                         return XFS_ERROR(error);
1908                 }
1909                 flags &= ~XFS_QMOPT_SBVERSION;
1910         }
1911         if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1912                 flags |= (XFS_IS_GQUOTA_ON(mp) ?
1913                                 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1914                 error = xfs_qm_qino_alloc(mp, &gip,
1915                                           sbflags | XFS_SB_GQUOTINO, flags);
1916                 if (error) {
1917                         if (uip)
1918                                 IRELE(uip);
1919                         return XFS_ERROR(error);
1920                 }
1921         }
1922
1923         XFS_QI_UQIP(mp) = uip;
1924         XFS_QI_GQIP(mp) = gip;
1925         return 0;
1926 }
1927
1928
1929 /*
1930  * Traverse the freelist of dquots and attempt to reclaim a maximum of
1931  * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1932  * favor the lookup function ...
1933  * XXXsup merge this with qm_reclaim_one().
1934  */
1935 STATIC int
1936 xfs_qm_shake_freelist(
1937         int howmany)
1938 {
1939         int             nreclaimed;
1940         xfs_dqhash_t    *hash;
1941         xfs_dquot_t     *dqp, *nextdqp;
1942         int             restarts;
1943         int             nflushes;
1944
1945         if (howmany <= 0)
1946                 return 0;
1947
1948         nreclaimed = 0;
1949         restarts = 0;
1950         nflushes = 0;
1951
1952 #ifdef QUOTADEBUG
1953         cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
1954 #endif
1955         /* lock order is : hashchainlock, freelistlock, mplistlock */
1956  tryagain:
1957         xfs_qm_freelist_lock(xfs_Gqm);
1958
1959         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
1960              ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
1961               nreclaimed < howmany); ) {
1962                 struct xfs_mount *mp = dqp->q_mount;
1963                 xfs_dqlock(dqp);
1964
1965                 /*
1966                  * We are racing with dqlookup here. Naturally we don't
1967                  * want to reclaim a dquot that lookup wants.
1968                  */
1969                 if (dqp->dq_flags & XFS_DQ_WANT) {
1970                         xfs_dqunlock(dqp);
1971                         xfs_qm_freelist_unlock(xfs_Gqm);
1972                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1973                                 return nreclaimed;
1974                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1975                         goto tryagain;
1976                 }
1977
1978                 /*
1979                  * If the dquot is inactive, we are assured that it is
1980                  * not on the mplist or the hashlist, and that makes our
1981                  * life easier.
1982                  */
1983                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1984                         ASSERT(mp == NULL);
1985                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1986                         ASSERT(dqp->HL_PREVP == NULL);
1987                         ASSERT(list_empty(&dqp->q_mplist));
1988                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1989                         nextdqp = dqp->dq_flnext;
1990                         goto off_freelist;
1991                 }
1992
1993                 ASSERT(!list_empty(&dqp->q_mplist));
1994                 /*
1995                  * Try to grab the flush lock. If this dquot is in the process of
1996                  * getting flushed to disk, we don't want to reclaim it.
1997                  */
1998                 if (!xfs_dqflock_nowait(dqp)) {
1999                         xfs_dqunlock(dqp);
2000                         dqp = dqp->dq_flnext;
2001                         continue;
2002                 }
2003
2004                 /*
2005                  * We have the flush lock so we know that this is not in the
2006                  * process of being flushed. So, if this is dirty, flush it
2007                  * DELWRI so that we don't get a freelist infested with
2008                  * dirty dquots.
2009                  */
2010                 if (XFS_DQ_IS_DIRTY(dqp)) {
2011                         int     error;
2012
2013                         trace_xfs_dqshake_dirty(dqp);
2014
2015                         /*
2016                          * We flush it delayed write, so don't bother
2017                          * releasing the mplock.
2018                          */
2019                         error = xfs_qm_dqflush(dqp, 0);
2020                         if (error) {
2021                                 xfs_fs_cmn_err(CE_WARN, mp,
2022                         "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
2023                         }
2024                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2025                         dqp = dqp->dq_flnext;
2026                         continue;
2027                 }
2028                 /*
2029                  * We're trying to get the hashlock out of order. This races
2030                  * with dqlookup; so, we giveup and goto the next dquot if
2031                  * we couldn't get the hashlock. This way, we won't starve
2032                  * a dqlookup process that holds the hashlock that is
2033                  * waiting for the freelist lock.
2034                  */
2035                 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
2036                         xfs_dqfunlock(dqp);
2037                         xfs_dqunlock(dqp);
2038                         dqp = dqp->dq_flnext;
2039                         continue;
2040                 }
2041                 /*
2042                  * This races with dquot allocation code as well as dqflush_all
2043                  * and reclaim code. So, if we failed to grab the mplist lock,
2044                  * giveup everything and start over.
2045                  */
2046                 hash = dqp->q_hash;
2047                 ASSERT(hash);
2048                 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
2049                         /* XXX put a sentinel so that we can come back here */
2050                         xfs_dqfunlock(dqp);
2051                         xfs_dqunlock(dqp);
2052                         mutex_unlock(&hash->qh_lock);
2053                         xfs_qm_freelist_unlock(xfs_Gqm);
2054                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2055                                 return nreclaimed;
2056                         goto tryagain;
2057                 }
2058
2059                 trace_xfs_dqshake_unlink(dqp);
2060
2061 #ifdef QUOTADEBUG
2062                 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2063                         dqp, be32_to_cpu(dqp->q_core.d_id));
2064 #endif
2065                 ASSERT(dqp->q_nrefs == 0);
2066                 nextdqp = dqp->dq_flnext;
2067                 XQM_HASHLIST_REMOVE(hash, dqp);
2068                 list_del_init(&dqp->q_mplist);
2069                 mp->m_quotainfo->qi_dquots--;
2070                 mp->m_quotainfo->qi_dqreclaims++;
2071                 xfs_dqfunlock(dqp);
2072                 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
2073                 mutex_unlock(&hash->qh_lock);
2074
2075  off_freelist:
2076                 XQM_FREELIST_REMOVE(dqp);
2077                 xfs_dqunlock(dqp);
2078                 nreclaimed++;
2079                 XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
2080                 xfs_qm_dqdestroy(dqp);
2081                 dqp = nextdqp;
2082         }
2083         xfs_qm_freelist_unlock(xfs_Gqm);
2084         return nreclaimed;
2085 }
2086
2087
2088 /*
2089  * Memory-pressure callback (the kmem_shake interface): trim the global dquot
2090  * freelist. nr_to_scan is not used. Returns 0 when nothing is attempted,
2091  * otherwise whatever xfs_qm_shake_freelist() reports.
2092  */
2093 /* ARGSUSED */
2094 STATIC int
2095 xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2096 {
2097         int     free_cnt, used_cnt, target, excess;
2098
2099         if (!kmem_shake_allow(gfp_mask) || !xfs_Gqm)
2100                 return 0;
2101
2102         /* dquots on the freelist vs. the remaining incore dquots in all f/s's */
2103         free_cnt = xfs_Gqm->qm_dqfreelist.qh_nelems;
2104         used_cnt = atomic_read(&xfs_Gqm->qm_totaldquots) - free_cnt;
2105         ASSERT(used_cnt >= 0);
2106
2107         if (free_cnt <= used_cnt && free_cnt < ndquot)
2108                 return 0;
2109
2110         target = used_cnt * xfs_Gqm->qm_dqfree_ratio;   /* target # of free dquots */
2111         excess = free_cnt - target - ndquot;            /* # over target */
2112
2113         /* NOTE(review): excess looks never larger than free_cnt, so MAX() seems to always pick free_cnt - confirm intended */
2114         return xfs_qm_shake_freelist(MAX(free_cnt, excess));
2115 }
2116
2117
2118 /*
2119  * Just pop the least recently used dquot off the freelist and
2120  * recycle it. Both return paths drop the dquot lock before returning,
2121  * so the caller gets the dquot unlocked and already off the freelist.
2122  * Returns NULL if nothing could be reclaimed or after too many restarts.
2123  */
2124 STATIC xfs_dquot_t *
2125 xfs_qm_dqreclaim_one(void)
2126 {
2127         xfs_dquot_t     *dqpout;
2128         xfs_dquot_t     *dqp;
2129         int             restarts;
2130
2131         restarts = 0;
2132         dqpout = NULL;
2133
2134         /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2135  startagain:
2136         xfs_qm_freelist_lock(xfs_Gqm);
2137
2138         FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2139                 struct xfs_mount *mp = dqp->q_mount;
2140                 xfs_dqlock(dqp);
2141
2142                 /*
2143                  * We are racing with dqlookup here. Naturally we don't
2144                  * want to reclaim a dquot that lookup wants. We release the
2145                  * freelist lock and start over, so that lookup will grab
2146                  * both the dquot and the freelistlock.
2147                  */
2148                 if (dqp->dq_flags & XFS_DQ_WANT) {
2149                         ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2150
2151                         trace_xfs_dqreclaim_want(dqp);
2152
2153                         xfs_dqunlock(dqp);
2154                         xfs_qm_freelist_unlock(xfs_Gqm);
2155                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2156                                 return NULL;
2157                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2158                         goto startagain;
2159                 }
2160
2161                 /*
2162                  * If the dquot is inactive, we are assured that it is
2163                  * not on the mplist or the hashlist, and that makes our
2164                  * life easier.
2165                  */
2166                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2167                         ASSERT(mp == NULL);
2168                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2169                         ASSERT(dqp->HL_PREVP == NULL);
2170                         ASSERT(list_empty(&dqp->q_mplist));
2171                         XQM_FREELIST_REMOVE(dqp);
2172                         xfs_dqunlock(dqp);
2173                         dqpout = dqp;
2174                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2175                         break;
2176                 }
2177
2178                 ASSERT(dqp->q_hash);
2179                 ASSERT(!list_empty(&dqp->q_mplist));
2180
2181                 /*
2182                  * Try to grab the flush lock. If this dquot is in the process of
2183                  * getting flushed to disk, we don't want to reclaim it.
2184                  */
2185                 if (!xfs_dqflock_nowait(dqp)) {
2186                         xfs_dqunlock(dqp);
2187                         continue;
2188                 }
2189
2190                 /*
2191                  * We have the flush lock so we know that this is not in the
2192                  * process of being flushed. So, if this is dirty, flush it
2193                  * DELWRI so that we don't get a freelist infested with
2194                  * dirty dquots.
2195                  */
2196                 if (XFS_DQ_IS_DIRTY(dqp)) {
2197                         int     error;
2198
2199                         trace_xfs_dqreclaim_dirty(dqp);
2200
2201                         /*
2202                          * We flush it delayed write, so don't bother
2203                          * releasing the freelist lock.
2204                          */
2205                         error = xfs_qm_dqflush(dqp, 0);
2206                         if (error) {
2207                                 xfs_fs_cmn_err(CE_WARN, mp,
2208                         "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2209                         }
2210                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2211                         continue;
2212                 }
2213
2214                 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
2215                         xfs_dqfunlock(dqp);
2216                         xfs_dqunlock(dqp);
2217                         continue;
2218                 }
2219
2220                 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2221                         goto mplistunlock;      /* dqpout still NULL: loop goes on */
2222
2223                 trace_xfs_dqreclaim_unlink(dqp);
2224
2225                 ASSERT(dqp->q_nrefs == 0);
2226                 list_del_init(&dqp->q_mplist);
2227                 mp->m_quotainfo->qi_dquots--;
2228                 mp->m_quotainfo->qi_dqreclaims++;
2229                 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2230                 XQM_FREELIST_REMOVE(dqp);
2231                 dqpout = dqp;
2232                 mutex_unlock(&dqp->q_hash->qh_lock);
2233  mplistunlock:
2234                 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
2235                 xfs_dqfunlock(dqp);
2236                 xfs_dqunlock(dqp);
2237                 if (dqpout)
2238                         break;
2239         }
2240
2241         xfs_qm_freelist_unlock(xfs_Gqm);
2242         return dqpout;
2243 }
2244
2245
2246 /*------------------------------------------------------------------*/
2247
2248 /*
2249  * Hand the caller an incore dquot to initialize, stored through O_dqpp.
2250  * Once the system-wide dquot count has reached ndquot we first try to
2251  * recycle one from the freelist; otherwise, or if that fails, a new
2252  * one is allocated from the dquot zone.
2253  * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2254  * to reclaim an existing one from the freelist.
2255  */
2256 boolean_t
2257 xfs_qm_dqalloc_incore(
2258         xfs_dquot_t **O_dqpp)
2259 {
2260         xfs_dquot_t     *recycled = NULL;
2261
2262         /*
2263          * At or above the high water mark: see whether the freelist
2264          * can give us a dquot before growing the population.
2265          */
2266         if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2267                 recycled = xfs_qm_dqreclaim_one();
2268                 if (recycled == NULL) {
2269                         XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2270                 }
2271         }
2272
2273         if (recycled != NULL) {
2274                 XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2275                 /*
2276                  * Only the core is zeroed here; the caller is expected
2277                  * to reinitialize everything else.
2278                  */
2279                 memset(&recycled->q_core, 0, sizeof(recycled->q_core));
2280                 *O_dqpp = recycled;
2281                 return B_FALSE;
2282         }
2283
2284         /*
2285          * Nothing recycled: take a brand new dquot from the dquot
2286          * zone and count it in the global total.
2287          */
2288         ASSERT(xfs_Gqm->qm_dqzone != NULL);
2289         *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2290         atomic_inc(&xfs_Gqm->qm_totaldquots);
2291         return B_TRUE;
2292 }
2293
2294
2295 /*
2296  * Start a transaction, pass the changed-field mask 'flags' to xfs_mod_sb()
2297  * and commit. Returns 0, or the error from the reservation or the commit.
2298  */
2299 int
2300 xfs_qm_write_sb_changes(
2301         xfs_mount_t     *mp,
2302         __int64_t       flags)
2303 {
2304         xfs_trans_t     *trans;
2305         int             rc;
2306
2307 #ifdef QUOTADEBUG
2308         cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
2309 #endif
2310         trans = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2311
2312         /* third argument: superblock sector size plus 128 bytes */
2313         rc = xfs_trans_reserve(trans, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
2314                                XFS_DEFAULT_LOG_COUNT);
2315         if (rc) {
2316                 /* reservation failed: cancel rather than commit */
2317                 xfs_trans_cancel(trans, 0);
2318                 return rc;
2319         }
2320
2321         xfs_mod_sb(trans, flags);
2322         return xfs_trans_commit(trans, 0);
2323 }
2324
2325
2326 /* --------------- utility functions for vnodeops ---------------- */
2327
2328
2329 /*
2330  * Given an inode, a uid and gid (from cred_t) make sure that we have
2331  * allocated relevant dquot(s) on disk, and that we won't exceed inode
2332  * quotas by creating this file.
2333  * This also attaches dquot(s) to the given inode after locking it,
2334  * and returns the dquots corresponding to the uid and/or gid.
2335  * Returns 0 (also when quotas are not running/on) or the dqattach/dqget error.
2336  * in   : inode (unlocked)
2337  * out  : udquot, gdquot with references taken and unlocked (NULL out pointer: reference dropped)
2338  */
2339 int
2340 xfs_qm_vop_dqalloc(
2341         struct xfs_inode        *ip,
2342         uid_t                   uid,
2343         gid_t                   gid,
2344         prid_t                  prid,
2345         uint                    flags,          /* XFS_QMOPT_* selection bits */
2346         struct xfs_dquot        **O_udqpp,      /* may be NULL */
2347         struct xfs_dquot        **O_gdqpp)      /* may be NULL; group or project dquot */
2348 {
2349         struct xfs_mount        *mp = ip->i_mount;
2350         struct xfs_dquot        *uq, *gq;
2351         int                     error;
2352         uint                    lockflags;      /* ilock mode currently held */
2353
2354         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2355                 return 0;
2356
2357         lockflags = XFS_ILOCK_EXCL;
2358         xfs_ilock(ip, lockflags);
2359
2360         if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
2361                 gid = ip->i_d.di_gid;   /* use the inode's own gid instead */
2362
2363         /*
2364          * Attach the dquot(s) to this inode, doing a dquot allocation
2365          * if necessary. The dquot(s) will not be locked.
2366          */
2367         if (XFS_NOT_DQATTACHED(mp, ip)) {
2368                 error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
2369                 if (error) {
2370                         xfs_iunlock(ip, lockflags);
2371                         return error;
2372                 }
2373         }
2374
2375         uq = gq = NULL;
2376         if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2377                 if (ip->i_d.di_uid != uid) {
2378                         /*
2379                          * What we need is the dquot that has this uid, and
2380                          * if we send the inode to dqget, the uid of the inode
2381                          * takes priority over what's sent in the uid argument.
2382                          * We must unlock inode here before calling dqget if
2383                          * we're not sending the inode, because otherwise
2384                          * we'll deadlock by doing trans_reserve while
2385                          * holding ilock.
2386                          */
2387                         xfs_iunlock(ip, lockflags);
2388                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2389                                                  XFS_DQ_USER,
2390                                                  XFS_QMOPT_DQALLOC |
2391                                                  XFS_QMOPT_DOWARN,
2392                                                  &uq))) {
2393                                 ASSERT(error != ENOENT);
2394                                 return error;   /* ilock was already dropped above */
2395                         }
2396                         /*
2397                          * Get the ilock in the right order.
2398                          */
2399                         xfs_dqunlock(uq);
2400                         lockflags = XFS_ILOCK_SHARED;   /* retaken shared from here on */
2401                         xfs_ilock(ip, lockflags);
2402                 } else {
2403                         /*
2404                          * Take an extra reference, because we'll return
2405                          * this to caller
2406                          */
2407                         ASSERT(ip->i_udquot);
2408                         uq = ip->i_udquot;
2409                         xfs_dqlock(uq);
2410                         XFS_DQHOLD(uq);
2411                         xfs_dqunlock(uq);
2412                 }
2413         }
2414         if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2415                 if (ip->i_d.di_gid != gid) {
2416                         xfs_iunlock(ip, lockflags);     /* same unlock/dqget/relock dance as for uid */
2417                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2418                                                  XFS_DQ_GROUP,
2419                                                  XFS_QMOPT_DQALLOC |
2420                                                  XFS_QMOPT_DOWARN,
2421                                                  &gq))) {
2422                                 if (uq)
2423                                         xfs_qm_dqrele(uq);      /* don't leak the user dquot reference */
2424                                 ASSERT(error != ENOENT);
2425                                 return error;
2426                         }
2427                         xfs_dqunlock(gq);
2428                         lockflags = XFS_ILOCK_SHARED;
2429                         xfs_ilock(ip, lockflags);
2430                 } else {
2431                         ASSERT(ip->i_gdquot);
2432                         gq = ip->i_gdquot;
2433                         xfs_dqlock(gq);
2434                         XFS_DQHOLD(gq);
2435                         xfs_dqunlock(gq);
2436                 }
2437         } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {        /* project dquot also travels in gq */
2438                 if (ip->i_d.di_projid != prid) {
2439                         xfs_iunlock(ip, lockflags);
2440                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2441                                                  XFS_DQ_PROJ,
2442                                                  XFS_QMOPT_DQALLOC |
2443                                                  XFS_QMOPT_DOWARN,
2444                                                  &gq))) {
2445                                 if (uq)
2446                                         xfs_qm_dqrele(uq);
2447                                 ASSERT(error != ENOENT);
2448                                 return (error);
2449                         }
2450                         xfs_dqunlock(gq);
2451                         lockflags = XFS_ILOCK_SHARED;
2452                         xfs_ilock(ip, lockflags);
2453                 } else {
2454                         ASSERT(ip->i_gdquot);
2455                         gq = ip->i_gdquot;
2456                         xfs_dqlock(gq);
2457                         XFS_DQHOLD(gq);
2458                         xfs_dqunlock(gq);
2459                 }
2460         }
2461         if (uq)
2462                 trace_xfs_dquot_dqalloc(ip);
2463
2464         xfs_iunlock(ip, lockflags);     /* whichever mode was taken last */
2465         if (O_udqpp)
2466                 *O_udqpp = uq;
2467         else if (uq)
2468                 xfs_qm_dqrele(uq);      /* caller doesn't want it: drop our reference */
2469         if (O_gdqpp)
2470                 *O_gdqpp = gq;
2471         else if (gq)
2472                 xfs_qm_dqrele(gq);
2473         return 0;
2474 }
2475
2476 /*
2477  * Actually transfer ownership, and do dquot modifications.
2478  * These were already reserved.
2479  */
2480 xfs_dquot_t *
2481 xfs_qm_vop_chown(
2482         xfs_trans_t     *tp,
2483         xfs_inode_t     *ip,
2484         xfs_dquot_t     **IO_olddq,
2485         xfs_dquot_t     *newdq)
2486 {
2487         xfs_dquot_t     *prevdq;
2488         uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
2489                                  XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2490
2491
2492         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2493         ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2494
2495         /* old dquot */
2496         prevdq = *IO_olddq;
2497         ASSERT(prevdq);
2498         ASSERT(prevdq != newdq);
2499
2500         xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2501         xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2502
2503         /* the sparkling new dquot */
2504         xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2505         xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2506
2507         /*
2508          * Take an extra reference, because the inode
2509          * is going to keep this dquot pointer even
2510          * after the trans_commit.
2511          */
2512         xfs_dqlock(newdq);
2513         XFS_DQHOLD(newdq);
2514         xfs_dqunlock(newdq);
2515         *IO_olddq = newdq;
2516
2517         return prevdq;
2518 }
2519
2520 /*
2521  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2522  */
2523 int
2524 xfs_qm_vop_chown_reserve(
2525         xfs_trans_t     *tp,
2526         xfs_inode_t     *ip,
2527         xfs_dquot_t     *udqp,
2528         xfs_dquot_t     *gdqp,
2529         uint            flags)
2530 {
2531         xfs_mount_t     *mp = ip->i_mount;
2532         uint            delblks, blkflags, prjflags = 0;
2533         xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2534         int             error;
2535
2536
2537         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2538         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2539
2540         delblks = ip->i_delayed_blks;
2541         delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2542         blkflags = XFS_IS_REALTIME_INODE(ip) ?
2543                         XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2544
2545         if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2546             ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
2547                 delblksudq = udqp;
2548                 /*
2549                  * If there are delayed allocation blocks, then we have to
2550                  * unreserve those from the old dquot, and add them to the
2551                  * new dquot.
2552                  */
2553                 if (delblks) {
2554                         ASSERT(ip->i_udquot);
2555                         unresudq = ip->i_udquot;
2556                 }
2557         }
2558         if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2559                 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2560                      ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
2561                         prjflags = XFS_QMOPT_ENOSPC;
2562
2563                 if (prjflags ||
2564                     (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2565                      ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2566                         delblksgdq = gdqp;
2567                         if (delblks) {
2568                                 ASSERT(ip->i_gdquot);
2569                                 unresgdq = ip->i_gdquot;
2570                         }
2571                 }
2572         }
2573
2574         if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2575                                 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2576                                 flags | blkflags | prjflags)))
2577                 return (error);
2578
2579         /*
2580          * Do the delayed blks reservations/unreservations now. Since, these
2581          * are done without the help of a transaction, if a reservation fails
2582          * its previous reservations won't be automatically undone by trans
2583          * code. So, we have to do it manually here.
2584          */
2585         if (delblks) {
2586                 /*
2587                  * Do the reservations first. Unreservation can't fail.
2588                  */
2589                 ASSERT(delblksudq || delblksgdq);
2590                 ASSERT(unresudq || unresgdq);
2591                 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2592                                 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2593                                 flags | blkflags | prjflags)))
2594                         return (error);
2595                 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2596                                 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2597                                 blkflags);
2598         }
2599
2600         return (0);
2601 }
2602
2603 int
2604 xfs_qm_vop_rename_dqattach(
2605         struct xfs_inode        **i_tab)
2606 {
2607         struct xfs_mount        *mp = i_tab[0]->i_mount;
2608         int                     i;
2609
2610         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2611                 return 0;
2612
2613         for (i = 0; (i < 4 && i_tab[i]); i++) {
2614                 struct xfs_inode        *ip = i_tab[i];
2615                 int                     error;
2616
2617                 /*
2618                  * Watch out for duplicate entries in the table.
2619                  */
2620                 if (i == 0 || ip != i_tab[i-1]) {
2621                         if (XFS_NOT_DQATTACHED(mp, ip)) {
2622                                 error = xfs_qm_dqattach(ip, 0);
2623                                 if (error)
2624                                         return error;
2625                         }
2626                 }
2627         }
2628         return 0;
2629 }
2630
2631 void
2632 xfs_qm_vop_create_dqattach(
2633         struct xfs_trans        *tp,
2634         struct xfs_inode        *ip,
2635         struct xfs_dquot        *udqp,
2636         struct xfs_dquot        *gdqp)
2637 {
2638         struct xfs_mount        *mp = tp->t_mountp;
2639
2640         if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2641                 return;
2642
2643         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2644         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2645
2646         if (udqp) {
2647                 xfs_dqlock(udqp);
2648                 XFS_DQHOLD(udqp);
2649                 xfs_dqunlock(udqp);
2650                 ASSERT(ip->i_udquot == NULL);
2651                 ip->i_udquot = udqp;
2652                 ASSERT(XFS_IS_UQUOTA_ON(mp));
2653                 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2654                 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2655         }
2656         if (gdqp) {
2657                 xfs_dqlock(gdqp);
2658                 XFS_DQHOLD(gdqp);
2659                 xfs_dqunlock(gdqp);
2660                 ASSERT(ip->i_gdquot == NULL);
2661                 ip->i_gdquot = gdqp;
2662                 ASSERT(XFS_IS_OQUOTA_ON(mp));
2663                 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2664                         ip->i_d.di_gid : ip->i_d.di_projid) ==
2665                                 be32_to_cpu(gdqp->q_core.d_id));
2666                 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2667         }
2668 }
2669
2670 /* ------------- list stuff -----------------*/
2671 STATIC void
2672 xfs_qm_freelist_init(xfs_frlist_t *ql)
2673 {
2674         ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2675         mutex_init(&ql->qh_lock);
2676         ql->qh_version = 0;
2677         ql->qh_nelems = 0;
2678 }
2679
2680 STATIC void
2681 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2682 {
2683         xfs_dquot_t     *dqp, *nextdqp;
2684
2685         mutex_lock(&ql->qh_lock);
2686         for (dqp = ql->qh_next;
2687              dqp != (xfs_dquot_t *)ql; ) {
2688                 xfs_dqlock(dqp);
2689                 nextdqp = dqp->dq_flnext;
2690 #ifdef QUOTADEBUG
2691                 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2692 #endif
2693                 XQM_FREELIST_REMOVE(dqp);
2694                 xfs_dqunlock(dqp);
2695                 xfs_qm_dqdestroy(dqp);
2696                 dqp = nextdqp;
2697         }
2698         mutex_unlock(&ql->qh_lock);
2699         mutex_destroy(&ql->qh_lock);
2700
2701         ASSERT(ql->qh_nelems == 0);
2702 }
2703
2704 STATIC void
2705 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2706 {
2707         dq->dq_flnext = ql->qh_next;
2708         dq->dq_flprev = (xfs_dquot_t *)ql;
2709         ql->qh_next = dq;
2710         dq->dq_flnext->dq_flprev = dq;
2711         xfs_Gqm->qm_dqfreelist.qh_nelems++;
2712         xfs_Gqm->qm_dqfreelist.qh_version++;
2713 }
2714
2715 void
2716 xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2717 {
2718         xfs_dquot_t *next = dq->dq_flnext;
2719         xfs_dquot_t *prev = dq->dq_flprev;
2720
2721         next->dq_flprev = prev;
2722         prev->dq_flnext = next;
2723         dq->dq_flnext = dq->dq_flprev = dq;
2724         xfs_Gqm->qm_dqfreelist.qh_nelems--;
2725         xfs_Gqm->qm_dqfreelist.qh_version++;
2726 }
2727
2728 void
2729 xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2730 {
2731         xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2732 }