4badf38df5e9c0faff9f692c29cc370d34abd50f
[safe/jmp/linux-2.6] / fs / xfs / quota / xfs_qm.c
1 /*
2  * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32
33 #include "xfs.h"
34 #include "xfs_fs.h"
35 #include "xfs_inum.h"
36 #include "xfs_log.h"
37 #include "xfs_clnt.h"
38 #include "xfs_trans.h"
39 #include "xfs_sb.h"
40 #include "xfs_ag.h"
41 #include "xfs_dir.h"
42 #include "xfs_dir2.h"
43 #include "xfs_alloc.h"
44 #include "xfs_dmapi.h"
45 #include "xfs_quota.h"
46 #include "xfs_mount.h"
47 #include "xfs_alloc_btree.h"
48 #include "xfs_bmap_btree.h"
49 #include "xfs_ialloc_btree.h"
50 #include "xfs_btree.h"
51 #include "xfs_ialloc.h"
52 #include "xfs_attr_sf.h"
53 #include "xfs_dir_sf.h"
54 #include "xfs_dir2_sf.h"
55 #include "xfs_dinode.h"
56 #include "xfs_inode.h"
57 #include "xfs_bmap.h"
58 #include "xfs_bit.h"
59 #include "xfs_rtalloc.h"
60 #include "xfs_error.h"
61 #include "xfs_itable.h"
62 #include "xfs_rw.h"
63 #include "xfs_acl.h"
64 #include "xfs_cap.h"
65 #include "xfs_mac.h"
66 #include "xfs_attr.h"
67 #include "xfs_buf_item.h"
68 #include "xfs_trans_space.h"
69 #include "xfs_utils.h"
70
71 #include "xfs_qm.h"
72
73 /*
74  * The global quota manager. There is only one of these for the entire
75  * system, _not_ one per file system. XQM keeps track of the overall
76  * quota functionality, including maintaining the freelist and hash
77  * tables of dquots.
78  */
79 mutex_t xfs_Gqm_lock;
80 struct xfs_qm   *xfs_Gqm;
81
82 kmem_zone_t     *qm_dqzone;
83 kmem_zone_t     *qm_dqtrxzone;
84 STATIC kmem_shaker_t    xfs_qm_shaker;
85
86 STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
87 STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
88
89 STATIC void     xfs_qm_freelist_init(xfs_frlist_t *);
90 STATIC void     xfs_qm_freelist_destroy(xfs_frlist_t *);
91 STATIC int      xfs_qm_mplist_nowait(xfs_mount_t *);
92 STATIC int      xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
93
94 STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
95 STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
96 STATIC int      xfs_qm_shake(int, unsigned int);
97
98 #ifdef DEBUG
99 extern mutex_t  qcheck_lock;
100 #endif
101
#ifdef QUOTADEBUG
/*
 * Debugging aid: print the title and element count of dquot list 'l',
 * then one line per dquot (id, type string, block count, inode count and
 * reference count), walking the list through the link field named NXT.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement, exactly like the non-debug variant below. A bare
 * { ... } block followed by the caller's ';' would break an unbraced
 * if/else around the call.
 */
#define XQM_LIST_PRINT(l, NXT, title) \
do { \
	xfs_dquot_t	*dqp; int i = 0; \
	cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
	for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
		cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
				  "bcnt = %d, icnt = %d, refs = %d", \
			++i, (int) INT_GET(dqp->q_core.d_id, ARCH_CONVERT), \
			DQFLAGTO_TYPESTR(dqp),       \
			(int) INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT), \
			(int) INT_GET(dqp->q_core.d_icount, ARCH_CONVERT), \
			(int) dqp->q_nrefs); \
	} \
} while (0)
#else
/* Without QUOTADEBUG the macro is an empty statement. */
#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
#endif
119
120 /*
121  * Initialize the XQM structure.
122  * Note that there is not one quota manager per file system.
123  */
124 STATIC struct xfs_qm *
125 xfs_Gqm_init(void)
126 {
127         xfs_qm_t                *xqm;
128         int                     hsize, i;
129
130         xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
131         ASSERT(xqm);
132
133         /*
134          * Initialize the dquot hash tables.
135          */
136         hsize = (DQUOT_HASH_HEURISTIC < XFS_QM_NCSIZE_THRESHOLD) ?
137                 XFS_QM_HASHSIZE_LOW : XFS_QM_HASHSIZE_HIGH;
138         xqm->qm_dqhashmask = hsize - 1;
139
140         xqm->qm_usr_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
141                                                       sizeof(xfs_dqhash_t),
142                                                       KM_SLEEP);
143         xqm->qm_grp_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
144                                                       sizeof(xfs_dqhash_t),
145                                                       KM_SLEEP);
146         ASSERT(xqm->qm_usr_dqhtable != NULL);
147         ASSERT(xqm->qm_grp_dqhtable != NULL);
148
149         for (i = 0; i < hsize; i++) {
150                 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
151                 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
152         }
153
154         /*
155          * Freelist of all dquots of all file systems
156          */
157         xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
158
159         /*
160          * dquot zone. we register our own low-memory callback.
161          */
162         if (!qm_dqzone) {
163                 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
164                                                 "xfs_dquots");
165                 qm_dqzone = xqm->qm_dqzone;
166         } else
167                 xqm->qm_dqzone = qm_dqzone;
168
169         xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);
170
171         /*
172          * The t_dqinfo portion of transactions.
173          */
174         if (!qm_dqtrxzone) {
175                 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
176                                                    "xfs_dqtrx");
177                 qm_dqtrxzone = xqm->qm_dqtrxzone;
178         } else
179                 xqm->qm_dqtrxzone = qm_dqtrxzone;
180
181         atomic_set(&xqm->qm_totaldquots, 0);
182         xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
183         xqm->qm_nrefs = 0;
184 #ifdef DEBUG
185         mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
186 #endif
187         return xqm;
188 }
189
190 /*
191  * Destroy the global quota manager when its reference count goes to zero.
192  */
193 STATIC void
194 xfs_qm_destroy(
195         struct xfs_qm   *xqm)
196 {
197         int             hsize, i;
198
199         ASSERT(xqm != NULL);
200         ASSERT(xqm->qm_nrefs == 0);
201         kmem_shake_deregister(xfs_qm_shaker);
202         hsize = xqm->qm_dqhashmask + 1;
203         for (i = 0; i < hsize; i++) {
204                 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
205                 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
206         }
207         kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
208         kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
209         xqm->qm_usr_dqhtable = NULL;
210         xqm->qm_grp_dqhtable = NULL;
211         xqm->qm_dqhashmask = 0;
212         xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
213 #ifdef DEBUG
214         mutex_destroy(&qcheck_lock);
215 #endif
216         kmem_free(xqm, sizeof(xfs_qm_t));
217 }
218
219 /*
220  * Called at mount time to let XQM know that another file system is
221  * starting quotas. This isn't crucial information as the individual mount
222  * structures are pretty independent, but it helps the XQM keep a
223  * global view of what's going on.
224  */
225 /* ARGSUSED */
226 STATIC int
227 xfs_qm_hold_quotafs_ref(
228         struct xfs_mount *mp)
229 {
230         /*
231          * Need to lock the xfs_Gqm structure for things like this. For example,
232          * the structure could disappear between the entry to this routine and
233          * a HOLD operation if not locked.
234          */
235         XFS_QM_LOCK(xfs_Gqm);
236
237         if (xfs_Gqm == NULL)
238                 xfs_Gqm = xfs_Gqm_init();
239         /*
240          * We can keep a list of all filesystems with quotas mounted for
241          * debugging and statistical purposes, but ...
242          * Just take a reference and get out.
243          */
244         XFS_QM_HOLD(xfs_Gqm);
245         XFS_QM_UNLOCK(xfs_Gqm);
246
247         return 0;
248 }
249
250
251 /*
252  * Release the reference that a filesystem took at mount time,
253  * so that we know when we need to destroy the entire quota manager.
254  */
255 /* ARGSUSED */
256 STATIC void
257 xfs_qm_rele_quotafs_ref(
258         struct xfs_mount *mp)
259 {
260         xfs_dquot_t     *dqp, *nextdqp;
261
262         ASSERT(xfs_Gqm);
263         ASSERT(xfs_Gqm->qm_nrefs > 0);
264
265         /*
266          * Go thru the freelist and destroy all inactive dquots.
267          */
268         xfs_qm_freelist_lock(xfs_Gqm);
269
270         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
271              dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
272                 xfs_dqlock(dqp);
273                 nextdqp = dqp->dq_flnext;
274                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
275                         ASSERT(dqp->q_mount == NULL);
276                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
277                         ASSERT(dqp->HL_PREVP == NULL);
278                         ASSERT(dqp->MPL_PREVP == NULL);
279                         XQM_FREELIST_REMOVE(dqp);
280                         xfs_dqunlock(dqp);
281                         xfs_qm_dqdestroy(dqp);
282                 } else {
283                         xfs_dqunlock(dqp);
284                 }
285                 dqp = nextdqp;
286         }
287         xfs_qm_freelist_unlock(xfs_Gqm);
288
289         /*
290          * Destroy the entire XQM. If somebody mounts with quotaon, this'll
291          * be restarted.
292          */
293         XFS_QM_LOCK(xfs_Gqm);
294         XFS_QM_RELE(xfs_Gqm);
295         if (xfs_Gqm->qm_nrefs == 0) {
296                 xfs_qm_destroy(xfs_Gqm);
297                 xfs_Gqm = NULL;
298         }
299         XFS_QM_UNLOCK(xfs_Gqm);
300 }
301
302 /*
303  * This is called at mount time from xfs_mountfs to initialize the quotainfo
304  * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
305  * so already.  Note that the superblock has not been read in yet.
306  */
307 void
308 xfs_qm_mount_quotainit(
309         xfs_mount_t     *mp,
310         uint            flags)
311 {
312         /*
313          * User, projects or group quotas has to be on.
314          */
315         ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA));
316
317         /*
318          * Initialize the flags in the mount structure. From this point
319          * onwards we look at m_qflags to figure out if quotas's ON/OFF, etc.
320          * Note that we enforce nothing if accounting is off.
321          * ie.  XFSMNT_*QUOTA must be ON for XFSMNT_*QUOTAENF.
322          * It isn't necessary to take the quotaoff lock to do this; this is
323          * called from mount.
324          */
325         if (flags & XFSMNT_UQUOTA) {
326                 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
327                 if (flags & XFSMNT_UQUOTAENF)
328                         mp->m_qflags |= XFS_UQUOTA_ENFD;
329         }
330         if (flags & XFSMNT_GQUOTA) {
331                 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
332                 if (flags & XFSMNT_GQUOTAENF)
333                         mp->m_qflags |= XFS_OQUOTA_ENFD;
334         } else if (flags & XFSMNT_PQUOTA) {
335                 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
336                 if (flags & XFSMNT_PQUOTAENF)
337                         mp->m_qflags |= XFS_OQUOTA_ENFD;
338         }
339 }
340
341 /*
342  * Just destroy the quotainfo structure.
343  */
344 void
345 xfs_qm_unmount_quotadestroy(
346         xfs_mount_t     *mp)
347 {
348         if (mp->m_quotainfo)
349                 xfs_qm_destroy_quotainfo(mp);
350 }
351
352
353 /*
354  * This is called from xfs_mountfs to start quotas and initialize all
355  * necessary data structures like quotainfo.  This is also responsible for
356  * running a quotacheck as necessary.  We are guaranteed that the superblock
357  * is consistently read in at this point.
358  */
359 int
360 xfs_qm_mount_quotas(
361         xfs_mount_t     *mp,
362         int             mfsi_flags)
363 {
364         unsigned long   s;
365         int             error = 0;
366         uint            sbf;
367
368         /*
369          * If a file system had quotas running earlier, but decided to
370          * mount without -o uquota/pquota/gquota options, revoke the
371          * quotachecked license, and bail out.
372          */
373         if (! XFS_IS_QUOTA_ON(mp) &&
374             (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) {
375                 mp->m_qflags = 0;
376                 goto write_changes;
377         }
378
379         /*
380          * If quotas on realtime volumes is not supported, we disable
381          * quotas immediately.
382          */
383         if (mp->m_sb.sb_rextents) {
384                 cmn_err(CE_NOTE,
385                         "Cannot turn on quotas for realtime filesystem %s",
386                         mp->m_fsname);
387                 mp->m_qflags = 0;
388                 goto write_changes;
389         }
390
391         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
392
393         /*
394          * Allocate the quotainfo structure inside the mount struct, and
395          * create quotainode(s), and change/rev superblock if necessary.
396          */
397         if ((error = xfs_qm_init_quotainfo(mp))) {
398                 /*
399                  * We must turn off quotas.
400                  */
401                 ASSERT(mp->m_quotainfo == NULL);
402                 mp->m_qflags = 0;
403                 goto write_changes;
404         }
405         /*
406          * If any of the quotas are not consistent, do a quotacheck.
407          */
408         if (XFS_QM_NEED_QUOTACHECK(mp) &&
409                 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
410                 if ((error = xfs_qm_quotacheck(mp))) {
411                         /* Quotacheck has failed and quotas have
412                          * been disabled.
413                          */
414                         return XFS_ERROR(error);
415                 }
416         }
417
418  write_changes:
419         /*
420          * We actually don't have to acquire the SB_LOCK at all.
421          * This can only be called from mount, and that's single threaded. XXX
422          */
423         s = XFS_SB_LOCK(mp);
424         sbf = mp->m_sb.sb_qflags;
425         mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
426         XFS_SB_UNLOCK(mp, s);
427
428         if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
429                 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
430                         /*
431                          * We could only have been turning quotas off.
432                          * We aren't in very good shape actually because
433                          * the incore structures are convinced that quotas are
434                          * off, but the on disk superblock doesn't know that !
435                          */
436                         ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
437                         xfs_fs_cmn_err(CE_ALERT, mp,
438                                 "XFS mount_quotas: Superblock update failed!");
439                 }
440         }
441
442         if (error) {
443                 xfs_fs_cmn_err(CE_WARN, mp,
444                         "Failed to initialize disk quotas.");
445         }
446         return XFS_ERROR(error);
447 }
448
449 /*
450  * Called from the vfsops layer.
451  */
452 int
453 xfs_qm_unmount_quotas(
454         xfs_mount_t     *mp)
455 {
456         xfs_inode_t     *uqp, *gqp;
457         int             error = 0;
458
459         /*
460          * Release the dquots that root inode, et al might be holding,
461          * before we flush quotas and blow away the quotainfo structure.
462          */
463         ASSERT(mp->m_rootip);
464         xfs_qm_dqdetach(mp->m_rootip);
465         if (mp->m_rbmip)
466                 xfs_qm_dqdetach(mp->m_rbmip);
467         if (mp->m_rsumip)
468                 xfs_qm_dqdetach(mp->m_rsumip);
469
470         /*
471          * Flush out the quota inodes.
472          */
473         uqp = gqp = NULL;
474         if (mp->m_quotainfo) {
475                 if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
476                         xfs_ilock(uqp, XFS_ILOCK_EXCL);
477                         xfs_iflock(uqp);
478                         error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
479                         xfs_iunlock(uqp, XFS_ILOCK_EXCL);
480                         if (unlikely(error == EFSCORRUPTED)) {
481                                 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
482                                                  XFS_ERRLEVEL_LOW, mp);
483                                 goto out;
484                         }
485                 }
486                 if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
487                         xfs_ilock(gqp, XFS_ILOCK_EXCL);
488                         xfs_iflock(gqp);
489                         error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
490                         xfs_iunlock(gqp, XFS_ILOCK_EXCL);
491                         if (unlikely(error == EFSCORRUPTED)) {
492                                 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
493                                                  XFS_ERRLEVEL_LOW, mp);
494                                 goto out;
495                         }
496                 }
497         }
498         if (uqp) {
499                  XFS_PURGE_INODE(uqp);
500                  mp->m_quotainfo->qi_uquotaip = NULL;
501         }
502         if (gqp) {
503                 XFS_PURGE_INODE(gqp);
504                 mp->m_quotainfo->qi_gquotaip = NULL;
505         }
506 out:
507         return XFS_ERROR(error);
508 }
509
510 /*
511  * Flush all dquots of the given file system to disk. The dquots are
512  * _not_ purged from memory here, just their data written to disk.
513  */
514 STATIC int
515 xfs_qm_dqflush_all(
516         xfs_mount_t     *mp,
517         int             flags)
518 {
519         int             recl;
520         xfs_dquot_t     *dqp;
521         int             niters;
522         int             error;
523
524         if (mp->m_quotainfo == NULL)
525                 return (0);
526         niters = 0;
527 again:
528         xfs_qm_mplist_lock(mp);
529         FOREACH_DQUOT_IN_MP(dqp, mp) {
530                 xfs_dqlock(dqp);
531                 if (! XFS_DQ_IS_DIRTY(dqp)) {
532                         xfs_dqunlock(dqp);
533                         continue;
534                 }
535                 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
536                 /* XXX a sentinel would be better */
537                 recl = XFS_QI_MPLRECLAIMS(mp);
538                 if (! xfs_qm_dqflock_nowait(dqp)) {
539                         /*
540                          * If we can't grab the flush lock then check
541                          * to see if the dquot has been flushed delayed
542                          * write.  If so, grab its buffer and send it
543                          * out immediately.  We'll be able to acquire
544                          * the flush lock when the I/O completes.
545                          */
546                         xfs_qm_dqflock_pushbuf_wait(dqp);
547                 }
548                 /*
549                  * Let go of the mplist lock. We don't want to hold it
550                  * across a disk write.
551                  */
552                 xfs_qm_mplist_unlock(mp);
553                 error = xfs_qm_dqflush(dqp, flags);
554                 xfs_dqunlock(dqp);
555                 if (error)
556                         return (error);
557
558                 xfs_qm_mplist_lock(mp);
559                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
560                         xfs_qm_mplist_unlock(mp);
561                         /* XXX restart limit */
562                         goto again;
563                 }
564         }
565
566         xfs_qm_mplist_unlock(mp);
567         /* return ! busy */
568         return (0);
569 }
570 /*
571  * Release the group dquot pointers the user dquots may be
572  * carrying around as a hint. mplist is locked on entry and exit.
573  */
574 STATIC void
575 xfs_qm_detach_gdquots(
576         xfs_mount_t     *mp)
577 {
578         xfs_dquot_t     *dqp, *gdqp;
579         int             nrecl;
580
581  again:
582         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
583         dqp = XFS_QI_MPLNEXT(mp);
584         while (dqp) {
585                 xfs_dqlock(dqp);
586                 if ((gdqp = dqp->q_gdquot)) {
587                         xfs_dqlock(gdqp);
588                         dqp->q_gdquot = NULL;
589                 }
590                 xfs_dqunlock(dqp);
591
592                 if (gdqp) {
593                         /*
594                          * Can't hold the mplist lock across a dqput.
595                          * XXXmust convert to marker based iterations here.
596                          */
597                         nrecl = XFS_QI_MPLRECLAIMS(mp);
598                         xfs_qm_mplist_unlock(mp);
599                         xfs_qm_dqput(gdqp);
600
601                         xfs_qm_mplist_lock(mp);
602                         if (nrecl != XFS_QI_MPLRECLAIMS(mp))
603                                 goto again;
604                 }
605                 dqp = dqp->MPL_NEXT;
606         }
607 }
608
609 /*
610  * Go through all the incore dquots of this file system and take them
611  * off the mplist and hashlist, if the dquot type matches the dqtype
612  * parameter. This is used when turning off quota accounting for
613  * users and/or groups, as well as when the filesystem is unmounting.
614  */
615 STATIC int
616 xfs_qm_dqpurge_int(
617         xfs_mount_t     *mp,
618         uint            flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
619 {
620         xfs_dquot_t     *dqp;
621         uint            dqtype;
622         int             nrecl;
623         xfs_dquot_t     *nextdqp;
624         int             nmisses;
625
626         if (mp->m_quotainfo == NULL)
627                 return (0);
628
629         dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
630         dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
631         dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
632
633         xfs_qm_mplist_lock(mp);
634
635         /*
636          * In the first pass through all incore dquots of this filesystem,
637          * we release the group dquot pointers the user dquots may be
638          * carrying around as a hint. We need to do this irrespective of
639          * what's being turned off.
640          */
641         xfs_qm_detach_gdquots(mp);
642
643       again:
644         nmisses = 0;
645         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
646         /*
647          * Try to get rid of all of the unwanted dquots. The idea is to
648          * get them off mplist and hashlist, but leave them on freelist.
649          */
650         dqp = XFS_QI_MPLNEXT(mp);
651         while (dqp) {
652                 /*
653                  * It's OK to look at the type without taking dqlock here.
654                  * We're holding the mplist lock here, and that's needed for
655                  * a dqreclaim.
656                  */
657                 if ((dqp->dq_flags & dqtype) == 0) {
658                         dqp = dqp->MPL_NEXT;
659                         continue;
660                 }
661
662                 if (! xfs_qm_dqhashlock_nowait(dqp)) {
663                         nrecl = XFS_QI_MPLRECLAIMS(mp);
664                         xfs_qm_mplist_unlock(mp);
665                         XFS_DQ_HASH_LOCK(dqp->q_hash);
666                         xfs_qm_mplist_lock(mp);
667
668                         /*
669                          * XXXTheoretically, we can get into a very long
670                          * ping pong game here.
671                          * No one can be adding dquots to the mplist at
672                          * this point, but somebody might be taking things off.
673                          */
674                         if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
675                                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
676                                 goto again;
677                         }
678                 }
679
680                 /*
681                  * Take the dquot off the mplist and hashlist. It may remain on
682                  * freelist in INACTIVE state.
683                  */
684                 nextdqp = dqp->MPL_NEXT;
685                 nmisses += xfs_qm_dqpurge(dqp, flags);
686                 dqp = nextdqp;
687         }
688         xfs_qm_mplist_unlock(mp);
689         return nmisses;
690 }
691
692 int
693 xfs_qm_dqpurge_all(
694         xfs_mount_t     *mp,
695         uint            flags)
696 {
697         int             ndquots;
698
699         /*
700          * Purge the dquot cache.
701          * None of the dquots should really be busy at this point.
702          */
703         if (mp->m_quotainfo) {
704                 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
705                         delay(ndquots * 10);
706                 }
707         }
708         return 0;
709 }
710
711 STATIC int
712 xfs_qm_dqattach_one(
713         xfs_inode_t     *ip,
714         xfs_dqid_t      id,
715         uint            type,
716         uint            doalloc,
717         uint            dolock,
718         xfs_dquot_t     *udqhint, /* hint */
719         xfs_dquot_t     **IO_idqpp)
720 {
721         xfs_dquot_t     *dqp;
722         int             error;
723
724         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
725         error = 0;
726         /*
727          * See if we already have it in the inode itself. IO_idqpp is
728          * &i_udquot or &i_gdquot. This made the code look weird, but
729          * made the logic a lot simpler.
730          */
731         if ((dqp = *IO_idqpp)) {
732                 if (dolock)
733                         xfs_dqlock(dqp);
734                 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
735                 goto done;
736         }
737
738         /*
739          * udqhint is the i_udquot field in inode, and is non-NULL only
740          * when the type arg is group/project. Its purpose is to save a
741          * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
742          * the user dquot.
743          */
744         ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
745         if (udqhint && !dolock)
746                 xfs_dqlock(udqhint);
747
748         /*
749          * No need to take dqlock to look at the id.
750          * The ID can't change until it gets reclaimed, and it won't
751          * be reclaimed as long as we have a ref from inode and we hold
752          * the ilock.
753          */
754         if (udqhint &&
755             (dqp = udqhint->q_gdquot) &&
756             (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id)) {
757                 ASSERT(XFS_DQ_IS_LOCKED(udqhint));
758                 xfs_dqlock(dqp);
759                 XFS_DQHOLD(dqp);
760                 ASSERT(*IO_idqpp == NULL);
761                 *IO_idqpp = dqp;
762                 if (!dolock) {
763                         xfs_dqunlock(dqp);
764                         xfs_dqunlock(udqhint);
765                 }
766                 goto done;
767         }
768         /*
769          * We can't hold a dquot lock when we call the dqget code.
770          * We'll deadlock in no time, because of (not conforming to)
771          * lock ordering - the inodelock comes before any dquot lock,
772          * and we may drop and reacquire the ilock in xfs_qm_dqget().
773          */
774         if (udqhint)
775                 xfs_dqunlock(udqhint);
776         /*
777          * Find the dquot from somewhere. This bumps the
778          * reference count of dquot and returns it locked.
779          * This can return ENOENT if dquot didn't exist on
780          * disk and we didn't ask it to allocate;
781          * ESRCH if quotas got turned off suddenly.
782          */
783         if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
784                                  doalloc|XFS_QMOPT_DOWARN, &dqp))) {
785                 if (udqhint && dolock)
786                         xfs_dqlock(udqhint);
787                 goto done;
788         }
789
790         xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
791         /*
792          * dqget may have dropped and re-acquired the ilock, but it guarantees
793          * that the dquot returned is the one that should go in the inode.
794          */
795         *IO_idqpp = dqp;
796         ASSERT(dqp);
797         ASSERT(XFS_DQ_IS_LOCKED(dqp));
798         if (! dolock) {
799                 xfs_dqunlock(dqp);
800                 goto done;
801         }
802         if (! udqhint)
803                 goto done;
804
805         ASSERT(udqhint);
806         ASSERT(dolock);
807         ASSERT(XFS_DQ_IS_LOCKED(dqp));
808         if (! xfs_qm_dqlock_nowait(udqhint)) {
809                 xfs_dqunlock(dqp);
810                 xfs_dqlock(udqhint);
811                 xfs_dqlock(dqp);
812         }
813       done:
814 #ifdef QUOTADEBUG
815         if (udqhint) {
816                 if (dolock)
817                         ASSERT(XFS_DQ_IS_LOCKED(udqhint));
818         }
819         if (! error) {
820                 if (dolock)
821                         ASSERT(XFS_DQ_IS_LOCKED(dqp));
822         }
823 #endif
824         return (error);
825 }
826
827
828 /*
829  * Given a udquot and gdquot, attach a ptr to the group dquot in the
830  * udquot as a hint for future lookups. The idea sounds simple, but the
831  * execution isn't, because the udquot might have a group dquot attached
832  * already and getting rid of that gets us into lock ordering contraints.
833  * The process is complicated more by the fact that the dquots may or may not
834  * be locked on entry.
835  */
836 STATIC void
837 xfs_qm_dqattach_grouphint(
838         xfs_dquot_t     *udq,
839         xfs_dquot_t     *gdq,
840         uint            locked)
841 {
842         xfs_dquot_t     *tmp;
843
844 #ifdef QUOTADEBUG
845         if (locked) {
846                 ASSERT(XFS_DQ_IS_LOCKED(udq));
847                 ASSERT(XFS_DQ_IS_LOCKED(gdq));
848         }
849 #endif
850         if (! locked)
851                 xfs_dqlock(udq);
852
853         if ((tmp = udq->q_gdquot)) {
854                 if (tmp == gdq) {
855                         if (! locked)
856                                 xfs_dqunlock(udq);
857                         return;
858                 }
859
860                 udq->q_gdquot = NULL;
861                 /*
862                  * We can't keep any dqlocks when calling dqrele,
863                  * because the freelist lock comes before dqlocks.
864                  */
865                 xfs_dqunlock(udq);
866                 if (locked)
867                         xfs_dqunlock(gdq);
868                 /*
869                  * we took a hard reference once upon a time in dqget,
870                  * so give it back when the udquot no longer points at it
871                  * dqput() does the unlocking of the dquot.
872                  */
873                 xfs_qm_dqrele(tmp);
874
875                 xfs_dqlock(udq);
876                 xfs_dqlock(gdq);
877
878         } else {
879                 ASSERT(XFS_DQ_IS_LOCKED(udq));
880                 if (! locked) {
881                         xfs_dqlock(gdq);
882                 }
883         }
884
885         ASSERT(XFS_DQ_IS_LOCKED(udq));
886         ASSERT(XFS_DQ_IS_LOCKED(gdq));
887         /*
888          * Somebody could have attached a gdquot here,
889          * when we dropped the uqlock. If so, just do nothing.
890          */
891         if (udq->q_gdquot == NULL) {
892                 XFS_DQHOLD(gdq);
893                 udq->q_gdquot = gdq;
894         }
895         if (! locked) {
896                 xfs_dqunlock(gdq);
897                 xfs_dqunlock(udq);
898         }
899 }
900
901
902 /*
903  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
904  * into account.
905  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
906  * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
907  * much made this code a complete mess, but it has been pretty useful.
908  * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
909  * Inode may get unlocked and relocked in here, and the caller must deal with
910  * the consequences.
911  */
912 int
913 xfs_qm_dqattach(
914         xfs_inode_t     *ip,
915         uint            flags)
916 {
917         xfs_mount_t     *mp = ip->i_mount;
918         uint            nquotas = 0;
919         int             error = 0;
920
921         if ((! XFS_IS_QUOTA_ON(mp)) ||
922             (! XFS_NOT_DQATTACHED(mp, ip)) ||
923             (ip->i_ino == mp->m_sb.sb_uquotino) ||
924             (ip->i_ino == mp->m_sb.sb_gquotino))
925                 return (0);
926
927         ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
928                XFS_ISLOCKED_INODE_EXCL(ip));
929
930         if (! (flags & XFS_QMOPT_ILOCKED))
931                 xfs_ilock(ip, XFS_ILOCK_EXCL);
932
933         if (XFS_IS_UQUOTA_ON(mp)) {
934                 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
935                                                 flags & XFS_QMOPT_DQALLOC,
936                                                 flags & XFS_QMOPT_DQLOCK,
937                                                 NULL, &ip->i_udquot);
938                 if (error)
939                         goto done;
940                 nquotas++;
941         }
942         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
943         if (XFS_IS_OQUOTA_ON(mp)) {
944                 error = XFS_IS_GQUOTA_ON(mp) ?
945                         xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
946                                                 flags & XFS_QMOPT_DQALLOC,
947                                                 flags & XFS_QMOPT_DQLOCK,
948                                                 ip->i_udquot, &ip->i_gdquot) :
949                         xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
950                                                 flags & XFS_QMOPT_DQALLOC,
951                                                 flags & XFS_QMOPT_DQLOCK,
952                                                 ip->i_udquot, &ip->i_gdquot);
953                 /*
954                  * Don't worry about the udquot that we may have
955                  * attached above. It'll get detached, if not already.
956                  */
957                 if (error)
958                         goto done;
959                 nquotas++;
960         }
961
962         /*
963          * Attach this group quota to the user quota as a hint.
964          * This WON'T, in general, result in a thrash.
965          */
966         if (nquotas == 2) {
967                 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
968                 ASSERT(ip->i_udquot);
969                 ASSERT(ip->i_gdquot);
970
971                 /*
972                  * We may or may not have the i_udquot locked at this point,
973                  * but this check is OK since we don't depend on the i_gdquot to
974                  * be accurate 100% all the time. It is just a hint, and this
975                  * will succeed in general.
976                  */
977                 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
978                         goto done;
979                 /*
980                  * Attach i_gdquot to the gdquot hint inside the i_udquot.
981                  */
982                 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
983                                          flags & XFS_QMOPT_DQLOCK);
984         }
985
986       done:
987
988 #ifdef QUOTADEBUG
989         if (! error) {
990                 if (ip->i_udquot) {
991                         if (flags & XFS_QMOPT_DQLOCK)
992                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
993                 }
994                 if (ip->i_gdquot) {
995                         if (flags & XFS_QMOPT_DQLOCK)
996                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
997                 }
998                 if (XFS_IS_UQUOTA_ON(mp))
999                         ASSERT(ip->i_udquot);
1000                 if (XFS_IS_OQUOTA_ON(mp))
1001                         ASSERT(ip->i_gdquot);
1002         }
1003 #endif
1004
1005         if (! (flags & XFS_QMOPT_ILOCKED))
1006                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1007
1008 #ifdef QUOTADEBUG
1009         else
1010                 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
1011 #endif
1012         return (error);
1013 }
1014
1015 /*
1016  * Release dquots (and their references) if any.
1017  * The inode should be locked EXCL except when this's called by
1018  * xfs_ireclaim.
1019  */
1020 void
1021 xfs_qm_dqdetach(
1022         xfs_inode_t     *ip)
1023 {
1024         if (!(ip->i_udquot || ip->i_gdquot))
1025                 return;
1026
1027         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
1028         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
1029         if (ip->i_udquot) {
1030                 xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
1031                 xfs_qm_dqrele(ip->i_udquot);
1032                 ip->i_udquot = NULL;
1033         }
1034         if (ip->i_gdquot) {
1035                 xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
1036                 xfs_qm_dqrele(ip->i_gdquot);
1037                 ip->i_gdquot = NULL;
1038         }
1039 }
1040
1041 /*
1042  * This is called by VFS_SYNC and flags arg determines the caller,
1043  * and its motives, as done in xfs_sync.
1044  *
1045  * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
1046  * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
1047  * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
1048  */
1049
1050 int
1051 xfs_qm_sync(
1052         xfs_mount_t     *mp,
1053         short           flags)
1054 {
1055         int             recl, restarts;
1056         xfs_dquot_t     *dqp;
1057         uint            flush_flags;
1058         boolean_t       nowait;
1059         int             error;
1060
1061         restarts = 0;
1062         /*
1063          * We won't block unless we are asked to.
1064          */
1065         nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
1066
1067   again:
1068         xfs_qm_mplist_lock(mp);
1069         /*
1070          * dqpurge_all() also takes the mplist lock and iterate thru all dquots
1071          * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
1072          * when we have the mplist lock, we know that dquots will be consistent
1073          * as long as we have it locked.
1074          */
1075         if (! XFS_IS_QUOTA_ON(mp)) {
1076                 xfs_qm_mplist_unlock(mp);
1077                 return (0);
1078         }
1079         FOREACH_DQUOT_IN_MP(dqp, mp) {
1080                 /*
1081                  * If this is vfs_sync calling, then skip the dquots that
1082                  * don't 'seem' to be dirty. ie. don't acquire dqlock.
1083                  * This is very similar to what xfs_sync does with inodes.
1084                  */
1085                 if (flags & SYNC_BDFLUSH) {
1086                         if (! XFS_DQ_IS_DIRTY(dqp))
1087                                 continue;
1088                 }
1089
1090                 if (nowait) {
1091                         /*
1092                          * Try to acquire the dquot lock. We are NOT out of
1093                          * lock order, but we just don't want to wait for this
1094                          * lock, unless somebody wanted us to.
1095                          */
1096                         if (! xfs_qm_dqlock_nowait(dqp))
1097                                 continue;
1098                 } else {
1099                         xfs_dqlock(dqp);
1100                 }
1101
1102                 /*
1103                  * Now, find out for sure if this dquot is dirty or not.
1104                  */
1105                 if (! XFS_DQ_IS_DIRTY(dqp)) {
1106                         xfs_dqunlock(dqp);
1107                         continue;
1108                 }
1109
1110                 /* XXX a sentinel would be better */
1111                 recl = XFS_QI_MPLRECLAIMS(mp);
1112                 if (! xfs_qm_dqflock_nowait(dqp)) {
1113                         if (nowait) {
1114                                 xfs_dqunlock(dqp);
1115                                 continue;
1116                         }
1117                         /*
1118                          * If we can't grab the flush lock then if the caller
1119                          * really wanted us to give this our best shot,
1120                          * see if we can give a push to the buffer before we wait
1121                          * on the flush lock. At this point, we know that
1122                          * eventhough the dquot is being flushed,
1123                          * it has (new) dirty data.
1124                          */
1125                         xfs_qm_dqflock_pushbuf_wait(dqp);
1126                 }
1127                 /*
1128                  * Let go of the mplist lock. We don't want to hold it
1129                  * across a disk write
1130                  */
1131                 flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
1132                 xfs_qm_mplist_unlock(mp);
1133                 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
1134                 error = xfs_qm_dqflush(dqp, flush_flags);
1135                 xfs_dqunlock(dqp);
1136                 if (error && XFS_FORCED_SHUTDOWN(mp))
1137                         return(0);      /* Need to prevent umount failure */
1138                 else if (error)
1139                         return (error);
1140
1141                 xfs_qm_mplist_lock(mp);
1142                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
1143                         if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1144                                 break;
1145
1146                         xfs_qm_mplist_unlock(mp);
1147                         goto again;
1148                 }
1149         }
1150
1151         xfs_qm_mplist_unlock(mp);
1152         return (0);
1153 }
1154
1155
1156 /*
1157  * This initializes all the quota information that's kept in the
1158  * mount structure
1159  */
1160 STATIC int
1161 xfs_qm_init_quotainfo(
1162         xfs_mount_t     *mp)
1163 {
1164         xfs_quotainfo_t *qinf;
1165         int             error;
1166         xfs_dquot_t     *dqp;
1167
1168         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1169
1170         /*
1171          * Tell XQM that we exist as soon as possible.
1172          */
1173         if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1174                 return (error);
1175         }
1176
1177         qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
1178
1179         /*
1180          * See if quotainodes are setup, and if not, allocate them,
1181          * and change the superblock accordingly.
1182          */
1183         if ((error = xfs_qm_init_quotainos(mp))) {
1184                 kmem_free(qinf, sizeof(xfs_quotainfo_t));
1185                 mp->m_quotainfo = NULL;
1186                 return (error);
1187         }
1188
1189         spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
1190         xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1191         qinf->qi_dqreclaims = 0;
1192
1193         /* mutex used to serialize quotaoffs */
1194         mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff");
1195
1196         /* Precalc some constants */
1197         qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1198         ASSERT(qinf->qi_dqchunklen);
1199         qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
1200         do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
1201
1202         mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1203
1204         /*
1205          * We try to get the limits from the superuser's limits fields.
1206          * This is quite hacky, but it is standard quota practice.
1207          * We look at the USR dquot with id == 0 first, but if user quotas
1208          * are not enabled we goto the GRP dquot with id == 0.
1209          * We don't really care to keep separate default limits for user
1210          * and group quotas, at least not at this point.
1211          */
1212         error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1213                              XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
1214                              (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1215                                 XFS_DQ_PROJ),
1216                              XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1217                              &dqp);
1218         if (! error) {
1219                 xfs_disk_dquot_t        *ddqp = &dqp->q_core;
1220
1221                 /*
1222                  * The warnings and timers set the grace period given to
1223                  * a user or group before he or she can not perform any
1224                  * more writing. If it is zero, a default is used.
1225                  */
1226                 qinf->qi_btimelimit =
1227                                 INT_GET(ddqp->d_btimer, ARCH_CONVERT) ?
1228                                 INT_GET(ddqp->d_btimer, ARCH_CONVERT) :
1229                                 XFS_QM_BTIMELIMIT;
1230                 qinf->qi_itimelimit =
1231                                 INT_GET(ddqp->d_itimer, ARCH_CONVERT) ?
1232                                 INT_GET(ddqp->d_itimer, ARCH_CONVERT) :
1233                                 XFS_QM_ITIMELIMIT;
1234                 qinf->qi_rtbtimelimit =
1235                                 INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) ?
1236                                 INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) :
1237                                 XFS_QM_RTBTIMELIMIT;
1238                 qinf->qi_bwarnlimit =
1239                                 INT_GET(ddqp->d_bwarns, ARCH_CONVERT) ?
1240                                 INT_GET(ddqp->d_bwarns, ARCH_CONVERT) :
1241                                 XFS_QM_BWARNLIMIT;
1242                 qinf->qi_iwarnlimit =
1243                                 INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ?
1244                                 INT_GET(ddqp->d_iwarns, ARCH_CONVERT) :
1245                                 XFS_QM_IWARNLIMIT;
1246                 qinf->qi_rtbwarnlimit =
1247                                 INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) ?
1248                                 INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) :
1249                                 XFS_QM_RTBWARNLIMIT;
1250                 qinf->qi_bhardlimit =
1251                                 INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT);
1252                 qinf->qi_bsoftlimit =
1253                                 INT_GET(ddqp->d_blk_softlimit, ARCH_CONVERT);
1254                 qinf->qi_ihardlimit =
1255                                 INT_GET(ddqp->d_ino_hardlimit, ARCH_CONVERT);
1256                 qinf->qi_isoftlimit =
1257                                 INT_GET(ddqp->d_ino_softlimit, ARCH_CONVERT);
1258                 qinf->qi_rtbhardlimit =
1259                                 INT_GET(ddqp->d_rtb_hardlimit, ARCH_CONVERT);
1260                 qinf->qi_rtbsoftlimit =
1261                                 INT_GET(ddqp->d_rtb_softlimit, ARCH_CONVERT);
1262  
1263                 /*
1264                  * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1265                  * we don't want this dquot cached. We haven't done a
1266                  * quotacheck yet, and quotacheck doesn't like incore dquots.
1267                  */
1268                 xfs_qm_dqdestroy(dqp);
1269         } else {
1270                 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1271                 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1272                 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1273                 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1274                 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1275                 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1276         }
1277
1278         return (0);
1279 }
1280
1281
1282 /*
1283  * Gets called when unmounting a filesystem or when all quotas get
1284  * turned off.
1285  * This purges the quota inodes, destroys locks and frees itself.
1286  */
1287 void
1288 xfs_qm_destroy_quotainfo(
1289         xfs_mount_t     *mp)
1290 {
1291         xfs_quotainfo_t *qi;
1292
1293         qi = mp->m_quotainfo;
1294         ASSERT(qi != NULL);
1295         ASSERT(xfs_Gqm != NULL);
1296
1297         /*
1298          * Release the reference that XQM kept, so that we know
1299          * when the XQM structure should be freed. We cannot assume
1300          * that xfs_Gqm is non-null after this point.
1301          */
1302         xfs_qm_rele_quotafs_ref(mp);
1303
1304         spinlock_destroy(&qi->qi_pinlock);
1305         xfs_qm_list_destroy(&qi->qi_dqlist);
1306
1307         if (qi->qi_uquotaip) {
1308                 XFS_PURGE_INODE(qi->qi_uquotaip);
1309                 qi->qi_uquotaip = NULL; /* paranoia */
1310         }
1311         if (qi->qi_gquotaip) {
1312                 XFS_PURGE_INODE(qi->qi_gquotaip);
1313                 qi->qi_gquotaip = NULL;
1314         }
1315         mutex_destroy(&qi->qi_quotaofflock);
1316         kmem_free(qi, sizeof(xfs_quotainfo_t));
1317         mp->m_quotainfo = NULL;
1318 }
1319
1320
1321
1322 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1323
1324 /* ARGSUSED */
1325 STATIC void
1326 xfs_qm_list_init(
1327         xfs_dqlist_t    *list,
1328         char            *str,
1329         int             n)
1330 {
1331         mutex_init(&list->qh_lock, MUTEX_DEFAULT, str);
1332         list->qh_next = NULL;
1333         list->qh_version = 0;
1334         list->qh_nelems = 0;
1335 }
1336
1337 STATIC void
1338 xfs_qm_list_destroy(
1339         xfs_dqlist_t    *list)
1340 {
1341         mutex_destroy(&(list->qh_lock));
1342 }
1343
1344
1345 /*
1346  * Stripped down version of dqattach. This doesn't attach, or even look at the
1347  * dquots attached to the inode. The rationale is that there won't be any
1348  * attached at the time this is called from quotacheck.
1349  */
1350 STATIC int
1351 xfs_qm_dqget_noattach(
1352         xfs_inode_t     *ip,
1353         xfs_dquot_t     **O_udqpp,
1354         xfs_dquot_t     **O_gdqpp)
1355 {
1356         int             error;
1357         xfs_mount_t     *mp;
1358         xfs_dquot_t     *udqp, *gdqp;
1359
1360         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
1361         mp = ip->i_mount;
1362         udqp = NULL;
1363         gdqp = NULL;
1364
1365         if (XFS_IS_UQUOTA_ON(mp)) {
1366                 ASSERT(ip->i_udquot == NULL);
1367                 /*
1368                  * We want the dquot allocated if it doesn't exist.
1369                  */
1370                 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1371                                          XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1372                                          &udqp))) {
1373                         /*
1374                          * Shouldn't be able to turn off quotas here.
1375                          */
1376                         ASSERT(error != ESRCH);
1377                         ASSERT(error != ENOENT);
1378                         return (error);
1379                 }
1380                 ASSERT(udqp);
1381         }
1382
1383         if (XFS_IS_OQUOTA_ON(mp)) {
1384                 ASSERT(ip->i_gdquot == NULL);
1385                 if (udqp)
1386                         xfs_dqunlock(udqp);
1387                 error = XFS_IS_GQUOTA_ON(mp) ?
1388                                 xfs_qm_dqget(mp, ip,
1389                                              ip->i_d.di_gid, XFS_DQ_GROUP,
1390                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1391                                              &gdqp) :
1392                                 xfs_qm_dqget(mp, ip,
1393                                              ip->i_d.di_projid, XFS_DQ_PROJ,
1394                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1395                                              &gdqp);
1396                 if (error) {
1397                         if (udqp)
1398                                 xfs_qm_dqrele(udqp);
1399                         ASSERT(error != ESRCH);
1400                         ASSERT(error != ENOENT);
1401                         return (error);
1402                 }
1403                 ASSERT(gdqp);
1404
1405                 /* Reacquire the locks in the right order */
1406                 if (udqp) {
1407                         if (! xfs_qm_dqlock_nowait(udqp)) {
1408                                 xfs_dqunlock(gdqp);
1409                                 xfs_dqlock(udqp);
1410                                 xfs_dqlock(gdqp);
1411                         }
1412                 }
1413         }
1414
1415         *O_udqpp = udqp;
1416         *O_gdqpp = gdqp;
1417
1418 #ifdef QUOTADEBUG
1419         if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1420         if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1421 #endif
1422         return (0);
1423 }
1424
1425 /*
1426  * Create an inode and return with a reference already taken, but unlocked
1427  * This is how we create quota inodes
1428  */
1429 STATIC int
1430 xfs_qm_qino_alloc(
1431         xfs_mount_t     *mp,
1432         xfs_inode_t     **ip,
1433         __int64_t       sbfields,
1434         uint            flags)
1435 {
1436         xfs_trans_t     *tp;
1437         int             error;
1438         unsigned long s;
1439         cred_t          zerocr;
1440         int             committed;
1441
1442         tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
1443         if ((error = xfs_trans_reserve(tp,
1444                                       XFS_QM_QINOCREATE_SPACE_RES(mp),
1445                                       XFS_CREATE_LOG_RES(mp), 0,
1446                                       XFS_TRANS_PERM_LOG_RES,
1447                                       XFS_CREATE_LOG_COUNT))) {
1448                 xfs_trans_cancel(tp, 0);
1449                 return (error);
1450         }
1451         memset(&zerocr, 0, sizeof(zerocr));
1452
1453         if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0,
1454                                    &zerocr, 0, 1, ip, &committed))) {
1455                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1456                                  XFS_TRANS_ABORT);
1457                 return (error);
1458         }
1459
1460         /*
1461          * Keep an extra reference to this quota inode. This inode is
1462          * locked exclusively and joined to the transaction already.
1463          */
1464         ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
1465         VN_HOLD(XFS_ITOV((*ip)));
1466
1467         /*
1468          * Make the changes in the superblock, and log those too.
1469          * sbfields arg may contain fields other than *QUOTINO;
1470          * VERSIONNUM for example.
1471          */
1472         s = XFS_SB_LOCK(mp);
1473         if (flags & XFS_QMOPT_SBVERSION) {
1474 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1475                 unsigned oldv = mp->m_sb.sb_versionnum;
1476 #endif
1477                 ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
1478                 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1479                                    XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1480                        (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1481                         XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1482
1483                 XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
1484                 mp->m_sb.sb_uquotino = NULLFSINO;
1485                 mp->m_sb.sb_gquotino = NULLFSINO;
1486
1487                 /* qflags will get updated _after_ quotacheck */
1488                 mp->m_sb.sb_qflags = 0;
1489 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1490                 cmn_err(CE_NOTE,
1491                         "Old superblock version %x, converting to %x.",
1492                         oldv, mp->m_sb.sb_versionnum);
1493 #endif
1494         }
1495         if (flags & XFS_QMOPT_UQUOTA)
1496                 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1497         else
1498                 mp->m_sb.sb_gquotino = (*ip)->i_ino;
1499         XFS_SB_UNLOCK(mp, s);
1500         xfs_mod_sb(tp, sbfields);
1501
1502         if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
1503                                      NULL))) {
1504                 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1505                 return (error);
1506         }
1507         return (0);
1508 }
1509
1510
1511 STATIC int
1512 xfs_qm_reset_dqcounts(
1513         xfs_mount_t     *mp,
1514         xfs_buf_t       *bp,
1515         xfs_dqid_t      id,
1516         uint            type)
1517 {
1518         xfs_disk_dquot_t        *ddq;
1519         int                     j;
1520
1521         xfs_buftrace("RESET DQUOTS", bp);
1522         /*
1523          * Reset all counters and timers. They'll be
1524          * started afresh by xfs_qm_quotacheck.
1525          */
1526 #ifdef DEBUG
1527         j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1528         do_div(j, sizeof(xfs_dqblk_t));
1529         ASSERT(XFS_QM_DQPERBLK(mp) == j);
1530 #endif
1531         ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1532         for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
1533                 /*
1534                  * Do a sanity check, and if needed, repair the dqblk. Don't
1535                  * output any warnings because it's perfectly possible to
1536                  * find unitialized dquot blks. See comment in xfs_qm_dqcheck.
1537                  */
1538                 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1539                                       "xfs_quotacheck");
1540                 INT_SET(ddq->d_bcount, ARCH_CONVERT, 0ULL);
1541                 INT_SET(ddq->d_icount, ARCH_CONVERT, 0ULL);
1542                 INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
1543                 INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
1544                 INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
1545                 INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0);
1546                 INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
1547                 INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
1548                 INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, 0UL);
1549                 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1550         }
1551
1552         return (0);
1553 }
1554
1555 STATIC int
1556 xfs_qm_dqiter_bufs(
1557         xfs_mount_t     *mp,
1558         xfs_dqid_t      firstid,
1559         xfs_fsblock_t   bno,
1560         xfs_filblks_t   blkcnt,
1561         uint            flags)
1562 {
1563         xfs_buf_t       *bp;
1564         int             error;
1565         int             notcommitted;
1566         int             incr;
1567         int             type;
1568
1569         ASSERT(blkcnt > 0);
1570         notcommitted = 0;
1571         incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1572                 XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1573         type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1574                 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1575         error = 0;
1576
1577         /*
1578          * Blkcnt arg can be a very big number, and might even be
1579          * larger than the log itself. So, we have to break it up into
1580          * manageable-sized transactions.
1581          * Note that we don't start a permanent transaction here; we might
1582          * not be able to get a log reservation for the whole thing up front,
1583          * and we don't really care to either, because we just discard
1584          * everything if we were to crash in the middle of this loop.
1585          */
1586         while (blkcnt--) {
1587                 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1588                               XFS_FSB_TO_DADDR(mp, bno),
1589                               (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
1590                 if (error)
1591                         break;
1592
1593                 (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1594                 xfs_bdwrite(mp, bp);
1595                 /*
1596                  * goto the next block.
1597                  */
1598                 bno++;
1599                 firstid += XFS_QM_DQPERBLK(mp);
1600         }
1601         return (error);
1602 }
1603
1604 /*
1605  * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1606  * caller supplied function for every chunk of dquots that we find.
1607  */
1608 STATIC int
1609 xfs_qm_dqiterate(
1610         xfs_mount_t     *mp,
1611         xfs_inode_t     *qip,
1612         uint            flags)
1613 {
1614         xfs_bmbt_irec_t         *map;
1615         int                     i, nmaps;       /* number of map entries */
1616         int                     error;          /* return value */
1617         xfs_fileoff_t           lblkno;
1618         xfs_filblks_t           maxlblkcnt;
1619         xfs_dqid_t              firstid;
1620         xfs_fsblock_t           rablkno;
1621         xfs_filblks_t           rablkcnt;
1622
1623         error = 0;
1624         /*
1625          * This looks racey, but we can't keep an inode lock across a
1626          * trans_reserve. But, this gets called during quotacheck, and that
1627          * happens only at mount time which is single threaded.
1628          */
1629         if (qip->i_d.di_nblocks == 0)
1630                 return (0);
1631
1632         map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1633
1634         lblkno = 0;
1635         maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1636         do {
1637                 nmaps = XFS_DQITER_MAP_SIZE;
1638                 /*
1639                  * We aren't changing the inode itself. Just changing
1640                  * some of its data. No new blocks are added here, and
1641                  * the inode is never added to the transaction.
1642                  */
1643                 xfs_ilock(qip, XFS_ILOCK_SHARED);
1644                 error = xfs_bmapi(NULL, qip, lblkno,
1645                                   maxlblkcnt - lblkno,
1646                                   XFS_BMAPI_METADATA,
1647                                   NULL,
1648                                   0, map, &nmaps, NULL);
1649                 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1650                 if (error)
1651                         break;
1652
1653                 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1654                 for (i = 0; i < nmaps; i++) {
1655                         ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1656                         ASSERT(map[i].br_blockcount);
1657
1658
1659                         lblkno += map[i].br_blockcount;
1660
1661                         if (map[i].br_startblock == HOLESTARTBLOCK)
1662                                 continue;
1663
1664                         firstid = (xfs_dqid_t) map[i].br_startoff *
1665                                 XFS_QM_DQPERBLK(mp);
1666                         /*
1667                          * Do a read-ahead on the next extent.
1668                          */
1669                         if ((i+1 < nmaps) &&
1670                             (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1671                                 rablkcnt =  map[i+1].br_blockcount;
1672                                 rablkno = map[i+1].br_startblock;
1673                                 while (rablkcnt--) {
1674                                         xfs_baread(mp->m_ddev_targp,
1675                                                XFS_FSB_TO_DADDR(mp, rablkno),
1676                                                (int)XFS_QI_DQCHUNKLEN(mp));
1677                                         rablkno++;
1678                                 }
1679                         }
1680                         /*
1681                          * Iterate thru all the blks in the extent and
1682                          * reset the counters of all the dquots inside them.
1683                          */
1684                         if ((error = xfs_qm_dqiter_bufs(mp,
1685                                                        firstid,
1686                                                        map[i].br_startblock,
1687                                                        map[i].br_blockcount,
1688                                                        flags))) {
1689                                 break;
1690                         }
1691                 }
1692
1693                 if (error)
1694                         break;
1695         } while (nmaps > 0);
1696
1697         kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
1698
1699         return (error);
1700 }
1701
1702 /*
1703  * Called by dqusage_adjust in doing a quotacheck.
1704  * Given the inode, and a dquot (either USR or GRP, doesn't matter),
1705  * this updates its incore copy as well as the buffer copy. This is
1706  * so that once the quotacheck is done, we can just log all the buffers,
1707  * as opposed to logging numerous updates to individual dquots.
1708  */
1709 STATIC void
1710 xfs_qm_quotacheck_dqadjust(
1711         xfs_dquot_t             *dqp,
1712         xfs_qcnt_t              nblks,
1713         xfs_qcnt_t              rtblks)
1714 {
1715         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1716         xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
1717         /*
1718          * Adjust the inode count and the block count to reflect this inode's
1719          * resource usage.
1720          */
1721         INT_MOD(dqp->q_core.d_icount, ARCH_CONVERT, +1);
1722         dqp->q_res_icount++;
1723         if (nblks) {
1724                 INT_MOD(dqp->q_core.d_bcount, ARCH_CONVERT, nblks);
1725                 dqp->q_res_bcount += nblks;
1726         }
1727         if (rtblks) {
1728                 INT_MOD(dqp->q_core.d_rtbcount, ARCH_CONVERT, rtblks);
1729                 dqp->q_res_rtbcount += rtblks;
1730         }
1731
1732         /*
1733          * Set default limits, adjust timers (since we changed usages)
1734          */
1735         if (! XFS_IS_SUSER_DQUOT(dqp)) {
1736                 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1737                 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1738         }
1739
1740         dqp->dq_flags |= XFS_DQ_DIRTY;
1741 }
1742
1743 STATIC int
1744 xfs_qm_get_rtblks(
1745         xfs_inode_t     *ip,
1746         xfs_qcnt_t      *O_rtblks)
1747 {
1748         xfs_filblks_t   rtblks;                 /* total rt blks */
1749         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1750         xfs_extnum_t    nextents;               /* number of extent entries */
1751         xfs_bmbt_rec_t  *base;                  /* base of extent array */
1752         xfs_bmbt_rec_t  *ep;                    /* pointer to an extent entry */
1753         int             error;
1754
1755         ASSERT(XFS_IS_REALTIME_INODE(ip));
1756         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1757         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1758                 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1759                         return (error);
1760         }
1761         rtblks = 0;
1762         nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1763         base = &ifp->if_u1.if_extents[0];
1764         for (ep = base; ep < &base[nextents]; ep++)
1765                 rtblks += xfs_bmbt_get_blockcount(ep);
1766         *O_rtblks = (xfs_qcnt_t)rtblks;
1767         return (0);
1768 }
1769
1770 /*
1771  * callback routine supplied to bulkstat(). Given an inumber, find its
1772  * dquots and update them to account for resources taken by that inode.
1773  */
1774 /* ARGSUSED */
1775 STATIC int
1776 xfs_qm_dqusage_adjust(
1777         xfs_mount_t     *mp,            /* mount point for filesystem */
1778         xfs_ino_t       ino,            /* inode number to get data for */
1779         void            __user *buffer, /* not used */
1780         int             ubsize,         /* not used */
1781         void            *private_data,  /* not used */
1782         xfs_daddr_t     bno,            /* starting block of inode cluster */
1783         int             *ubused,        /* not used */
1784         void            *dip,           /* on-disk inode pointer (not used) */
1785         int             *res)           /* result code value */
1786 {
1787         xfs_inode_t     *ip;
1788         xfs_dquot_t     *udqp, *gdqp;
1789         xfs_qcnt_t      nblks, rtblks;
1790         int             error;
1791
1792         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1793
1794         /*
1795          * rootino must have its resources accounted for, not so with the quota
1796          * inodes.
1797          */
1798         if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1799                 *res = BULKSTAT_RV_NOTHING;
1800                 return XFS_ERROR(EINVAL);
1801         }
1802
1803         /*
1804          * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1805          * interface expects the inode to be exclusively locked because that's
1806          * the case in all other instances. It's OK that we do this because
1807          * quotacheck is done only at mount time.
1808          */
1809         if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1810                 *res = BULKSTAT_RV_NOTHING;
1811                 return (error);
1812         }
1813
1814         if (ip->i_d.di_mode == 0) {
1815                 xfs_iput_new(ip, XFS_ILOCK_EXCL);
1816                 *res = BULKSTAT_RV_NOTHING;
1817                 return XFS_ERROR(ENOENT);
1818         }
1819
1820         /*
1821          * Obtain the locked dquots. In case of an error (eg. allocation
1822          * fails for ENOSPC), we return the negative of the error number
1823          * to bulkstat, so that it can get propagated to quotacheck() and
1824          * making us disable quotas for the file system.
1825          */
1826         if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1827                 xfs_iput(ip, XFS_ILOCK_EXCL);
1828                 *res = BULKSTAT_RV_GIVEUP;
1829                 return (error);
1830         }
1831
1832         rtblks = 0;
1833         if (! XFS_IS_REALTIME_INODE(ip)) {
1834                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1835         } else {
1836                 /*
1837                  * Walk thru the extent list and count the realtime blocks.
1838                  */
1839                 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1840                         xfs_iput(ip, XFS_ILOCK_EXCL);
1841                         if (udqp)
1842                                 xfs_qm_dqput(udqp);
1843                         if (gdqp)
1844                                 xfs_qm_dqput(gdqp);
1845                         *res = BULKSTAT_RV_GIVEUP;
1846                         return (error);
1847                 }
1848                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1849         }
1850         ASSERT(ip->i_delayed_blks == 0);
1851
1852         /*
1853          * We can't release the inode while holding its dquot locks.
1854          * The inode can go into inactive and might try to acquire the dquotlocks.
1855          * So, just unlock here and do a vn_rele at the end.
1856          */
1857         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1858
1859         /*
1860          * Add the (disk blocks and inode) resources occupied by this
1861          * inode to its dquots. We do this adjustment in the incore dquot,
1862          * and also copy the changes to its buffer.
1863          * We don't care about putting these changes in a transaction
1864          * envelope because if we crash in the middle of a 'quotacheck'
1865          * we have to start from the beginning anyway.
1866          * Once we're done, we'll log all the dquot bufs.
1867          *
1868          * The *QUOTA_ON checks below may look pretty racey, but quotachecks
1869          * and quotaoffs don't race. (Quotachecks happen at mount time only).
1870          */
1871         if (XFS_IS_UQUOTA_ON(mp)) {
1872                 ASSERT(udqp);
1873                 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
1874                 xfs_qm_dqput(udqp);
1875         }
1876         if (XFS_IS_OQUOTA_ON(mp)) {
1877                 ASSERT(gdqp);
1878                 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
1879                 xfs_qm_dqput(gdqp);
1880         }
1881         /*
1882          * Now release the inode. This will send it to 'inactive', and
1883          * possibly even free blocks.
1884          */
1885         VN_RELE(XFS_ITOV(ip));
1886
1887         /*
1888          * Goto next inode.
1889          */
1890         *res = BULKSTAT_RV_DIDONE;
1891         return (0);
1892 }
1893
1894 /*
1895  * Walk thru all the filesystem inodes and construct a consistent view
1896  * of the disk quota world. If the quotacheck fails, disable quotas.
1897  */
1898 int
1899 xfs_qm_quotacheck(
1900         xfs_mount_t     *mp)
1901 {
1902         int             done, count, error;
1903         xfs_ino_t       lastino;
1904         size_t          structsz;
1905         xfs_inode_t     *uip, *gip;
1906         uint            flags;
1907
1908         count = INT_MAX;
1909         structsz = 1;
1910         lastino = 0;
1911         flags = 0;
1912
1913         ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
1914         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1915
1916         /*
1917          * There should be no cached dquots. The (simplistic) quotacheck
1918          * algorithm doesn't like that.
1919          */
1920         ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
1921
1922         cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1923
1924         /*
1925          * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1926          * their counters to zero. We need a clean slate.
1927          * We don't log our changes till later.
1928          */
1929         if ((uip = XFS_QI_UQIP(mp))) {
1930                 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
1931                         goto error_return;
1932                 flags |= XFS_UQUOTA_CHKD;
1933         }
1934
1935         if ((gip = XFS_QI_GQIP(mp))) {
1936                 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1937                                         XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
1938                         goto error_return;
1939                 flags |= XFS_OQUOTA_CHKD;
1940         }
1941
1942         do {
1943                 /*
1944                  * Iterate thru all the inodes in the file system,
1945                  * adjusting the corresponding dquot counters in core.
1946                  */
1947                 if ((error = xfs_bulkstat(mp, &lastino, &count,
1948                                      xfs_qm_dqusage_adjust, NULL,
1949                                      structsz, NULL,
1950                                      BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
1951                                      &done)))
1952                         break;
1953
1954         } while (! done);
1955
1956         /*
1957          * We can get this error if we couldn't do a dquot allocation inside
1958          * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1959          * dirty dquots that might be cached, we just want to get rid of them
1960          * and turn quotaoff. The dquots won't be attached to any of the inodes
1961          * at this point (because we intentionally didn't in dqget_noattach).
1962          */
1963         if (error) {
1964                 xfs_qm_dqpurge_all(mp,
1965                                    XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
1966                                    XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
1967                 goto error_return;
1968         }
1969         /*
1970          * We've made all the changes that we need to make incore.
1971          * Now flush_them down to disk buffers.
1972          */
1973         xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1974
1975         /*
1976          * We didn't log anything, because if we crashed, we'll have to
1977          * start the quotacheck from scratch anyway. However, we must make
1978          * sure that our dquot changes are secure before we put the
1979          * quotacheck'd stamp on the superblock. So, here we do a synchronous
1980          * flush.
1981          */
1982         XFS_bflush(mp->m_ddev_targp);
1983
1984         /*
1985          * If one type of quotas is off, then it will lose its
1986          * quotachecked status, since we won't be doing accounting for
1987          * that type anymore.
1988          */
1989         mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1990         mp->m_qflags |= flags;
1991
1992         XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
1993
1994  error_return:
1995         if (error) {
1996                 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1997                         "Disabling quotas.",
1998                         mp->m_fsname, error);
1999                 /*
2000                  * We must turn off quotas.
2001                  */
2002                 ASSERT(mp->m_quotainfo != NULL);
2003                 ASSERT(xfs_Gqm != NULL);
2004                 xfs_qm_destroy_quotainfo(mp);
2005                 xfs_mount_reset_sbqflags(mp);
2006         } else {
2007                 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
2008         }
2009         return (error);
2010 }
2011
2012 /*
2013  * This is called after the superblock has been read in and we're ready to
2014  * iget the quota inodes.
2015  */
2016 STATIC int
2017 xfs_qm_init_quotainos(
2018         xfs_mount_t     *mp)
2019 {
2020         xfs_inode_t     *uip, *gip;
2021         int             error;
2022         __int64_t       sbflags;
2023         uint            flags;
2024
2025         ASSERT(mp->m_quotainfo);
2026         uip = gip = NULL;
2027         sbflags = 0;
2028         flags = 0;
2029
2030         /*
2031          * Get the uquota and gquota inodes
2032          */
2033         if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
2034                 if (XFS_IS_UQUOTA_ON(mp) &&
2035                     mp->m_sb.sb_uquotino != NULLFSINO) {
2036                         ASSERT(mp->m_sb.sb_uquotino > 0);
2037                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
2038                                              0, 0, &uip, 0)))
2039                                 return XFS_ERROR(error);
2040                 }
2041                 if (XFS_IS_OQUOTA_ON(mp) &&
2042                     mp->m_sb.sb_gquotino != NULLFSINO) {
2043                         ASSERT(mp->m_sb.sb_gquotino > 0);
2044                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
2045                                              0, 0, &gip, 0))) {
2046                                 if (uip)
2047                                         VN_RELE(XFS_ITOV(uip));
2048                                 return XFS_ERROR(error);
2049                         }
2050                 }
2051         } else {
2052                 flags |= XFS_QMOPT_SBVERSION;
2053                 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
2054                             XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
2055         }
2056
2057         /*
2058          * Create the two inodes, if they don't exist already. The changes
2059          * made above will get added to a transaction and logged in one of
2060          * the qino_alloc calls below.  If the device is readonly,
2061          * temporarily switch to read-write to do this.
2062          */
2063         if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
2064                 if ((error = xfs_qm_qino_alloc(mp, &uip,
2065                                               sbflags | XFS_SB_UQUOTINO,
2066                                               flags | XFS_QMOPT_UQUOTA)))
2067                         return XFS_ERROR(error);
2068
2069                 flags &= ~XFS_QMOPT_SBVERSION;
2070         }
2071         if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
2072                 flags |= (XFS_IS_GQUOTA_ON(mp) ?
2073                                 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
2074                 error = xfs_qm_qino_alloc(mp, &gip,
2075                                           sbflags | XFS_SB_GQUOTINO, flags);
2076                 if (error) {
2077                         if (uip)
2078                                 VN_RELE(XFS_ITOV(uip));
2079
2080                         return XFS_ERROR(error);
2081                 }
2082         }
2083
2084         XFS_QI_UQIP(mp) = uip;
2085         XFS_QI_GQIP(mp) = gip;
2086
2087         return (0);
2088 }
2089
2090
2091 /*
2092  * Traverse the freelist of dquots and attempt to reclaim a maximum of
2093  * 'howmany' dquots. This operation races with dqlookup(), and attempts to
2094  * favor the lookup function ...
2095  * XXXsup merge this with qm_reclaim_one().
2096  */
2097 STATIC int
2098 xfs_qm_shake_freelist(
2099         int howmany)
2100 {
2101         int             nreclaimed;
2102         xfs_dqhash_t    *hash;
2103         xfs_dquot_t     *dqp, *nextdqp;
2104         int             restarts;
2105         int             nflushes;
2106
2107         if (howmany <= 0)
2108                 return (0);
2109
2110         nreclaimed = 0;
2111         restarts = 0;
2112         nflushes = 0;
2113
2114 #ifdef QUOTADEBUG
2115         cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
2116 #endif
2117         /* lock order is : hashchainlock, freelistlock, mplistlock */
2118  tryagain:
2119         xfs_qm_freelist_lock(xfs_Gqm);
2120
2121         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
2122              ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
2123               nreclaimed < howmany); ) {
2124                 xfs_dqlock(dqp);
2125
2126                 /*
2127                  * We are racing with dqlookup here. Naturally we don't
2128                  * want to reclaim a dquot that lookup wants.
2129                  */
2130                 if (dqp->dq_flags & XFS_DQ_WANT) {
2131                         xfs_dqunlock(dqp);
2132                         xfs_qm_freelist_unlock(xfs_Gqm);
2133                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2134                                 return (nreclaimed);
2135                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2136                         goto tryagain;
2137                 }
2138
2139                 /*
2140                  * If the dquot is inactive, we are assured that it is
2141                  * not on the mplist or the hashlist, and that makes our
2142                  * life easier.
2143                  */
2144                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2145                         ASSERT(dqp->q_mount == NULL);
2146                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2147                         ASSERT(dqp->HL_PREVP == NULL);
2148                         ASSERT(dqp->MPL_PREVP == NULL);
2149                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2150                         nextdqp = dqp->dq_flnext;
2151                         goto off_freelist;
2152                 }
2153
2154                 ASSERT(dqp->MPL_PREVP);
2155                 /*
2156                  * Try to grab the flush lock. If this dquot is in the process of
2157                  * getting flushed to disk, we don't want to reclaim it.
2158                  */
2159                 if (! xfs_qm_dqflock_nowait(dqp)) {
2160                         xfs_dqunlock(dqp);
2161                         dqp = dqp->dq_flnext;
2162                         continue;
2163                 }
2164
2165                 /*
2166                  * We have the flush lock so we know that this is not in the
2167                  * process of being flushed. So, if this is dirty, flush it
2168                  * DELWRI so that we don't get a freelist infested with
2169                  * dirty dquots.
2170                  */
2171                 if (XFS_DQ_IS_DIRTY(dqp)) {
2172                         xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
2173                         /*
2174                          * We flush it delayed write, so don't bother
2175                          * releasing the mplock.
2176                          */
2177                         (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2178                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2179                         dqp = dqp->dq_flnext;
2180                         continue;
2181                 }
2182                 /*
2183                  * We're trying to get the hashlock out of order. This races
2184                  * with dqlookup; so, we giveup and goto the next dquot if
2185                  * we couldn't get the hashlock. This way, we won't starve
2186                  * a dqlookup process that holds the hashlock that is
2187                  * waiting for the freelist lock.
2188                  */
2189                 if (! xfs_qm_dqhashlock_nowait(dqp)) {
2190                         xfs_dqfunlock(dqp);
2191                         xfs_dqunlock(dqp);
2192                         dqp = dqp->dq_flnext;
2193                         continue;
2194                 }
2195                 /*
2196                  * This races with dquot allocation code as well as dqflush_all
2197                  * and reclaim code. So, if we failed to grab the mplist lock,
2198                  * giveup everything and start over.
2199                  */
2200                 hash = dqp->q_hash;
2201                 ASSERT(hash);
2202                 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2203                         /* XXX put a sentinel so that we can come back here */
2204                         xfs_dqfunlock(dqp);
2205                         xfs_dqunlock(dqp);
2206                         XFS_DQ_HASH_UNLOCK(hash);
2207                         xfs_qm_freelist_unlock(xfs_Gqm);
2208                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2209                                 return (nreclaimed);
2210                         goto tryagain;
2211                 }
2212                 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
2213 #ifdef QUOTADEBUG
2214                 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2215                         dqp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
2216 #endif
2217                 ASSERT(dqp->q_nrefs == 0);
2218                 nextdqp = dqp->dq_flnext;
2219                 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2220                 XQM_HASHLIST_REMOVE(hash, dqp);
2221                 xfs_dqfunlock(dqp);
2222                 xfs_qm_mplist_unlock(dqp->q_mount);
2223                 XFS_DQ_HASH_UNLOCK(hash);
2224
2225  off_freelist:
2226                 XQM_FREELIST_REMOVE(dqp);
2227                 xfs_dqunlock(dqp);
2228                 nreclaimed++;
2229                 XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
2230                 xfs_qm_dqdestroy(dqp);
2231                 dqp = nextdqp;
2232         }
2233         xfs_qm_freelist_unlock(xfs_Gqm);
2234         return (nreclaimed);
2235 }
2236
2237
2238 /*
2239  * The kmem_shake interface is invoked when memory is running low.
2240  */
2241 /* ARGSUSED */
2242 STATIC int
2243 xfs_qm_shake(int nr_to_scan, unsigned int gfp_mask)
2244 {
2245         int     ndqused, nfree, n;
2246
2247         if (!kmem_shake_allow(gfp_mask))
2248                 return (0);
2249         if (!xfs_Gqm)
2250                 return (0);
2251
2252         nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
2253         /* incore dquots in all f/s's */
2254         ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2255
2256         ASSERT(ndqused >= 0);
2257
2258         if (nfree <= ndqused && nfree < ndquot)
2259                 return (0);
2260
2261         ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
2262         n = nfree - ndqused - ndquot;           /* # over target */
2263
2264         return xfs_qm_shake_freelist(MAX(nfree, n));
2265 }
2266
2267
2268 /*
2269  * Just pop the least recently used dquot off the freelist and
2270  * recycle it. The returned dquot is locked.
2271  */
2272 STATIC xfs_dquot_t *
2273 xfs_qm_dqreclaim_one(void)
2274 {
2275         xfs_dquot_t     *dqpout;
2276         xfs_dquot_t     *dqp;
2277         int             restarts;
2278         int             nflushes;
2279
2280         restarts = 0;
2281         dqpout = NULL;
2282         nflushes = 0;
2283
2284         /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2285  startagain:
2286         xfs_qm_freelist_lock(xfs_Gqm);
2287
2288         FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2289                 xfs_dqlock(dqp);
2290
2291                 /*
2292                  * We are racing with dqlookup here. Naturally we don't
2293                  * want to reclaim a dquot that lookup wants. We release the
2294                  * freelist lock and start over, so that lookup will grab
2295                  * both the dquot and the freelistlock.
2296                  */
2297                 if (dqp->dq_flags & XFS_DQ_WANT) {
2298                         ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2299                         xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
2300                         xfs_dqunlock(dqp);
2301                         xfs_qm_freelist_unlock(xfs_Gqm);
2302                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2303                                 return (NULL);
2304                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2305                         goto startagain;
2306                 }
2307
2308                 /*
2309                  * If the dquot is inactive, we are assured that it is
2310                  * not on the mplist or the hashlist, and that makes our
2311                  * life easier.
2312                  */
2313                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2314                         ASSERT(dqp->q_mount == NULL);
2315                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2316                         ASSERT(dqp->HL_PREVP == NULL);
2317                         ASSERT(dqp->MPL_PREVP == NULL);
2318                         XQM_FREELIST_REMOVE(dqp);
2319                         xfs_dqunlock(dqp);
2320                         dqpout = dqp;
2321                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2322                         break;
2323                 }
2324
2325                 ASSERT(dqp->q_hash);
2326                 ASSERT(dqp->MPL_PREVP);
2327
2328                 /*
2329                  * Try to grab the flush lock. If this dquot is in the process of
2330                  * getting flushed to disk, we don't want to reclaim it.
2331                  */
2332                 if (! xfs_qm_dqflock_nowait(dqp)) {
2333                         xfs_dqunlock(dqp);
2334                         continue;
2335                 }
2336
2337                 /*
2338                  * We have the flush lock so we know that this is not in the
2339                  * process of being flushed. So, if this is dirty, flush it
2340                  * DELWRI so that we don't get a freelist infested with
2341                  * dirty dquots.
2342                  */
2343                 if (XFS_DQ_IS_DIRTY(dqp)) {
2344                         xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
2345                         /*
2346                          * We flush it delayed write, so don't bother
2347                          * releasing the freelist lock.
2348                          */
2349                         (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2350                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2351                         continue;
2352                 }
2353
2354                 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2355                         xfs_dqfunlock(dqp);
2356                         xfs_dqunlock(dqp);
2357                         continue;
2358                 }
2359
2360                 if (! xfs_qm_dqhashlock_nowait(dqp))
2361                         goto mplistunlock;
2362
2363                 ASSERT(dqp->q_nrefs == 0);
2364                 xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2365                 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2366                 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2367                 XQM_FREELIST_REMOVE(dqp);
2368                 dqpout = dqp;
2369                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
2370  mplistunlock:
2371                 xfs_qm_mplist_unlock(dqp->q_mount);
2372                 xfs_dqfunlock(dqp);
2373                 xfs_dqunlock(dqp);
2374                 if (dqpout)
2375                         break;
2376         }
2377
2378         xfs_qm_freelist_unlock(xfs_Gqm);
2379         return (dqpout);
2380 }
2381
2382
2383 /*------------------------------------------------------------------*/
2384
2385 /*
2386  * Return a new incore dquot. Depending on the number of
2387  * dquots in the system, we either allocate a new one on the kernel heap,
2388  * or reclaim a free one.
2389  * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2390  * to reclaim an existing one from the freelist.
2391  */
2392 boolean_t
2393 xfs_qm_dqalloc_incore(
2394         xfs_dquot_t **O_dqpp)
2395 {
2396         xfs_dquot_t     *dqp;
2397
2398         /*
2399          * Check against high water mark to see if we want to pop
2400          * a nincompoop dquot off the freelist.
2401          */
2402         if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2403                 /*
2404                  * Try to recycle a dquot from the freelist.
2405                  */
2406                 if ((dqp = xfs_qm_dqreclaim_one())) {
2407                         XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2408                         /*
2409                          * Just zero the core here. The rest will get
2410                          * reinitialized by caller. XXX we shouldn't even
2411                          * do this zero ...
2412                          */
2413                         memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2414                         *O_dqpp = dqp;
2415                         return (B_FALSE);
2416                 }
2417                 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2418         }
2419
2420         /*
2421          * Allocate a brand new dquot on the kernel heap and return it
2422          * to the caller to initialize.
2423          */
2424         ASSERT(xfs_Gqm->qm_dqzone != NULL);
2425         *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2426         atomic_inc(&xfs_Gqm->qm_totaldquots);
2427
2428         return (B_TRUE);
2429 }
2430
2431
2432 /*
2433  * Start a transaction and write the incore superblock changes to
2434  * disk. flags parameter indicates which fields have changed.
2435  */
2436 int
2437 xfs_qm_write_sb_changes(
2438         xfs_mount_t     *mp,
2439         __int64_t       flags)
2440 {
2441         xfs_trans_t     *tp;
2442         int             error;
2443
2444 #ifdef QUOTADEBUG
2445         cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
2446 #endif
2447         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2448         if ((error = xfs_trans_reserve(tp, 0,
2449                                       mp->m_sb.sb_sectsize + 128, 0,
2450                                       0,
2451                                       XFS_DEFAULT_LOG_COUNT))) {
2452                 xfs_trans_cancel(tp, 0);
2453                 return (error);
2454         }
2455
2456         xfs_mod_sb(tp, flags);
2457         (void) xfs_trans_commit(tp, 0, NULL);
2458
2459         return (0);
2460 }
2461
2462
2463 /* --------------- utility functions for vnodeops ---------------- */
2464
2465
2466 /*
2467  * Given an inode, a uid and gid (from cred_t) make sure that we have
2468  * allocated relevant dquot(s) on disk, and that we won't exceed inode
2469  * quotas by creating this file.
2470  * This also attaches dquot(s) to the given inode after locking it,
2471  * and returns the dquots corresponding to the uid and/or gid.
2472  *
2473  * in   : inode (unlocked)
2474  * out  : udquot, gdquot with references taken and unlocked
2475  */
2476 int
2477 xfs_qm_vop_dqalloc(
2478         xfs_mount_t     *mp,
2479         xfs_inode_t     *ip,
2480         uid_t           uid,
2481         gid_t           gid,
2482         prid_t          prid,
2483         uint            flags,
2484         xfs_dquot_t     **O_udqpp,
2485         xfs_dquot_t     **O_gdqpp)
2486 {
2487         int             error;
2488         xfs_dquot_t     *uq, *gq;
2489         uint            lockflags;
2490
2491         if (!XFS_IS_QUOTA_ON(mp))
2492                 return 0;
2493
2494         lockflags = XFS_ILOCK_EXCL;
2495         xfs_ilock(ip, lockflags);
2496
2497         if ((flags & XFS_QMOPT_INHERIT) &&
2498             XFS_INHERIT_GID(ip, XFS_MTOVFS(mp)))
2499                 gid = ip->i_d.di_gid;
2500
2501         /*
2502          * Attach the dquot(s) to this inode, doing a dquot allocation
2503          * if necessary. The dquot(s) will not be locked.
2504          */
2505         if (XFS_NOT_DQATTACHED(mp, ip)) {
2506                 if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
2507                                             XFS_QMOPT_ILOCKED))) {
2508                         xfs_iunlock(ip, lockflags);
2509                         return (error);
2510                 }
2511         }
2512
2513         uq = gq = NULL;
2514         if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2515                 if (ip->i_d.di_uid != uid) {
2516                         /*
2517                          * What we need is the dquot that has this uid, and
2518                          * if we send the inode to dqget, the uid of the inode
2519                          * takes priority over what's sent in the uid argument.
2520                          * We must unlock inode here before calling dqget if
2521                          * we're not sending the inode, because otherwise
2522                          * we'll deadlock by doing trans_reserve while
2523                          * holding ilock.
2524                          */
2525                         xfs_iunlock(ip, lockflags);
2526                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2527                                                  XFS_DQ_USER,
2528                                                  XFS_QMOPT_DQALLOC |
2529                                                  XFS_QMOPT_DOWARN,
2530                                                  &uq))) {
2531                                 ASSERT(error != ENOENT);
2532                                 return (error);
2533                         }
2534                         /*
2535                          * Get the ilock in the right order.
2536                          */
2537                         xfs_dqunlock(uq);
2538                         lockflags = XFS_ILOCK_SHARED;
2539                         xfs_ilock(ip, lockflags);
2540                 } else {
2541                         /*
2542                          * Take an extra reference, because we'll return
2543                          * this to caller
2544                          */
2545                         ASSERT(ip->i_udquot);
2546                         uq = ip->i_udquot;
2547                         xfs_dqlock(uq);
2548                         XFS_DQHOLD(uq);
2549                         xfs_dqunlock(uq);
2550                 }
2551         }
2552         if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2553                 if (ip->i_d.di_gid != gid) {
2554                         xfs_iunlock(ip, lockflags);
2555                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2556                                                  XFS_DQ_GROUP,
2557                                                  XFS_QMOPT_DQALLOC |
2558                                                  XFS_QMOPT_DOWARN,
2559                                                  &gq))) {
2560                                 if (uq)
2561                                         xfs_qm_dqrele(uq);
2562                                 ASSERT(error != ENOENT);
2563                                 return (error);
2564                         }
2565                         xfs_dqunlock(gq);
2566                         lockflags = XFS_ILOCK_SHARED;
2567                         xfs_ilock(ip, lockflags);
2568                 } else {
2569                         ASSERT(ip->i_gdquot);
2570                         gq = ip->i_gdquot;
2571                         xfs_dqlock(gq);
2572                         XFS_DQHOLD(gq);
2573                         xfs_dqunlock(gq);
2574                 }
2575         } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2576                 if (ip->i_d.di_projid != prid) {
2577                         xfs_iunlock(ip, lockflags);
2578                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2579                                                  XFS_DQ_PROJ,
2580                                                  XFS_QMOPT_DQALLOC |
2581                                                  XFS_QMOPT_DOWARN,
2582                                                  &gq))) {
2583                                 if (uq)
2584                                         xfs_qm_dqrele(uq);
2585                                 ASSERT(error != ENOENT);
2586                                 return (error);
2587                         }
2588                         xfs_dqunlock(gq);
2589                         lockflags = XFS_ILOCK_SHARED;
2590                         xfs_ilock(ip, lockflags);
2591                 } else {
2592                         ASSERT(ip->i_gdquot);
2593                         gq = ip->i_gdquot;
2594                         xfs_dqlock(gq);
2595                         XFS_DQHOLD(gq);
2596                         xfs_dqunlock(gq);
2597                 }
2598         }
2599         if (uq)
2600                 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
2601
2602         xfs_iunlock(ip, lockflags);
2603         if (O_udqpp)
2604                 *O_udqpp = uq;
2605         else if (uq)
2606                 xfs_qm_dqrele(uq);
2607         if (O_gdqpp)
2608                 *O_gdqpp = gq;
2609         else if (gq)
2610                 xfs_qm_dqrele(gq);
2611         return (0);
2612 }
2613
2614 /*
2615  * Actually transfer ownership, and do dquot modifications.
2616  * These were already reserved.
2617  */
2618 xfs_dquot_t *
2619 xfs_qm_vop_chown(
2620         xfs_trans_t     *tp,
2621         xfs_inode_t     *ip,
2622         xfs_dquot_t     **IO_olddq,
2623         xfs_dquot_t     *newdq)
2624 {
2625         xfs_dquot_t     *prevdq;
2626         uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
2627                                  XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2628
2629         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
2630         ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2631
2632         /* old dquot */
2633         prevdq = *IO_olddq;
2634         ASSERT(prevdq);
2635         ASSERT(prevdq != newdq);
2636
2637         xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2638         xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2639
2640         /* the sparkling new dquot */
2641         xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2642         xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2643
2644         /*
2645          * Take an extra reference, because the inode
2646          * is going to keep this dquot pointer even
2647          * after the trans_commit.
2648          */
2649         xfs_dqlock(newdq);
2650         XFS_DQHOLD(newdq);
2651         xfs_dqunlock(newdq);
2652         *IO_olddq = newdq;
2653
2654         return (prevdq);
2655 }
2656
2657 /*
2658  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2659  */
2660 int
2661 xfs_qm_vop_chown_reserve(
2662         xfs_trans_t     *tp,
2663         xfs_inode_t     *ip,
2664         xfs_dquot_t     *udqp,
2665         xfs_dquot_t     *gdqp,
2666         uint            flags)
2667 {
2668         int             error;
2669         xfs_mount_t     *mp;
2670         uint            delblks, blkflags;
2671         xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2672
2673         ASSERT(XFS_ISLOCKED_INODE(ip));
2674         mp = ip->i_mount;
2675         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2676
2677         delblks = ip->i_delayed_blks;
2678         delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2679         blkflags = XFS_IS_REALTIME_INODE(ip) ?
2680                         XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2681
2682         if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2683             ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
2684                 delblksudq = udqp;
2685                 /*
2686                  * If there are delayed allocation blocks, then we have to
2687                  * unreserve those from the old dquot, and add them to the
2688                  * new dquot.
2689                  */
2690                 if (delblks) {
2691                         ASSERT(ip->i_udquot);
2692                         unresudq = ip->i_udquot;
2693                 }
2694         }
2695         if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2696                 if ((XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid !=
2697                                 INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) ||
2698                     (XFS_IS_PQUOTA_ON(ip->i_mount) && ip->i_d.di_projid !=
2699                                 INT_GET(gdqp->q_core.d_id, ARCH_CONVERT))) {
2700                         delblksgdq = gdqp;
2701                         if (delblks) {
2702                                 ASSERT(ip->i_gdquot);
2703                                 unresgdq = ip->i_gdquot;
2704                         }
2705                 }
2706         }
2707
2708         if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2709                                 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2710                                 flags | blkflags)))
2711                 return (error);
2712
2713         /*
2714          * Do the delayed blks reservations/unreservations now. Since, these
2715          * are done without the help of a transaction, if a reservation fails
2716          * its previous reservations won't be automatically undone by trans
2717          * code. So, we have to do it manually here.
2718          */
2719         if (delblks) {
2720                 /*
2721                  * Do the reservations first. Unreservation can't fail.
2722                  */
2723                 ASSERT(delblksudq || delblksgdq);
2724                 ASSERT(unresudq || unresgdq);
2725                 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2726                                 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2727                                 flags | blkflags)))
2728                         return (error);
2729                 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2730                                 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2731                                 blkflags);
2732         }
2733
2734         return (0);
2735 }
2736
2737 int
2738 xfs_qm_vop_rename_dqattach(
2739         xfs_inode_t     **i_tab)
2740 {
2741         xfs_inode_t     *ip;
2742         int             i;
2743         int             error;
2744
2745         ip = i_tab[0];
2746
2747         if (! XFS_IS_QUOTA_ON(ip->i_mount))
2748                 return (0);
2749
2750         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2751                 error = xfs_qm_dqattach(ip, 0);
2752                 if (error)
2753                         return (error);
2754         }
2755         for (i = 1; (i < 4 && i_tab[i]); i++) {
2756                 /*
2757                  * Watch out for duplicate entries in the table.
2758                  */
2759                 if ((ip = i_tab[i]) != i_tab[i-1]) {
2760                         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2761                                 error = xfs_qm_dqattach(ip, 0);
2762                                 if (error)
2763                                         return (error);
2764                         }
2765                 }
2766         }
2767         return (0);
2768 }
2769
2770 void
2771 xfs_qm_vop_dqattach_and_dqmod_newinode(
2772         xfs_trans_t     *tp,
2773         xfs_inode_t     *ip,
2774         xfs_dquot_t     *udqp,
2775         xfs_dquot_t     *gdqp)
2776 {
2777         if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2778                 return;
2779
2780         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
2781         ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2782
2783         if (udqp) {
2784                 xfs_dqlock(udqp);
2785                 XFS_DQHOLD(udqp);
2786                 xfs_dqunlock(udqp);
2787                 ASSERT(ip->i_udquot == NULL);
2788                 ip->i_udquot = udqp;
2789                 ASSERT(ip->i_d.di_uid == INT_GET(udqp->q_core.d_id, ARCH_CONVERT));
2790                 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2791         }
2792         if (gdqp) {
2793                 xfs_dqlock(gdqp);
2794                 XFS_DQHOLD(gdqp);
2795                 xfs_dqunlock(gdqp);
2796                 ASSERT(ip->i_gdquot == NULL);
2797                 ip->i_gdquot = gdqp;
2798                 ASSERT(ip->i_d.di_gid == INT_GET(gdqp->q_core.d_id, ARCH_CONVERT));
2799                 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2800         }
2801 }
2802
2803 /* ------------- list stuff -----------------*/
2804 STATIC void
2805 xfs_qm_freelist_init(xfs_frlist_t *ql)
2806 {
2807         ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2808         mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf");
2809         ql->qh_version = 0;
2810         ql->qh_nelems = 0;
2811 }
2812
2813 STATIC void
2814 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2815 {
2816         xfs_dquot_t     *dqp, *nextdqp;
2817
2818         mutex_lock(&ql->qh_lock, PINOD);
2819         for (dqp = ql->qh_next;
2820              dqp != (xfs_dquot_t *)ql; ) {
2821                 xfs_dqlock(dqp);
2822                 nextdqp = dqp->dq_flnext;
2823 #ifdef QUOTADEBUG
2824                 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2825 #endif
2826                 XQM_FREELIST_REMOVE(dqp);
2827                 xfs_dqunlock(dqp);
2828                 xfs_qm_dqdestroy(dqp);
2829                 dqp = nextdqp;
2830         }
2831         /*
2832          * Don't bother about unlocking.
2833          */
2834         mutex_destroy(&ql->qh_lock);
2835
2836         ASSERT(ql->qh_nelems == 0);
2837 }
2838
2839 STATIC void
2840 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2841 {
2842         dq->dq_flnext = ql->qh_next;
2843         dq->dq_flprev = (xfs_dquot_t *)ql;
2844         ql->qh_next = dq;
2845         dq->dq_flnext->dq_flprev = dq;
2846         xfs_Gqm->qm_dqfreelist.qh_nelems++;
2847         xfs_Gqm->qm_dqfreelist.qh_version++;
2848 }
2849
2850 void
2851 xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2852 {
2853         xfs_dquot_t *next = dq->dq_flnext;
2854         xfs_dquot_t *prev = dq->dq_flprev;
2855
2856         next->dq_flprev = prev;
2857         prev->dq_flnext = next;
2858         dq->dq_flnext = dq->dq_flprev = dq;
2859         xfs_Gqm->qm_dqfreelist.qh_nelems--;
2860         xfs_Gqm->qm_dqfreelist.qh_version++;
2861 }
2862
2863 void
2864 xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2865 {
2866         xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2867 }
2868
2869 STATIC int
2870 xfs_qm_dqhashlock_nowait(
2871         xfs_dquot_t *dqp)
2872 {
2873         int locked;
2874
2875         locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
2876         return (locked);
2877 }
2878
2879 int
2880 xfs_qm_freelist_lock_nowait(
2881         xfs_qm_t *xqm)
2882 {
2883         int locked;
2884
2885         locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
2886         return (locked);
2887 }
2888
2889 STATIC int
2890 xfs_qm_mplist_nowait(
2891         xfs_mount_t     *mp)
2892 {
2893         int locked;
2894
2895         ASSERT(mp->m_quotainfo);
2896         locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
2897         return (locked);
2898 }