dquot: cleanup space allocation / freeing routines
[safe/jmp/linux-2.6] / fs / quota / dquot.c
index 70f36c0..baf202c 100644 (file)
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/writeback.h> /* for inode_lock, oddly enough.. */
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-#include <net/netlink.h>
-#include <net/genetlink.h>
-#endif
 
 #include <asm/uaccess.h>
 
  *
  * Any operation working on dquots via inode pointers must hold dqptr_sem.  If
  * operation is just reading pointers from inode (or not using them at all) the
- * read lock is enough. If pointers are altered function must hold write lock
- * (these locking rules also apply for S_NOQUOTA flag in the inode - note that
- * for altering the flag i_mutex is also needed).
+ * read lock is enough. If pointers are altered function must hold write lock.
+ * Special care needs to be taken about S_NOQUOTA inode flag (marking that
+ * inode is a quota file). Functions adding pointers from inode to dquots have
+ * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
+ * have to do all pointer modifications before dropping dqptr_sem. This makes
+ * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
+ * then drops all pointers to dquots from an inode.
  *
  * Each dquot has its dq_lock mutex. Locked dquots might not be referenced
  * from inodes (dquot_alloc_space() and such don't check the dq_lock).
@@ -229,6 +229,8 @@ static struct hlist_head *dquot_hash;
 struct dqstats dqstats;
 EXPORT_SYMBOL(dqstats);
 
+static qsize_t inode_get_rsv_space(struct inode *inode);
+
 static inline unsigned int
 hashfn(const struct super_block *sb, unsigned int id, int type)
 {
@@ -327,6 +329,30 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
 }
 EXPORT_SYMBOL(dquot_mark_dquot_dirty);
 
+/* Dirtify all the dquots - this can block when journalling */
+static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
+{
+       int ret, err, cnt;
+
+       ret = err = 0;
+       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+               if (dquot[cnt])
+                       /* Even in case of error we have to continue */
+                       ret = mark_dquot_dirty(dquot[cnt]);
+               if (!err)
+                       err = ret;
+       }
+       return err;
+}
+
+static inline void dqput_all(struct dquot **dquot)
+{
+       unsigned int cnt;
+
+       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+               dqput(dquot[cnt]);
+}
+
 /* This function needs dq_list_lock */
 static inline int clear_dquot_dirty(struct dquot *dquot)
 {
@@ -544,7 +570,7 @@ out:
 }
 EXPORT_SYMBOL(dquot_scan_active);
 
-int vfs_quota_sync(struct super_block *sb, int type)
+int vfs_quota_sync(struct super_block *sb, int type, int wait)
 {
        struct list_head *dirty;
        struct dquot *dquot;
@@ -589,6 +615,33 @@ int vfs_quota_sync(struct super_block *sb, int type)
        spin_unlock(&dq_list_lock);
        mutex_unlock(&dqopt->dqonoff_mutex);
 
+       if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
+               return 0;
+
+       /* This is neither clever nor fast, but currently I don't know of
+        * any other simple way of getting quota data to disk, and we must
+        * get it there for it to be visible to userspace... */
+       if (sb->s_op->sync_fs)
+               sb->s_op->sync_fs(sb, 1);
+       sync_blockdev(sb->s_bdev);
+
+       /*
+        * Now when everything is written we can discard the pagecache so
+        * that userspace sees the changes.
+        */
+       mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+               if (type != -1 && cnt != type)
+                       continue;
+               if (!sb_has_quota_active(sb, cnt))
+                       continue;
+               mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex,
+                                 I_MUTEX_QUOTA);
+               truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
+               mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
+       }
+       mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+
        return 0;
 }
 EXPORT_SYMBOL(vfs_quota_sync);
@@ -820,11 +873,14 @@ static int dqinit_needed(struct inode *inode, int type)
 static void add_dquot_ref(struct super_block *sb, int type)
 {
        struct inode *inode, *old_inode = NULL;
+       int reserved = 0;
 
        spin_lock(&inode_lock);
        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
                        continue;
+               if (unlikely(inode_get_rsv_space(inode) > 0))
+                       reserved = 1;
                if (!atomic_read(&inode->i_writecount))
                        continue;
                if (!dqinit_needed(inode, type))
@@ -845,6 +901,12 @@ static void add_dquot_ref(struct super_block *sb, int type)
        }
        spin_unlock(&inode_lock);
        iput(old_inode);
+
+       if (reserved) {
+               printk(KERN_WARNING "VFS (%s): Writes happened before quota"
+                       " was turned on thus quota information is probably "
+                       "inconsistent. Please run quotacheck(8).\n", sb->s_id);
+       }
 }
 
 /*
@@ -958,10 +1020,12 @@ static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
 /*
  * Claim reserved quota space
  */
-static void dquot_claim_reserved_space(struct dquot *dquot,
-                                               qsize_t number)
+static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
 {
-       WARN_ON(dquot->dq_dqb.dqb_rsvspace < number);
+       if (dquot->dq_dqb.dqb_rsvspace < number) {
+               WARN_ON_ONCE(1);
+               number = dquot->dq_dqb.dqb_rsvspace;
+       }
        dquot->dq_dqb.dqb_curspace += number;
        dquot->dq_dqb.dqb_rsvspace -= number;
 }
@@ -969,7 +1033,12 @@ static void dquot_claim_reserved_space(struct dquot *dquot,
 static inline
 void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
 {
-       dquot->dq_dqb.dqb_rsvspace -= number;
+       if (dquot->dq_dqb.dqb_rsvspace >= number)
+               dquot->dq_dqb.dqb_rsvspace -= number;
+       else {
+               WARN_ON_ONCE(1);
+               dquot->dq_dqb.dqb_rsvspace = 0;
+       }
 }
 
 static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
@@ -1071,73 +1140,6 @@ static void print_warning(struct dquot *dquot, const int warntype)
 }
 #endif
 
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-
-/* Netlink family structure for quota */
-static struct genl_family quota_genl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = "VFS_DQUOT",
-       .version = 1,
-       .maxattr = QUOTA_NL_A_MAX,
-};
-
-/* Send warning to userspace about user which exceeded quota */
-static void send_warning(const struct dquot *dquot, const char warntype)
-{
-       static atomic_t seq;
-       struct sk_buff *skb;
-       void *msg_head;
-       int ret;
-       int msg_size = 4 * nla_total_size(sizeof(u32)) +
-                      2 * nla_total_size(sizeof(u64));
-
-       /* We have to allocate using GFP_NOFS as we are called from a
-        * filesystem performing write and thus further recursion into
-        * the fs to free some data could cause deadlocks. */
-       skb = genlmsg_new(msg_size, GFP_NOFS);
-       if (!skb) {
-               printk(KERN_ERR
-                 "VFS: Not enough memory to send quota warning.\n");
-               return;
-       }
-       msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
-                       &quota_genl_family, 0, QUOTA_NL_C_WARNING);
-       if (!msg_head) {
-               printk(KERN_ERR
-                 "VFS: Cannot store netlink header in quota warning.\n");
-               goto err_out;
-       }
-       ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
-       if (ret)
-               goto attr_err_out;
-       ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
-       if (ret)
-               goto attr_err_out;
-       ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
-       if (ret)
-               goto attr_err_out;
-       ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
-               MAJOR(dquot->dq_sb->s_dev));
-       if (ret)
-               goto attr_err_out;
-       ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
-               MINOR(dquot->dq_sb->s_dev));
-       if (ret)
-               goto attr_err_out;
-       ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
-       if (ret)
-               goto attr_err_out;
-       genlmsg_end(skb, msg_head);
-
-       genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
-       return;
-attr_err_out:
-       printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
-err_out:
-       kfree_skb(skb);
-}
-#endif
 /*
  * Write warnings to the console and send warning messages over netlink.
  *
@@ -1145,18 +1147,20 @@ err_out:
  */
 static void flush_warnings(struct dquot *const *dquots, char *warntype)
 {
+       struct dquot *dq;
        int i;
 
-       for (i = 0; i < MAXQUOTAS; i++)
-               if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN &&
-                   !warning_issued(dquots[i], warntype[i])) {
+       for (i = 0; i < MAXQUOTAS; i++) {
+               dq = dquots[i];
+               if (dq && warntype[i] != QUOTA_NL_NOWARN &&
+                   !warning_issued(dq, warntype[i])) {
 #ifdef CONFIG_PRINT_QUOTA_WARNING
-                       print_warning(dquots[i], warntype[i]);
-#endif
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-                       send_warning(dquots[i], warntype[i]);
+                       print_warning(dq, warntype[i]);
 #endif
+                       quota_send_warning(dq->dq_type, dq->dq_id,
+                                          dq->dq_sb->s_dev, warntype[i]);
                }
+       }
 }
 
 static int ignore_hardlimit(struct dquot *dquot)
@@ -1287,6 +1291,7 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
                return QUOTA_NL_BHARDBELOW;
        return QUOTA_NL_NOWARN;
 }
+
 /*
  *     Initialize quota pointers in inode
  *     We do things in a bit complicated way but by that we avoid calling
@@ -1296,8 +1301,9 @@ int dquot_initialize(struct inode *inode, int type)
 {
        unsigned int id = 0;
        int cnt, ret = 0;
-       struct dquot *got[MAXQUOTAS] = { NULL, NULL };
+       struct dquot *got[MAXQUOTAS];
        struct super_block *sb = inode->i_sb;
+       qsize_t rsv;
 
        /* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1306,6 +1312,7 @@ int dquot_initialize(struct inode *inode, int type)
 
        /* First get references to structures we might need. */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+               got[cnt] = NULL;
                if (type != -1 && cnt != type)
                        continue;
                switch (cnt) {
@@ -1320,7 +1327,6 @@ int dquot_initialize(struct inode *inode, int type)
        }
 
        down_write(&sb_dqopt(sb)->dqptr_sem);
-       /* Having dqptr_sem we know NOQUOTA flags can't be altered... */
        if (IS_NOQUOTA(inode))
                goto out_err;
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1332,13 +1338,19 @@ int dquot_initialize(struct inode *inode, int type)
                if (!inode->i_dquot[cnt]) {
                        inode->i_dquot[cnt] = got[cnt];
                        got[cnt] = NULL;
+                       /*
+                        * Make quota reservation system happy if someone
+                        * did a write before quota was turned on
+                        */
+                       rsv = inode_get_rsv_space(inode);
+                       if (unlikely(rsv))
+                               dquot_resv_space(inode->i_dquot[cnt], rsv);
                }
        }
 out_err:
        up_write(&sb_dqopt(sb)->dqptr_sem);
        /* Drop unused references */
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-               dqput(got[cnt]);
+       dqput_all(got);
        return ret;
 }
 EXPORT_SYMBOL(dquot_initialize);
@@ -1357,9 +1369,7 @@ int dquot_drop(struct inode *inode)
                inode->i_dquot[cnt] = NULL;
        }
        up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-               dqput(put[cnt]);
+       dqput_all(put);
        return 0;
 }
 EXPORT_SYMBOL(dquot_drop);
@@ -1388,23 +1398,100 @@ void vfs_dq_drop(struct inode *inode)
 EXPORT_SYMBOL(vfs_dq_drop);
 
 /*
- * Following four functions update i_blocks+i_bytes fields and
- * quota information (together with appropriate checks)
- * NOTE: We absolutely rely on the fact that caller dirties
- * the inode (usually macros in quotaops.h care about this) and
- * holds a handle for the current transaction so that dquot write and
- * inode write go into the same transaction.
+ * inode_reserved_space is managed internally by quota, and protected by
+ * i_lock similar to i_blocks+i_bytes.
+ */
+static qsize_t *inode_reserved_space(struct inode * inode)
+{
+       /* Filesystem must explicitly define its own method in order to use
+        * quota reservation interface */
+       BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
+       return inode->i_sb->dq_op->get_reserved_space(inode);
+}
+
+void inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+       spin_lock(&inode->i_lock);
+       *inode_reserved_space(inode) += number;
+       spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(inode_add_rsv_space);
+
+void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+       spin_lock(&inode->i_lock);
+       *inode_reserved_space(inode) -= number;
+       __inode_add_bytes(inode, number);
+       spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(inode_claim_rsv_space);
+
+void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+       spin_lock(&inode->i_lock);
+       *inode_reserved_space(inode) -= number;
+       spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(inode_sub_rsv_space);
+
+static qsize_t inode_get_rsv_space(struct inode *inode)
+{
+       qsize_t ret;
+
+       if (!inode->i_sb->dq_op->get_reserved_space)
+               return 0;
+       spin_lock(&inode->i_lock);
+       ret = *inode_reserved_space(inode);
+       spin_unlock(&inode->i_lock);
+       return ret;
+}
+
+static void inode_incr_space(struct inode *inode, qsize_t number,
+                               int reserve)
+{
+       if (reserve)
+               inode_add_rsv_space(inode, number);
+       else
+               inode_add_bytes(inode, number);
+}
+
+static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
+{
+       if (reserve)
+               inode_sub_rsv_space(inode, number);
+       else
+               inode_sub_bytes(inode, number);
+}
+
+/*
+ * This function updates i_blocks+i_bytes fields and quota information
+ * (together with appropriate checks).
+ *
+ * NOTE: We absolutely rely on the fact that caller dirties the inode
+ * (usually helpers in quotaops.h care about this) and holds a handle for
+ * the current transaction so that dquot write and inode write go into the
+ * same transaction.
  */
 
 /*
  * This operation can block, but only after everything is updated
  */
 int __dquot_alloc_space(struct inode *inode, qsize_t number,
-                       int warn, int reserve)
+               int warn, int reserve)
 {
-       int cnt, ret = QUOTA_OK;
+       int cnt, ret = 0;
        char warntype[MAXQUOTAS];
 
+       /*
+        * First test before acquiring mutex - solves deadlocks when we
+        * re-enter the quota code and are already holding the mutex
+        */
+       if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
+               inode_incr_space(inode, number, reserve);
+               goto out;
+       }
+
+       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                warntype[cnt] = QUOTA_NL_NOWARN;
 
@@ -1412,10 +1499,11 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (!inode->i_dquot[cnt])
                        continue;
-               if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
-                   == NO_QUOTA) {
-                       ret = NO_QUOTA;
-                       goto out_unlock;
+               if (check_bdq(inode->i_dquot[cnt], number, !warn, warntype+cnt)
+                               == NO_QUOTA) {
+                       ret = -EDQUOT;
+                       spin_unlock(&dq_data_lock);
+                       goto out_flush_warn;
                }
        }
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1426,66 +1514,19 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
                else
                        dquot_incr_space(inode->i_dquot[cnt], number);
        }
-       if (!reserve)
-               inode_add_bytes(inode, number);
-out_unlock:
+       inode_incr_space(inode, number, reserve);
        spin_unlock(&dq_data_lock);
-       flush_warnings(inode->i_dquot, warntype);
-       return ret;
-}
-
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
-{
-       int cnt, ret = QUOTA_OK;
-
-       /*
-        * First test before acquiring mutex - solves deadlocks when we
-        * re-enter the quota code and are already holding the mutex
-        */
-       if (IS_NOQUOTA(inode)) {
-               inode_add_bytes(inode, number);
-               goto out;
-       }
-
-       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       if (IS_NOQUOTA(inode)) {
-               inode_add_bytes(inode, number);
-               goto out_unlock;
-       }
-
-       ret = __dquot_alloc_space(inode, number, warn, 0);
-       if (ret == NO_QUOTA)
-               goto out_unlock;
-
-       /* Dirtify all the dquots - this can block when journalling */
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-               if (inode->i_dquot[cnt])
-                       mark_dquot_dirty(inode->i_dquot[cnt]);
-out_unlock:
-       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-       return ret;
-}
-EXPORT_SYMBOL(dquot_alloc_space);
-
-int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
-{
-       int ret = QUOTA_OK;
-
-       if (IS_NOQUOTA(inode))
-               goto out;
 
-       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       if (IS_NOQUOTA(inode))
-               goto out_unlock;
-
-       ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
+       if (reserve)
+               goto out_flush_warn;
+       mark_all_dquot_dirty(inode->i_dquot);
+out_flush_warn:
+       flush_warnings(inode->i_dquot, warntype);
        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
        return ret;
 }
-EXPORT_SYMBOL(dquot_reserve_space);
+EXPORT_SYMBOL(__dquot_alloc_space);
 
 /*
  * This operation can block, but only after everything is updated
@@ -1502,10 +1543,6 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
        for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                warntype[cnt] = QUOTA_NL_NOWARN;
        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       if (IS_NOQUOTA(inode)) {
-               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-               return QUOTA_OK;
-       }
        spin_lock(&dq_data_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (!inode->i_dquot[cnt])
@@ -1524,33 +1561,26 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 warn_put_all:
        spin_unlock(&dq_data_lock);
        if (ret == QUOTA_OK)
-               /* Dirtify all the dquots - this can block when journalling */
-               for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-                       if (inode->i_dquot[cnt])
-                               mark_dquot_dirty(inode->i_dquot[cnt]);
+               mark_all_dquot_dirty(inode->i_dquot);
        flush_warnings(inode->i_dquot, warntype);
        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
        return ret;
 }
 EXPORT_SYMBOL(dquot_alloc_inode);
 
-int dquot_claim_space(struct inode *inode, qsize_t number)
+/*
+ * Convert in-memory reserved quotas to real consumed quotas
+ */
+int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
        int cnt;
-       int ret = QUOTA_OK;
 
-       if (IS_NOQUOTA(inode)) {
-               inode_add_bytes(inode, number);
-               goto out;
+       if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
+               inode_claim_rsv_space(inode, number);
+               return 0;
        }
 
        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       if (IS_NOQUOTA(inode))  {
-               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-               inode_add_bytes(inode, number);
-               goto out;
-       }
-
        spin_lock(&dq_data_lock);
        /* Claim reserved quotas to allocated quotas */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1559,87 +1589,51 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
                                                        number);
        }
        /* Update inode bytes */
-       inode_add_bytes(inode, number);
-       spin_unlock(&dq_data_lock);
-       /* Dirtify all the dquots - this can block when journalling */
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-               if (inode->i_dquot[cnt])
-                       mark_dquot_dirty(inode->i_dquot[cnt]);
-       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-       return ret;
-}
-EXPORT_SYMBOL(dquot_claim_space);
-
-/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
-       int cnt;
-
-       if (IS_NOQUOTA(inode))
-               goto out;
-
-       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       if (IS_NOQUOTA(inode))
-               goto out_unlock;
-
-       spin_lock(&dq_data_lock);
-       /* Release reserved dquots */
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               if (inode->i_dquot[cnt])
-                       dquot_free_reserved_space(inode->i_dquot[cnt], number);
-       }
+       inode_claim_rsv_space(inode, number);
        spin_unlock(&dq_data_lock);
-
-out_unlock:
+       mark_all_dquot_dirty(inode->i_dquot);
        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-       return;
+       return 0;
 }
-EXPORT_SYMBOL(dquot_release_reserved_space);
+EXPORT_SYMBOL(dquot_claim_space_nodirty);
 
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_space(struct inode *inode, qsize_t number)
+void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
        unsigned int cnt;
        char warntype[MAXQUOTAS];
 
        /* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-       if (IS_NOQUOTA(inode)) {
-out_sub:
-               inode_sub_bytes(inode, number);
-               return QUOTA_OK;
+       if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
+               inode_decr_space(inode, number, reserve);
+               return;
        }
 
        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       /* Now recheck reliably when holding dqptr_sem */
-       if (IS_NOQUOTA(inode)) {
-               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-               goto out_sub;
-       }
        spin_lock(&dq_data_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (!inode->i_dquot[cnt])
                        continue;
                warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
-               dquot_decr_space(inode->i_dquot[cnt], number);
+               if (reserve)
+                       dquot_free_reserved_space(inode->i_dquot[cnt], number);
+               else
+                       dquot_decr_space(inode->i_dquot[cnt], number);
        }
-       inode_sub_bytes(inode, number);
+       inode_decr_space(inode, number, reserve);
        spin_unlock(&dq_data_lock);
-       /* Dirtify all the dquots - this can block when journalling */
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-               if (inode->i_dquot[cnt])
-                       mark_dquot_dirty(inode->i_dquot[cnt]);
+
+       if (reserve)
+               goto out_unlock;
+       mark_all_dquot_dirty(inode->i_dquot);
+out_unlock:
        flush_warnings(inode->i_dquot, warntype);
        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       return QUOTA_OK;
 }
-EXPORT_SYMBOL(dquot_free_space);
+EXPORT_SYMBOL(__dquot_free_space);
 
 /*
  * This operation can block, but only after everything is updated
@@ -1655,11 +1649,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
                return QUOTA_OK;
 
        down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       /* Now recheck reliably when holding dqptr_sem */
-       if (IS_NOQUOTA(inode)) {
-               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-               return QUOTA_OK;
-       }
        spin_lock(&dq_data_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                if (!inode->i_dquot[cnt])
@@ -1668,10 +1657,7 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
                dquot_decr_inodes(inode->i_dquot[cnt], number);
        }
        spin_unlock(&dq_data_lock);
-       /* Dirtify all the dquots - this can block when journalling */
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-               if (inode->i_dquot[cnt])
-                       mark_dquot_dirty(inode->i_dquot[cnt]);
+       mark_all_dquot_dirty(inode->i_dquot);
        flush_warnings(inode->i_dquot, warntype);
        up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
        return QUOTA_OK;
@@ -1679,33 +1665,18 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 EXPORT_SYMBOL(dquot_free_inode);
 
 /*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
-       qsize_t reserved_space = 0;
-
-       if (sb_any_quota_active(inode->i_sb) &&
-           inode->i_sb->dq_op->get_reserved_space)
-               reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
-       return reserved_space;
-}
-
-/*
  * Transfer the number of inode and blocks from one diskquota to an other.
  *
  * This operation can block, but only after everything is updated
  * A transaction must be started when entering this function.
  */
-int dquot_transfer(struct inode *inode, struct iattr *iattr)
+int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
 {
        qsize_t space, cur_space;
        qsize_t rsv_space = 0;
        struct dquot *transfer_from[MAXQUOTAS];
        struct dquot *transfer_to[MAXQUOTAS];
        int cnt, ret = QUOTA_OK;
-       int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid,
-           chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid;
        char warntype_to[MAXQUOTAS];
        char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
 
@@ -1719,22 +1690,18 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
                transfer_to[cnt] = NULL;
                warntype_to[cnt] = QUOTA_NL_NOWARN;
        }
-       if (chuid)
-               transfer_to[USRQUOTA] = dqget(inode->i_sb, iattr->ia_uid,
-                                             USRQUOTA);
-       if (chgid)
-               transfer_to[GRPQUOTA] = dqget(inode->i_sb, iattr->ia_gid,
-                                             GRPQUOTA);
-
+       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+               if (mask & (1 << cnt))
+                       transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
+       }
        down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-       /* Now recheck reliably when holding dqptr_sem */
        if (IS_NOQUOTA(inode)) {        /* File without quota accounting? */
                up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
                goto put_all;
        }
        spin_lock(&dq_data_lock);
        cur_space = inode_get_bytes(inode);
-       rsv_space = dquot_get_reserved_space(inode);
+       rsv_space = inode_get_rsv_space(inode);
        space = cur_space + rsv_space;
        /* Build the transfer_from list and check the limits */
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1778,25 +1745,18 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
        spin_unlock(&dq_data_lock);
        up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 
-       /* Dirtify all the dquots - this can block when journalling */
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               if (transfer_from[cnt])
-                       mark_dquot_dirty(transfer_from[cnt]);
-               if (transfer_to[cnt]) {
-                       mark_dquot_dirty(transfer_to[cnt]);
-                       /* The reference we got is transferred to the inode */
-                       transfer_to[cnt] = NULL;
-               }
-       }
+       mark_all_dquot_dirty(transfer_from);
+       mark_all_dquot_dirty(transfer_to);
+       /* The reference we got is transferred to the inode */
+       for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+               transfer_to[cnt] = NULL;
 warn_put_all:
        flush_warnings(transfer_to, warntype_to);
        flush_warnings(transfer_from, warntype_from_inodes);
        flush_warnings(transfer_from, warntype_from_space);
 put_all:
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               dqput(transfer_from[cnt]);
-               dqput(transfer_to[cnt]);
-       }
+       dqput_all(transfer_from);
+       dqput_all(transfer_to);
        return ret;
 over_quota:
        spin_unlock(&dq_data_lock);
@@ -1809,12 +1769,25 @@ over_quota:
 }
 EXPORT_SYMBOL(dquot_transfer);
 
-/* Wrapper for transferring ownership of an inode */
+/* Wrapper for transferring ownership of an inode for uid/gid only
+ * Called from FSXXX_setattr()
+ */
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 {
+       qid_t chid[MAXQUOTAS];
+       unsigned long mask = 0;
+
+       if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) {
+               mask |= 1 << USRQUOTA;
+               chid[USRQUOTA] = iattr->ia_uid;
+       }
+       if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) {
+               mask |= 1 << GRPQUOTA;
+               chid[GRPQUOTA] = iattr->ia_gid;
+       }
        if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
                vfs_dq_init(inode);
-               if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+               if (inode->i_sb->dq_op->transfer(inode, chid, mask) == NO_QUOTA)
                        return 1;
        }
        return 0;
@@ -1839,12 +1812,10 @@ EXPORT_SYMBOL(dquot_commit_info);
 /*
  * Definitions of diskquota operations.
  */
-struct dquot_operations dquot_operations = {
+const struct dquot_operations dquot_operations = {
        .initialize     = dquot_initialize,
        .drop           = dquot_drop,
-       .alloc_space    = dquot_alloc_space,
        .alloc_inode    = dquot_alloc_inode,
-       .free_space     = dquot_free_space,
        .free_inode     = dquot_free_inode,
        .transfer       = dquot_transfer,
        .write_dquot    = dquot_commit,
@@ -2035,15 +2006,16 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
        }
 
        if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
-               /* As we bypass the pagecache we must now flush the inode so
-                * that we see all the changes from userspace... */
-               write_inode_now(inode, 1);
-               /* And now flush the block cache so that kernel sees the
-                * changes */
+               /* As we bypass the pagecache we must now flush all the
+                * dirty data and invalidate caches so that the kernel sees
+                * the changes from userspace. It is not enough to just flush
+                * the quota file, since if blocksize < pagesize invalidation
+                * of the cache could fail because of other unrelated dirty
+                * data. */
+               sync_filesystem(sb);
                invalidate_bdev(sb->s_bdev);
        }
        mutex_lock(&dqopt->dqonoff_mutex);
-       mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
        if (sb_has_quota_loaded(sb, type)) {
                error = -EBUSY;
                goto out_lock;
@@ -2053,11 +2025,15 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
                /* We don't want quota and atime on quota files (deadlocks
                 * possible) Also nobody should write to the file - we use
                 * special IO operations which ignore the immutable bit. */
-               down_write(&dqopt->dqptr_sem);
+               mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
                oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
                                             S_NOQUOTA);
                inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
-               up_write(&dqopt->dqptr_sem);
+               mutex_unlock(&inode->i_mutex);
+               /*
+                * When S_NOQUOTA is set, remove dquot references as no more
+                * references can be added
+                */
                sb->dq_op->drop(inode);
        }
 
@@ -2080,7 +2056,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
                goto out_file_init;
        }
        mutex_unlock(&dqopt->dqio_mutex);
-       mutex_unlock(&inode->i_mutex);
        spin_lock(&dq_state_lock);
        dqopt->flags |= dquot_state_flag(flags, type);
        spin_unlock(&dq_state_lock);
@@ -2095,14 +2070,13 @@ out_file_init:
        iput(inode);
 out_lock:
        if (oldflags != -1) {
-               down_write(&dqopt->dqptr_sem);
+               mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
                /* Set the flags back (in the case of accidental quotaon()
                 * on a wrong file we don't want to mess up the flags) */
                inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
                inode->i_flags |= oldflags;
-               up_write(&dqopt->dqptr_sem);
+               mutex_unlock(&inode->i_mutex);
        }
-       mutex_unlock(&inode->i_mutex);
        mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
        put_quota_format(fmt);
@@ -2232,7 +2206,9 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
        struct dentry *dentry;
        int error;
 
+       mutex_lock(&sb->s_root->d_inode->i_mutex);
        dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
+       mutex_unlock(&sb->s_root->d_inode->i_mutex);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
@@ -2460,7 +2436,7 @@ out:
 }
 EXPORT_SYMBOL(vfs_set_dqinfo);
 
-struct quotactl_ops vfs_quotactl_ops = {
+const struct quotactl_ops vfs_quotactl_ops = {
        .quota_on       = vfs_quota_on,
        .quota_off      = vfs_quota_off,
        .quota_sync     = vfs_quota_sync,
@@ -2472,100 +2448,89 @@ struct quotactl_ops vfs_quotactl_ops = {
 
 static ctl_table fs_dqstats_table[] = {
        {
-               .ctl_name       = FS_DQ_LOOKUPS,
                .procname       = "lookups",
                .data           = &dqstats.lookups,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .ctl_name       = FS_DQ_DROPS,
                .procname       = "drops",
                .data           = &dqstats.drops,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .ctl_name       = FS_DQ_READS,
                .procname       = "reads",
                .data           = &dqstats.reads,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .ctl_name       = FS_DQ_WRITES,
                .procname       = "writes",
                .data           = &dqstats.writes,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .ctl_name       = FS_DQ_CACHE_HITS,
                .procname       = "cache_hits",
                .data           = &dqstats.cache_hits,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .ctl_name       = FS_DQ_ALLOCATED,
                .procname       = "allocated_dquots",
                .data           = &dqstats.allocated_dquots,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .ctl_name       = FS_DQ_FREE,
                .procname       = "free_dquots",
                .data           = &dqstats.free_dquots,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
        {
-               .ctl_name       = FS_DQ_SYNCS,
                .procname       = "syncs",
                .data           = &dqstats.syncs,
                .maxlen         = sizeof(int),
                .mode           = 0444,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
 #ifdef CONFIG_PRINT_QUOTA_WARNING
        {
-               .ctl_name       = FS_DQ_WARNINGS,
                .procname       = "warnings",
                .data           = &flag_print_warnings,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = proc_dointvec,
        },
 #endif
-       { .ctl_name = 0 },
+       { },
 };
 
 static ctl_table fs_table[] = {
        {
-               .ctl_name       = FS_DQSTATS,
                .procname       = "quota",
                .mode           = 0555,
                .child          = fs_dqstats_table,
        },
-       { .ctl_name = 0 },
+       { },
 };
 
 static ctl_table sys_table[] = {
        {
-               .ctl_name       = CTL_FS,
                .procname       = "fs",
                .mode           = 0555,
                .child          = fs_table,
        },
-       { .ctl_name = 0 },
+       { },
 };
 
 static int __init dquot_init(void)
@@ -2606,12 +2571,6 @@ static int __init dquot_init(void)
 
        register_shrinker(&dqcache_shrinker);
 
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-       if (genl_register_family(&quota_genl_family) != 0)
-               printk(KERN_ERR
-                      "VFS: Failed to create quota netlink interface.\n");
-#endif
-
        return 0;
 }
 module_init(dquot_init);