netns xfrm: fix "ip xfrm state|policy count" misreport
[safe/jmp/linux-2.6] / kernel / acct.c
index fc71c13..a6605ca 100644 (file)
@@ -75,7 +75,8 @@ int acct_parm[3] = {4, 2, 30};
 /*
  * External references and all of the globals.
  */
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+               struct pid_namespace *ns, struct file *);
 
 /*
  * This structure is used so that all the data protected by lock
@@ -88,11 +89,11 @@ struct bsd_acct_struct {
        struct file             *file;
        struct pid_namespace    *ns;
        struct timer_list       timer;
+       struct list_head        list;
 };
 
 static DEFINE_SPINLOCK(acct_lock);
-
-static struct bsd_acct_struct acct_globals __cacheline_aligned;
+static LIST_HEAD(acct_list);
 
 /*
  * Called whenever the timer says to check the free space.
@@ -106,7 +107,7 @@ static void acct_timeout(unsigned long x)
 /*
  * Check the amount of free space and suspend/resume accordingly.
  */
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
 {
        struct kstatfs sbuf;
        int res;
@@ -115,8 +116,8 @@ static int check_free_space(struct file *file)
        sector_t suspend;
 
        spin_lock(&acct_lock);
-       res = acct_globals.active;
-       if (!file || !acct_globals.needcheck)
+       res = acct->active;
+       if (!file || !acct->needcheck)
                goto out;
        spin_unlock(&acct_lock);
 
@@ -137,33 +138,33 @@ static int check_free_space(struct file *file)
                act = 0;
 
        /*
-        * If some joker switched acct_globals.file under us we'ld better be
+        * If some joker switched acct->file under us we'd better be
         * silent and _not_ touch anything.
         */
        spin_lock(&acct_lock);
-       if (file != acct_globals.file) {
+       if (file != acct->file) {
                if (act)
                        res = act>0;
                goto out;
        }
 
-       if (acct_globals.active) {
+       if (acct->active) {
                if (act < 0) {
-                       acct_globals.active = 0;
+                       acct->active = 0;
                        printk(KERN_INFO "Process accounting paused\n");
                }
        } else {
                if (act > 0) {
-                       acct_globals.active = 1;
+                       acct->active = 1;
                        printk(KERN_INFO "Process accounting resumed\n");
                }
        }
 
-       del_timer(&acct_globals.timer);
-       acct_globals.needcheck = 0;
-       acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-       add_timer(&acct_globals.timer);
-       res = acct_globals.active;
+       del_timer(&acct->timer);
+       acct->needcheck = 0;
+       acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+       add_timer(&acct->timer);
+       res = acct->active;
 out:
        spin_unlock(&acct_lock);
        return res;
@@ -175,36 +176,38 @@ out:
  *
  * NOTE: acct_lock MUST be held on entry and exit.
  */
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+               struct pid_namespace *ns)
 {
        struct file *old_acct = NULL;
        struct pid_namespace *old_ns = NULL;
 
-       if (acct_globals.file) {
-               old_acct = acct_globals.file;
-               old_ns = acct_globals.ns;
-               del_timer(&acct_globals.timer);
-               acct_globals.active = 0;
-               acct_globals.needcheck = 0;
-               acct_globals.file = NULL;
+       if (acct->file) {
+               old_acct = acct->file;
+               old_ns = acct->ns;
+               del_timer(&acct->timer);
+               acct->active = 0;
+               acct->needcheck = 0;
+               acct->file = NULL;
+               acct->ns = NULL;
+               list_del(&acct->list);
        }
        if (file) {
-               acct_globals.file = file;
-               acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
-               acct_globals.needcheck = 0;
-               acct_globals.active = 1;
+               acct->file = file;
+               acct->ns = ns;
+               acct->needcheck = 0;
+               acct->active = 1;
+               list_add(&acct->list, &acct_list);
                /* It's been deleted if it was used before so this is safe */
-               setup_timer(&acct_globals.timer, acct_timeout,
-                               (unsigned long)&acct_globals);
-               acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-               add_timer(&acct_globals.timer);
+               setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
+               acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+               add_timer(&acct->timer);
        }
        if (old_acct) {
                mnt_unpin(old_acct->f_path.mnt);
                spin_unlock(&acct_lock);
-               do_acct_process(old_ns, old_acct);
+               do_acct_process(acct, old_ns, old_acct);
                filp_close(old_acct, NULL);
-               put_pid_ns(old_ns);
                spin_lock(&acct_lock);
        }
 }
@@ -212,7 +215,10 @@ static void acct_file_reopen(struct file *file)
 static int acct_on(char *name)
 {
        struct file *file;
+       struct vfsmount *mnt;
        int error;
+       struct pid_namespace *ns;
+       struct bsd_acct_struct *acct = NULL;
 
        /* Difference from BSD - they don't do O_APPEND */
        file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -229,18 +235,35 @@ static int acct_on(char *name)
                return -EIO;
        }
 
+       ns = task_active_pid_ns(current);
+       if (ns->bacct == NULL) {
+               acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+               if (acct == NULL) {
+                       filp_close(file, NULL);
+                       return -ENOMEM;
+               }
+       }
+
        error = security_acct(file);
        if (error) {
+               kfree(acct);
                filp_close(file, NULL);
                return error;
        }
 
        spin_lock(&acct_lock);
-       mnt_pin(file->f_path.mnt);
-       acct_file_reopen(file);
+       if (ns->bacct == NULL) {
+               ns->bacct = acct;
+               acct = NULL;
+       }
+
+       mnt = file->f_path.mnt;
+       mnt_pin(mnt);
+       acct_file_reopen(ns->bacct, file, ns);
        spin_unlock(&acct_lock);
 
-       mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+       mntput(mnt); /* it's pinned, now give up active reference */
+       kfree(acct);
 
        return 0;
 }
@@ -256,7 +279,7 @@ static int acct_on(char *name)
  * should be written. If the filename is NULL, accounting will be
  * shutdown.
  */
-asmlinkage long sys_acct(const char __user *name)
+SYSCALL_DEFINE1(acct, const char __user *, name)
 {
        int error;
 
@@ -270,10 +293,16 @@ asmlinkage long sys_acct(const char __user *name)
                error = acct_on(tmp);
                putname(tmp);
        } else {
+               struct bsd_acct_struct *acct;
+
+               acct = task_active_pid_ns(current)->bacct;
+               if (acct == NULL)
+                       return 0;
+
                error = security_acct(NULL);
                if (!error) {
                        spin_lock(&acct_lock);
-                       acct_file_reopen(NULL);
+                       acct_file_reopen(acct, NULL, NULL);
                        spin_unlock(&acct_lock);
                }
        }
@@ -289,9 +318,15 @@ asmlinkage long sys_acct(const char __user *name)
  */
 void acct_auto_close_mnt(struct vfsmount *m)
 {
+       struct bsd_acct_struct *acct;
+
        spin_lock(&acct_lock);
-       if (acct_globals.file && acct_globals.file->f_path.mnt == m)
-               acct_file_reopen(NULL);
+restart:
+       list_for_each_entry(acct, &acct_list, list)
+               if (acct->file && acct->file->f_path.mnt == m) {
+                       acct_file_reopen(acct, NULL, NULL);
+                       goto restart;
+               }
        spin_unlock(&acct_lock);
 }
 
@@ -304,10 +339,29 @@ void acct_auto_close_mnt(struct vfsmount *m)
  */
 void acct_auto_close(struct super_block *sb)
 {
+       struct bsd_acct_struct *acct;
+
+       spin_lock(&acct_lock);
+restart:
+       list_for_each_entry(acct, &acct_list, list)
+               if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+                       acct_file_reopen(acct, NULL, NULL);
+                       goto restart;
+               }
+       spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+       struct bsd_acct_struct *acct;
+
        spin_lock(&acct_lock);
-       if (acct_globals.file &&
-           acct_globals.file->f_path.mnt->mnt_sb == sb) {
-               acct_file_reopen(NULL);
+       acct = ns->bacct;
+       if (acct != NULL) {
+               if (acct->file != NULL)
+                       acct_file_reopen(acct, NULL, NULL);
+
+               kfree(acct);
        }
        spin_unlock(&acct_lock);
 }
@@ -426,7 +480,8 @@ static u32 encode_float(u64 value)
 /*
  *  do_acct_process does all actual work. Caller holds the reference to file.
  */
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+               struct pid_namespace *ns, struct file *file)
 {
        struct pacct_struct *pacct = &current->signal->pacct;
        acct_t ac;
@@ -436,13 +491,17 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
        u64 run_time;
        struct timespec uptime;
        struct tty_struct *tty;
+       const struct cred *orig_cred;
+
+       /* Perform file operations on behalf of whoever enabled accounting */
+       orig_cred = override_creds(file->f_cred);
 
        /*
         * First check to see if there is enough free_space to continue
         * the process accounting system.
         */
-       if (!check_free_space(file))
-               return;
+       if (!check_free_space(acct, file))
+               goto out;
 
        /*
         * Fill the accounting struct with the needed info as recorded
@@ -477,15 +536,15 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
        do_div(elapsed, AHZ);
        ac.ac_btime = get_seconds() - elapsed;
        /* we really need to bite the bullet and change layout */
-       ac.ac_uid = current->uid;
-       ac.ac_gid = current->gid;
+       ac.ac_uid = orig_cred->uid;
+       ac.ac_gid = orig_cred->gid;
 #if ACCT_VERSION==2
        ac.ac_ahz = AHZ;
 #endif
 #if ACCT_VERSION==1 || ACCT_VERSION==2
        /* backward-compatible 16 bit fields */
-       ac.ac_uid16 = current->uid;
-       ac.ac_gid16 = current->gid;
+       ac.ac_uid16 = ac.ac_uid;
+       ac.ac_gid16 = ac.ac_gid;
 #endif
 #if ACCT_VERSION==3
        ac.ac_pid = task_tgid_nr_ns(current, ns);
@@ -495,7 +554,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
 #endif
 
        spin_lock_irq(&current->sighand->siglock);
-       tty = current->signal->tty;
+       tty = current->signal->tty;     /* Safe as we hold the siglock */
        ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
        ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
        ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
@@ -524,6 +583,8 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
                               sizeof(acct_t), &file->f_pos);
        current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
        set_fs(fs);
+out:
+       revert_creds(orig_cred);
 }
 
 /**
@@ -578,33 +639,46 @@ void acct_collect(long exitcode, int group_dead)
        spin_unlock_irq(&current->sighand->siglock);
 }
 
-/**
- * acct_process - now just a wrapper around do_acct_process
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
 {
        struct file *file = NULL;
-       struct pid_namespace *ns;
+       struct bsd_acct_struct *acct;
 
+       acct = ns->bacct;
        /*
         * accelerate the common fastpath:
         */
-       if (!acct_globals.file)
+       if (!acct || !acct->file)
                return;
 
        spin_lock(&acct_lock);
-       file = acct_globals.file;
+       file = acct->file;
        if (unlikely(!file)) {
                spin_unlock(&acct_lock);
                return;
        }
        get_file(file);
-       ns = get_pid_ns(acct_globals.ns);
        spin_unlock(&acct_lock);
 
-       do_acct_process(ns, file);
+       do_acct_process(acct, ns, file);
        fput(file);
-       put_pid_ns(ns);
+}
+
+/**
+ * acct_process - handle process accounting for an exiting task
+ *
+ * Calls acct_process_in_ns() (itself a wrapper around do_acct_process())
+ * for the task's active pid namespace and each of its ancestors.
+ */
+void acct_process(void)
+{
+       struct pid_namespace *ns;
+
+       /*
+        * This loop is safe without locking, since current is
+        * still alive and holds its namespace, which in turn
+        * holds its parent.
+        */
+       for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+               acct_process_in_ns(ns);
 }