netns xfrm: fix "ip xfrm state|policy count" misreport
[safe/jmp/linux-2.6] / kernel / acct.c
index 72d4760..a6605ca 100644 (file)
@@ -89,11 +89,11 @@ struct bsd_acct_struct {
        struct file             *file;
        struct pid_namespace    *ns;
        struct timer_list       timer;
+       struct list_head        list;
 };
 
 static DEFINE_SPINLOCK(acct_lock);
-
-static struct bsd_acct_struct acct_globals __cacheline_aligned;
+static LIST_HEAD(acct_list);
 
 /*
  * Called whenever the timer says to check the free space.
@@ -176,7 +176,8 @@ out:
  *
  * NOTE: acct_lock MUST be held on entry and exit.
  */
-static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+               struct pid_namespace *ns)
 {
        struct file *old_acct = NULL;
        struct pid_namespace *old_ns = NULL;
@@ -188,12 +189,15 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
                acct->active = 0;
                acct->needcheck = 0;
                acct->file = NULL;
+               acct->ns = NULL;
+               list_del(&acct->list);
        }
        if (file) {
                acct->file = file;
-               acct->ns = get_pid_ns(task_active_pid_ns(current));
+               acct->ns = ns;
                acct->needcheck = 0;
                acct->active = 1;
+               list_add(&acct->list, &acct_list);
                /* It's been deleted if it was used before so this is safe */
                setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
                acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
@@ -204,7 +208,6 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
                spin_unlock(&acct_lock);
                do_acct_process(acct, old_ns, old_acct);
                filp_close(old_acct, NULL);
-               put_pid_ns(old_ns);
                spin_lock(&acct_lock);
        }
 }
@@ -212,7 +215,10 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file)
 static int acct_on(char *name)
 {
        struct file *file;
+       struct vfsmount *mnt;
        int error;
+       struct pid_namespace *ns;
+       struct bsd_acct_struct *acct = NULL;
 
        /* Difference from BSD - they don't do O_APPEND */
        file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -229,18 +235,35 @@ static int acct_on(char *name)
                return -EIO;
        }
 
+       ns = task_active_pid_ns(current);
+       if (ns->bacct == NULL) {
+               acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+               if (acct == NULL) {
+                       filp_close(file, NULL);
+                       return -ENOMEM;
+               }
+       }
+
        error = security_acct(file);
        if (error) {
+               kfree(acct);
                filp_close(file, NULL);
                return error;
        }
 
        spin_lock(&acct_lock);
-       mnt_pin(file->f_path.mnt);
-       acct_file_reopen(&acct_globals, file);
+       if (ns->bacct == NULL) {
+               ns->bacct = acct;
+               acct = NULL;
+       }
+
+       mnt = file->f_path.mnt;
+       mnt_pin(mnt);
+       acct_file_reopen(ns->bacct, file, ns);
        spin_unlock(&acct_lock);
 
-       mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+       mntput(mnt); /* it's pinned, now give up active reference */
+       kfree(acct);
 
        return 0;
 }
@@ -256,7 +279,7 @@ static int acct_on(char *name)
  * should be written. If the filename is NULL, accounting will be
  * shutdown.
  */
-asmlinkage long sys_acct(const char __user *name)
+SYSCALL_DEFINE1(acct, const char __user *, name)
 {
        int error;
 
@@ -270,10 +293,16 @@ asmlinkage long sys_acct(const char __user *name)
                error = acct_on(tmp);
                putname(tmp);
        } else {
+               struct bsd_acct_struct *acct;
+
+               acct = task_active_pid_ns(current)->bacct;
+               if (acct == NULL)
+                       return 0;
+
                error = security_acct(NULL);
                if (!error) {
                        spin_lock(&acct_lock);
-                       acct_file_reopen(&acct_globals, NULL);
+                       acct_file_reopen(acct, NULL, NULL);
                        spin_unlock(&acct_lock);
                }
        }
@@ -289,9 +318,15 @@ asmlinkage long sys_acct(const char __user *name)
  */
 void acct_auto_close_mnt(struct vfsmount *m)
 {
+       struct bsd_acct_struct *acct;
+
        spin_lock(&acct_lock);
-       if (acct_globals.file && acct_globals.file->f_path.mnt == m)
-               acct_file_reopen(&acct_globals, NULL);
+restart:
+       list_for_each_entry(acct, &acct_list, list)
+               if (acct->file && acct->file->f_path.mnt == m) {
+                       acct_file_reopen(acct, NULL, NULL);
+                       goto restart;
+               }
        spin_unlock(&acct_lock);
 }
 
@@ -304,10 +339,29 @@ void acct_auto_close_mnt(struct vfsmount *m)
  */
 void acct_auto_close(struct super_block *sb)
 {
+       struct bsd_acct_struct *acct;
+
+       spin_lock(&acct_lock);
+restart:
+       list_for_each_entry(acct, &acct_list, list)
+               if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+                       acct_file_reopen(acct, NULL, NULL);
+                       goto restart;
+               }
+       spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+       struct bsd_acct_struct *acct;
+
        spin_lock(&acct_lock);
-       if (acct_globals.file &&
-           acct_globals.file->f_path.mnt->mnt_sb == sb) {
-               acct_file_reopen(&acct_globals, NULL);
+       acct = ns->bacct;
+       if (acct != NULL) {
+               if (acct->file != NULL)
+                       acct_file_reopen(acct, NULL, NULL);
+
+               kfree(acct);
        }
        spin_unlock(&acct_lock);
 }
@@ -437,13 +491,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
        u64 run_time;
        struct timespec uptime;
        struct tty_struct *tty;
+       const struct cred *orig_cred;
+
+       /* Perform file operations on behalf of whoever enabled accounting */
+       orig_cred = override_creds(file->f_cred);
 
        /*
         * First check to see if there is enough free_space to continue
         * the process accounting system.
         */
        if (!check_free_space(acct, file))
-               return;
+               goto out;
 
        /*
         * Fill the accounting struct with the needed info as recorded
@@ -478,15 +536,15 @@ static void do_acct_process(struct bsd_acct_struct *acct,
        do_div(elapsed, AHZ);
        ac.ac_btime = get_seconds() - elapsed;
        /* we really need to bite the bullet and change layout */
-       ac.ac_uid = current->uid;
-       ac.ac_gid = current->gid;
+       ac.ac_uid = orig_cred->uid;
+       ac.ac_gid = orig_cred->gid;
 #if ACCT_VERSION==2
        ac.ac_ahz = AHZ;
 #endif
 #if ACCT_VERSION==1 || ACCT_VERSION==2
        /* backward-compatible 16 bit fields */
-       ac.ac_uid16 = current->uid;
-       ac.ac_gid16 = current->gid;
+       ac.ac_uid16 = ac.ac_uid;
+       ac.ac_gid16 = ac.ac_gid;
 #endif
 #if ACCT_VERSION==3
        ac.ac_pid = task_tgid_nr_ns(current, ns);
@@ -496,7 +554,7 @@ static void do_acct_process(struct bsd_acct_struct *acct,
 #endif
 
        spin_lock_irq(&current->sighand->siglock);
-       tty = current->signal->tty;
+       tty = current->signal->tty;     /* Safe as we hold the siglock */
        ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
        ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
        ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
@@ -525,6 +583,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
                               sizeof(acct_t), &file->f_pos);
        current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
        set_fs(fs);
+out:
+       revert_creds(orig_cred);
 }
 
 /**
@@ -579,33 +639,46 @@ void acct_collect(long exitcode, int group_dead)
        spin_unlock_irq(&current->sighand->siglock);
 }
 
-/**
- * acct_process - now just a wrapper around do_acct_process
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
 {
        struct file *file = NULL;
-       struct pid_namespace *ns;
+       struct bsd_acct_struct *acct;
 
+       acct = ns->bacct;
        /*
         * accelerate the common fastpath:
         */
-       if (!acct_globals.file)
+       if (!acct || !acct->file)
                return;
 
        spin_lock(&acct_lock);
-       file = acct_globals.file;
+       file = acct->file;
        if (unlikely(!file)) {
                spin_unlock(&acct_lock);
                return;
        }
        get_file(file);
-       ns = get_pid_ns(acct_globals.ns);
        spin_unlock(&acct_lock);
 
-       do_acct_process(&acct_globals, ns, file);
+       do_acct_process(acct, ns, file);
        fput(file);
-       put_pid_ns(ns);
+}
+
+/**
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
+ *
+ * handles process accounting for an exiting task
+ */
+void acct_process(void)
+{
+       struct pid_namespace *ns;
+
+       /*
+        * This loop is safe lockless, since current is still
+        * alive and holds its namespace, which in turn holds
+        * its parent.
+        */
+       for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+               acct_process_in_ns(ns);
 }