tunnels: fix netns vs proto registration ordering
[safe/jmp/linux-2.6] / kernel / rcutree.h
index 9aa8c8a..d2a0046 100644 (file)
  * In practice, this has not been tested, so there is probably some
  * bug somewhere.
  */
-#define MAX_RCU_LVLS 3
+#define MAX_RCU_LVLS 4
 #define RCU_FANOUT           (CONFIG_RCU_FANOUT)
 #define RCU_FANOUT_SQ        (RCU_FANOUT * RCU_FANOUT)
 #define RCU_FANOUT_CUBE              (RCU_FANOUT_SQ * RCU_FANOUT)
+#define RCU_FANOUT_FOURTH     (RCU_FANOUT_CUBE * RCU_FANOUT)
 
 #if NR_CPUS <= RCU_FANOUT
 #  define NUM_RCU_LVLS       1
 #  define NUM_RCU_LVL_1              (NR_CPUS)
 #  define NUM_RCU_LVL_2              0
 #  define NUM_RCU_LVL_3              0
+#  define NUM_RCU_LVL_4              0
 #elif NR_CPUS <= RCU_FANOUT_SQ
 #  define NUM_RCU_LVLS       2
 #  define NUM_RCU_LVL_0              1
 #  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
 #  define NUM_RCU_LVL_2              (NR_CPUS)
 #  define NUM_RCU_LVL_3              0
+#  define NUM_RCU_LVL_4              0
 #elif NR_CPUS <= RCU_FANOUT_CUBE
 #  define NUM_RCU_LVLS       3
 #  define NUM_RCU_LVL_0              1
 #  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
 #  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
 #  define NUM_RCU_LVL_3              NR_CPUS
+#  define NUM_RCU_LVL_4              0
+#elif NR_CPUS <= RCU_FANOUT_FOURTH
+#  define NUM_RCU_LVLS       4
+#  define NUM_RCU_LVL_0              1
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
+#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
+#  define NUM_RCU_LVL_4              NR_CPUS
 #else
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
 #endif /* #if (NR_CPUS) <= RCU_FANOUT */
 
-#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
 
 /*
@@ -79,24 +90,67 @@ struct rcu_dynticks {
  * Definition for node within the RCU grace-period-detection hierarchy.
  */
 struct rcu_node {
-       spinlock_t lock;
+       spinlock_t lock;        /* Root rcu_node's lock protects some */
+                               /*  rcu_state fields as well as following. */
        long    gpnum;          /* Current grace period for this node. */
                                /*  This will either be equal to or one */
                                /*  behind the root rcu_node's gpnum. */
+       long    completed;      /* Last grace period completed for this node. */
+                               /*  This will either be equal to or one */
+                               /*  behind the root rcu_node's gpnum. */
        unsigned long qsmask;   /* CPUs or groups that need to switch in */
                                /*  order for current grace period to proceed.*/
+                               /*  In leaf rcu_node, each bit corresponds to */
+                               /*  an rcu_data structure, otherwise, each */
+                               /*  bit corresponds to a child rcu_node */
+                               /*  structure. */
+       unsigned long expmask;  /* Groups that have ->blocked_tasks[] */
+                               /*  elements that need to drain to allow the */
+                               /*  current expedited grace period to */
+                               /*  complete (only for TREE_PREEMPT_RCU). */
        unsigned long qsmaskinit;
-                               /* Per-GP initialization for qsmask. */
+                               /* Per-GP initial value for qsmask & expmask. */
        unsigned long grpmask;  /* Mask to apply to parent qsmask. */
+                               /*  Only one bit will be set in this mask. */
        int     grplo;          /* lowest-numbered CPU or group here. */
        int     grphi;          /* highest-numbered CPU or group here. */
        u8      grpnum;         /* CPU/group number for next level up. */
        u8      level;          /* root is at level 0. */
        struct rcu_node *parent;
-       struct list_head blocked_tasks[2];
+       struct list_head blocked_tasks[4];
                                /* Tasks blocked in RCU read-side critsect. */
+                               /*  Grace period number (->gpnum) x blocked */
+                               /*  by tasks on the (x & 0x1) element of the */
+                               /*  blocked_tasks[] array. */
 } ____cacheline_internodealigned_in_smp;
 
+/*
+ * Do a full breadth-first scan of the rcu_node structures for the
+ * specified rcu_state structure.
+ */
+#define rcu_for_each_node_breadth_first(rsp, rnp) \
+       for ((rnp) = &(rsp)->node[0]; \
+            (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+
+/*
+ * Do a breadth-first scan of the non-leaf rcu_node structures for the
+ * specified rcu_state structure.  Note that if there is a singleton
+ * rcu_node tree with but one rcu_node structure, this loop is a no-op.
+ */
+#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+       for ((rnp) = &(rsp)->node[0]; \
+            (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
+
+/*
+ * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
+ * structure.  Note that if there is a singleton rcu_node tree with but
+ * one rcu_node structure, this loop -will- visit the rcu_node structure.
+ * It is still a leaf node, even if it is also the root node.
+ */
+#define rcu_for_each_leaf_node(rsp, rnp) \
+       for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
+            (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+
 /* Index values for nxttail array in struct rcu_data. */
 #define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
 #define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
@@ -126,23 +180,30 @@ struct rcu_data {
         * Any of the partitions might be empty, in which case the
         * pointer to that partition will be equal to the pointer for
         * the following partition.  When the list is empty, all of
-        * the nxttail elements point to nxtlist, which is NULL.
+        * the nxttail elements point to the ->nxtlist pointer itself,
+        * which in that case is NULL.
         *
-        * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]):
-        *      Entries that might have arrived after current GP ended
-        * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
-        *      Entries known to have arrived before current GP ended
-        * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
-        *      Entries that batch # <= ->completed - 1: waiting for current GP
         * [nxtlist, *nxttail[RCU_DONE_TAIL]):
         *      Entries that batch # <= ->completed
         *      The grace period for these entries has completed, and
         *      the other grace-period-completed entries may be moved
         *      here temporarily in rcu_process_callbacks().
+        * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
+        *      Entries that batch # <= ->completed - 1: waiting for current GP
+        * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
+        *      Entries known to have arrived before current GP ended
+        * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
+        *      Entries that might have arrived after current GP ended
+        *      Note that the value of *nxttail[RCU_NEXT_TAIL] will
+        *      always be NULL, as this is the end of the list.
         */
        struct rcu_head *nxtlist;
        struct rcu_head **nxttail[RCU_NEXT_SIZE];
        long            qlen;           /* # of queued callbacks */
+       long            qlen_last_fqs_check;
+                                       /* qlen at last check for QS forcing */
+       unsigned long   n_force_qs_snap;
+                                       /* did other CPU force QS recently? */
        long            blimit;         /* Upper limit on a processed batch */
 
 #ifdef CONFIG_NO_HZ
@@ -173,13 +234,15 @@ struct rcu_data {
 };
 
 /* Values for signaled field in struct rcu_state. */
-#define RCU_GP_INIT            0       /* Grace period being initialized. */
-#define RCU_SAVE_DYNTICK       1       /* Need to scan dyntick state. */
-#define RCU_FORCE_QS           2       /* Need to force quiescent state. */
+#define RCU_GP_IDLE            0       /* No grace period in progress. */
+#define RCU_GP_INIT            1       /* Grace period being initialized. */
+#define RCU_SAVE_DYNTICK       2       /* Need to scan dyntick state. */
+#define RCU_SAVE_COMPLETED     3       /* Need to save rsp->completed. */
+#define RCU_FORCE_QS           4       /* Need to force quiescent state. */
 #ifdef CONFIG_NO_HZ
 #define RCU_SIGNAL_INIT                RCU_SAVE_DYNTICK
 #else /* #ifdef CONFIG_NO_HZ */
-#define RCU_SIGNAL_INIT                RCU_FORCE_QS
+#define RCU_SIGNAL_INIT                RCU_SAVE_COMPLETED
 #endif /* #else #ifdef CONFIG_NO_HZ */
 
 #define RCU_JIFFIES_TILL_FORCE_QS       3      /* for rsp->jiffies_force_qs */
@@ -216,10 +279,23 @@ struct rcu_state {
                                                /* Force QS state. */
        long    gpnum;                          /* Current gp number. */
        long    completed;                      /* # of last completed gp. */
+
+       /* End of fields guarded by root rcu_node's lock. */
+
        spinlock_t onofflock;                   /* exclude on/offline and */
-                                               /*  starting new GP. */
+                                               /*  starting new GP.  Also */
+                                               /*  protects the following */
+                                               /*  orphan_cbs fields. */
+       struct rcu_head *orphan_cbs_list;       /* list of rcu_head structs */
+                                               /*  orphaned by all CPUs in */
+                                               /*  a given leaf rcu_node */
+                                               /*  going offline. */
+       struct rcu_head **orphan_cbs_tail;      /* And tail pointer. */
+       long orphan_qlen;                       /* Number of orphaned cbs. */
        spinlock_t fqslock;                     /* Only one task forcing */
                                                /*  quiescent states. */
+       long    completed_fqs;                  /* Value of completed @ snap. */
+                                               /*  Protected by fqslock. */
        unsigned long jiffies_force_qs;         /* Time at which to invoke */
                                                /*  force_quiescent_state(). */
        unsigned long n_force_qs;               /* Number of calls to */
@@ -234,11 +310,15 @@ struct rcu_state {
        unsigned long jiffies_stall;            /* Time at which to check */
                                                /*  for CPU stalls. */
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-#ifdef CONFIG_NO_HZ
-       long dynticks_completed;                /* Value of completed @ snap. */
-#endif /* #ifdef CONFIG_NO_HZ */
 };
 
+/* Return values for rcu_preempt_offline_tasks(). */
+
+#define RCU_OFL_TASKS_NORM_GP  0x1             /* Tasks blocking normal */
+                                               /*  GP were moved to root. */
+#define RCU_OFL_TASKS_EXP_GP   0x2             /* Tasks blocking expedited */
+                                               /*  GP were moved to root. */
+
 #ifdef RCU_TREE_NONCORE
 
 /*
@@ -255,5 +335,37 @@ extern struct rcu_state rcu_preempt_state;
 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 
-#endif /* #ifdef RCU_TREE_NONCORE */
+#else /* #ifdef RCU_TREE_NONCORE */
+
+/* Forward declarations for rcutree_plugin.h */
+static void rcu_bootup_announce(void);
+long rcu_batches_completed(void);
+static void rcu_preempt_note_context_switch(int cpu);
+static int rcu_preempted_readers(struct rcu_node *rnp);
+#ifdef CONFIG_HOTPLUG_CPU
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
+                                     unsigned long flags);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+static void rcu_print_task_stall(struct rcu_node *rnp);
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
+#ifdef CONFIG_HOTPLUG_CPU
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                    struct rcu_node *rnp,
+                                    struct rcu_data *rdp);
+static void rcu_preempt_offline_cpu(int cpu);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_preempt_check_callbacks(int cpu);
+static void rcu_preempt_process_callbacks(void);
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
+#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
+static int rcu_preempt_pending(int cpu);
+static int rcu_preempt_needs_cpu(int cpu);
+static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
+static void rcu_preempt_send_cbs_to_orphanage(void);
+static void __init __rcu_init_preempt(void);
 
+#endif /* #else #ifdef RCU_TREE_NONCORE */