memcg: mem+swap controller Kconfig

[safe/jmp/linux-2.6] / init / Kconfig
diff --git a/init/Kconfig b/init/Kconfig

index 113c74c..a724a14 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -271,59 +271,6 @@ config LOG_BUF_SHIFT
                      13 =>  8 KB
                      12 =>  4 KB
  
-config CGROUPS
-       bool "Control Group support"
-       help
-         This option will let you use process cgroup subsystems
-         such as Cpusets
-
-         Say N if unsure.
-
-config CGROUP_DEBUG
-       bool "Example debug cgroup subsystem"
-       depends on CGROUPS
-       default n
-       help
-         This option enables a simple cgroup subsystem that
-         exports useful debugging information about the cgroups
-         framework
-
-         Say N if unsure
-
-config CGROUP_NS
-        bool "Namespace cgroup subsystem"
-        depends on CGROUPS
-        help
-          Provides a simple namespace cgroup subsystem to
-          provide hierarchical naming of sets of namespaces,
-          for instance virtual servers and checkpoint/restart
-          jobs.
-
-config CGROUP_FREEZER
-        bool "control group freezer subsystem"
-        depends on CGROUPS
-        help
-          Provides a way to freeze and unfreeze all tasks in a
-         cgroup.
-
-config CGROUP_DEVICE
-       bool "Device controller for cgroups"
-       depends on CGROUPS && EXPERIMENTAL
-       help
-         Provides a cgroup implementing whitelists for devices which
-         a process in the cgroup can mknod or open.
-
-config CPUSETS
-       bool "Cpuset support"
-       depends on SMP && CGROUPS
-       help
-         This option will let you create and manage CPUSETs which
-         allow dynamically partitioning a system into sets of CPUs and
-         Memory Nodes and assigning tasks to run only within those sets.
-         This is primarily useful on large SMP or NUMA systems.
-
-         Say N if unsure.
-
  #
  # Architectures with an unreliable sched_clock() should select this:
  #
@@ -337,6 +284,8 @@ config GROUP_SCHED
         help
           This feature lets CPU scheduler recognize task groups and control CPU
           bandwidth allocation to such task groups.
+         In order to create a group from arbitrary set of processes, use
+         CONFIG_CGROUPS. (See Control Group support.)
  
  config FAIR_GROUP_SCHED
         bool "Group scheduling for SCHED_OTHER"
@@ -354,7 +303,7 @@ config RT_GROUP_SCHED
           setting below. If enabled, it will also make it impossible to
           schedule realtime tasks for non-root users until you allocate
           realtime bandwidth for them.
-         See Documentation/sched-rt-group.txt for more information.
+         See Documentation/scheduler/sched-rt-group.txt for more information.
  
  choice
         depends on GROUP_SCHED
@@ -379,6 +328,66 @@ config CGROUP_SCHED
  
  endchoice
  
+menu "Control Group support"
+config CGROUPS
+       bool "Control Group support"
+       help
+         This option adds support for grouping sets of processes together, for
+         use with process control subsystems such as Cpusets, CFS, memory
+         controls or device isolation.
+         See
+               - Documentation/cpusets.txt     (Cpusets)
+               - Documentation/scheduler/sched-design-CFS.txt  (CFS)
+               - Documentation/cgroups/ (features for grouping, isolation)
+               - Documentation/controllers/ (features for resource control)
+
+         Say N if unsure.
+
+config CGROUP_DEBUG
+       bool "Example debug cgroup subsystem"
+       depends on CGROUPS
+       default n
+       help
+         This option enables a simple cgroup subsystem that
+         exports useful debugging information about the cgroups
+         framework
+
+         Say N if unsure
+
+config CGROUP_NS
+        bool "Namespace cgroup subsystem"
+        depends on CGROUPS
+        help
+          Provides a simple namespace cgroup subsystem to
+          provide hierarchical naming of sets of namespaces,
+          for instance virtual servers and checkpoint/restart
+          jobs.
+
+config CGROUP_FREEZER
+        bool "control group freezer subsystem"
+        depends on CGROUPS
+        help
+          Provides a way to freeze and unfreeze all tasks in a
+         cgroup.
+
+config CGROUP_DEVICE
+       bool "Device controller for cgroups"
+       depends on CGROUPS && EXPERIMENTAL
+       help
+         Provides a cgroup implementing whitelists for devices which
+         a process in the cgroup can mknod or open.
+
+config CPUSETS
+       bool "Cpuset support"
+       depends on SMP && CGROUPS
+       help
+         This option will let you create and manage CPUSETs which
+         allow dynamically partitioning a system into sets of CPUs and
+         Memory Nodes and assigning tasks to run only within those sets.
+         This is primarily useful on large SMP or NUMA systems.
+
+         Say N if unsure.
+
  config CGROUP_CPUACCT
         bool "Simple CPU accounting cgroup subsystem"
         depends on CGROUPS
@@ -393,53 +402,86 @@ config RESOURCE_COUNTERS
            infrastructure that works with cgroups
         depends on CGROUPS
  
-config MM_OWNER
-       bool
-
  config CGROUP_MEM_RES_CTLR
         bool "Memory Resource Controller for Control Groups"
         depends on CGROUPS && RESOURCE_COUNTERS
         select MM_OWNER
         help
-         Provides a memory resource controller that manages both page cache and
-         RSS memory.
+         Provides a memory resource controller that manages both anonymous
+         memory and page cache. (See Documentation/controllers/memory.txt)
  
           Note that setting this option increases fixed memory overhead
-         associated with each page of memory in the system by 4/8 bytes
-         and also increases cache misses because struct page on many 64bit
-         systems will not fit into a single cache line anymore.
+         associated with each page of memory in the system. With this
+         option, 20 (40) bytes per page on 32-bit (64-bit) systems are
+         occupied by a memory usage tracking struct at boot. The total
+         amount is printed out at boot.
  
           Only enable when you're ok with these trade offs and really
-         sure you need the memory resource controller.
+         sure you need the memory resource controller. Even when you enable
+         this, you can pass "cgroup_disable=memory" as a boot option to
+         disable the memory resource controller and avoid its overhead
+         (and lose the benefits of the memory resource controller).
  
           This config option also selects MM_OWNER config option, which
           could in turn add some fork/exit overhead.
  
+config MM_OWNER
+       bool
+
+config CGROUP_MEM_RES_CTLR_SWAP
+       bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
+       depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
+       help
+         Add a swap management feature to the memory resource controller. When
+         you enable this, you can limit mem+swap usage per cgroup. In other
+         words, when you disable this, the memory resource controller does not
+         care about swap usage, so a process can exhaust all of the swap. This
+         extension is useful when you want to avoid swap exhaustion, but it
+         adds more overhead and consumes memory for bookkeeping information.
+         Especially if you use a 32-bit system or a small-memory system, please
+         be careful about enabling this. When the memory resource controller
+         is disabled by a boot option, this is automatically disabled too and
+         adds no overhead. Even when you set this config=y, swap will not be
+         accounted if the boot option "noswapaccount" is set.
+
+
+endmenu
+
  config SYSFS_DEPRECATED
         bool
  
  config SYSFS_DEPRECATED_V2
-       bool "Create deprecated sysfs files"
+       bool "Create deprecated sysfs layout for older userspace tools"
         depends on SYSFS
         default y
         select SYSFS_DEPRECATED
         help
-         This option creates deprecated symlinks such as the
-         "device"-link, the <subsystem>:<name>-link, and the
-         "bus"-link. It may also add deprecated key in the
-         uevent environment.
-         None of these features or values should be used today, as
-         they export driver core implementation details to userspace
-         or export properties which can't be kept stable across kernel
-         releases.
-
-         If enabled, this option will also move any device structures
-         that belong to a class, back into the /sys/class hierarchy, in
-         order to support older versions of udev and some userspace
-         programs.
-
-         If you are using a distro with the most recent userspace
-         packages, it should be safe to say N here.
+         This option switches the layout of sysfs to the deprecated
+         version.
+
+         The current sysfs layout features a unified device tree at
+         /sys/devices/, which is able to express a hierarchy between
+         class devices. If the deprecated option is set to Y, the
+         unified device tree is split into a bus device tree at
+         /sys/devices/ and several individual class device trees at
+         /sys/class/. The class and bus devices will be connected by
+         "<subsystem>:<name>" and the "device" links. The "block"
+         class devices will not show up in /sys/class/block/. Some
+         subsystems will suppress the creation of some devices which
+         depend on the unified device tree.
+
+         This option is not a pure compatibility option that can
+         be safely enabled on newer distributions. It will change the
+         layout of sysfs to the non-extensible deprecated version,
+         and disable some features, which can not be exported without
+         confusing older userspace tools. Since 2007/2008 all major
+         distributions do not enable this option, and ship no tools which
+         depend on the deprecated layout or this option.
+
+         If you are using a new kernel on an older distribution, or use
+         older userspace tools, you might need to say Y here. Do not say Y,
+         if the original kernel, that came with your distribution, has
+         this option set to N.
  
  config PROC_PID_CPUSET
         bool "Include legacy /proc/<pid>/cpuset file"
@@ -584,6 +626,13 @@ config KALLSYMS_ALL
  
            Say N.
  
+config KALLSYMS_STRIP_GENERATED
+       bool "Strip machine generated symbols from kallsyms"
+       depends on KALLSYMS_ALL
+       default y
+       help
+         Say N if you want kallsyms to retain even machine generated symbols.
+
  config KALLSYMS_EXTRA_PASS
         bool "Do an extra kallsyms pass"
         depends on KALLSYMS
@@ -739,7 +788,8 @@ config VM_EVENT_COUNTERS
  
  config PCI_QUIRKS
         default y
-       bool "Enable PCI quirk workarounds" if EMBEDDED && PCI
+       bool "Enable PCI quirk workarounds" if EMBEDDED
+       depends on PCI
         help
           This enables workarounds for various PCI chipset
            bugs/quirks. Disable this only if your target machine is
@@ -766,8 +816,7 @@ config SLAB
         help
           The regular slab allocator that is established and known to work
           well in all environments. It organizes cache hot objects in
-         per cpu and per node queues. SLAB is the default choice for
-         a slab allocator.
+         per cpu and per node queues.
  
  config SLUB
         bool "SLUB (Unqueued Allocator)"
@@ -776,7 +825,8 @@ config SLUB
            instead of managing queues of cached objects (SLAB approach).
            Per cpu caching is realized using slabs of objects instead
            of queues of objects. SLUB can use memory efficiently
-          and has enhanced diagnostics.
+          and has enhanced diagnostics. SLUB is the default choice for
+          a slab allocator.
  
  config SLOB
         depends on EMBEDDED
@@ -803,6 +853,7 @@ config TRACEPOINTS
  
  config MARKERS
         bool "Activate markers"
+       depends on TRACEPOINTS
         help
           Place an empty function call at each marker site. Can be
           dynamically changed for a probe function.
@@ -825,10 +876,6 @@ config RT_MUTEXES
         boolean
         select PLIST
  
-config TINY_SHMEM
-       default !SHMEM
-       bool
-
  config BASE_SMALL
         int
         default 0 if BASE_FULL
@@ -903,14 +950,17 @@ config MODULE_SRCVERSION_ALL
           the version).  With this option, such a "srcversion" field
           will be created for all modules.  If unsure, say N.
  
-config KMOD
-       def_bool y
-       help
-         This is being removed soon.  These days, CONFIG_MODULES
-         implies CONFIG_KMOD, so use that instead.
-
  endif # MODULES
  
+config INIT_ALL_POSSIBLE
+       bool
+       help
+         Back when each arch used to define their own cpu_online_map and
+         cpu_possible_map, some of them chose to initialize cpu_possible_map
+         with all 1s, and others with all 0s.  When they were centralised,
+         it was better to provide this option than to break all the archs
+         and have several arch maintainers pursuing me down dark alleys.
+
  config STOP_MACHINE
         bool
         default y
@@ -923,10 +973,90 @@ source "block/Kconfig"
  config PREEMPT_NOTIFIERS
         bool
  
+choice
+       prompt "RCU Implementation"
+       default CLASSIC_RCU
+
  config CLASSIC_RCU
-       def_bool !PREEMPT_RCU
+       bool "Classic RCU"
         help
           This option selects the classic RCU implementation that is
           designed for best read-side performance on non-realtime
-         systems.  Classic RCU is the default.  Note that the
-         PREEMPT_RCU symbol is used to select/deselect this option.
+         systems.
+
+         Select this option if you are unsure.
+
+config TREE_RCU
+       bool "Tree-based hierarchical RCU"
+       help
+         This option selects the RCU implementation that is
+         designed for very large SMP systems with hundreds or
+         thousands of CPUs.
+
+config PREEMPT_RCU
+       bool "Preemptible RCU"
+       depends on PREEMPT
+       help
+         This option reduces the latency of the kernel by making certain
+         RCU sections preemptible. Normally RCU code is non-preemptible; if
+         this option is selected, then read-only RCU sections become
+         preemptible. This helps latency, but may expose bugs due to
+         now-naive assumptions about each RCU read-side critical section
+         remaining on a given CPU through its execution.
+
+endchoice
+
+config RCU_TRACE
+       bool "Enable tracing for RCU"
+       depends on TREE_RCU || PREEMPT_RCU
+       help
+         This option provides tracing in RCU which presents stats
+         in debugfs for debugging RCU implementation.
+
+         Say Y here if you want to enable RCU tracing.
+         Say N if you are unsure.
+
+config RCU_FANOUT
+       int "Tree-based hierarchical RCU fanout value"
+       range 2 64 if 64BIT
+       range 2 32 if !64BIT
+       depends on TREE_RCU
+       default 64 if 64BIT
+       default 32 if !64BIT
+       help
+         This option controls the fanout of hierarchical implementations
+         of RCU, allowing RCU to work efficiently on machines with
+         large numbers of CPUs.  This value must be at least the cube
+         root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+         systems and up to 262,144 for 64-bit systems.
+
+         Select a specific number if testing RCU itself.
+         Take the default if unsure.
+
+config RCU_FANOUT_EXACT
+       bool "Disable tree-based hierarchical RCU auto-balancing"
+       depends on TREE_RCU
+       default n
+       help
+         This option forces use of the exact RCU_FANOUT value specified,
+         regardless of imbalances in the hierarchy.  This is useful for
+         testing RCU itself, and might one day be useful on systems with
+         strong NUMA behavior.
+
+         Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+         Say N if unsure.
+
+config TREE_RCU_TRACE
+       def_bool RCU_TRACE && TREE_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the TREE_RCU implementation,
+         permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+config PREEMPT_RCU_TRACE
+       def_bool RCU_TRACE && PREEMPT_RCU
+       select DEBUG_FS
+       help
+         This option provides tracing for the PREEMPT_RCU implementation,
+         permitting Makefile to trivially select kernel/rcupreempt_trace.c.