#ifndef _LINUX_MMZONE_H
#define _LINUX_MMZONE_H
-#ifdef __KERNEL__
#ifndef __ASSEMBLY__
+#ifndef __GENERATING_BOUNDS_H
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/wait.h>
+#include <linux/bitops.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
#include <linux/seqlock.h>
#include <linux/nodemask.h>
#include <linux/pageblock-flags.h>
+#include <linux/bounds.h>
#include <asm/atomic.h>
#include <asm/page.h>
#define MIGRATE_UNMOVABLE 0
#define MIGRATE_RECLAIMABLE 1
#define MIGRATE_MOVABLE 2
-#define MIGRATE_HIGHATOMIC 3
-#define MIGRATE_RESERVE 4
+#define MIGRATE_RESERVE 3
+#define MIGRATE_ISOLATE 4 /* can't allocate from here */
#define MIGRATE_TYPES 5
#define for_each_migratetype_order(order, type) \
for (order = 0; order < MAX_ORDER; order++) \
for (type = 0; type < MIGRATE_TYPES; type++)
+extern int page_group_by_mobility_disabled;
+
+static inline int get_pageblock_migratetype(struct page *page)
+{
+ return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
+}
+
struct free_area {
struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free;
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
- NR_INACTIVE,
- NR_ACTIVE,
+ NR_LRU_BASE,
+ NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+ NR_ACTIVE_ANON, /* " " " " " */
+ NR_INACTIVE_FILE, /* " " " " " */
+ NR_ACTIVE_FILE, /* " " " " " */
+ NR_UNEVICTABLE, /* " " " " " */
+ NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_ANON_PAGES, /* Mapped anonymous pages */
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
only modified from process context */
NR_FILE_PAGES,
NR_FILE_DIRTY,
NR_WRITEBACK,
- /* Second 128 byte cacheline */
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_BOUNCE,
NR_VMSCAN_WRITE,
+ /* Second 128 byte cacheline */
+ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
NUMA_MISS, /* allocated in non intended node */
#endif
NR_VM_ZONE_STAT_ITEMS };
+/*
+ * We do arithmetic on the LRU lists in various places in the code,
+ * so it is important to keep the active lists LRU_ACTIVE higher in
+ * the array than the corresponding inactive lists, and to keep
+ * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
+ *
+ * This has to be kept in sync with the statistics in zone_stat_item
+ * above and the descriptions in vmstat_text in mm/vmstat.c
+ */
+#define LRU_BASE 0
+#define LRU_ACTIVE 1
+#define LRU_FILE 2
+
+enum lru_list {
+ LRU_INACTIVE_ANON = LRU_BASE,
+ LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
+ LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
+ LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
+ LRU_UNEVICTABLE,
+ NR_LRU_LISTS
+};
+
+#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+
+#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+
+static inline int is_file_lru(enum lru_list l)
+{
+ return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_active_lru(enum lru_list l)
+{
+ return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+}
+
+static inline int is_unevictable_lru(enum lru_list l)
+{
+ return (l == LRU_UNEVICTABLE);
+}
+
+enum zone_watermarks {
+ WMARK_MIN,
+ WMARK_LOW,
+ WMARK_HIGH,
+ NR_WMARK
+};
+
+#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
+#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
+#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
+
struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
};
struct per_cpu_pageset {
- struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */
+ struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
s8 expire;
#endif
#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
#endif
+#endif /* !__GENERATING_BOUNDS.H */
+
enum zone_type {
#ifdef CONFIG_ZONE_DMA
/*
ZONE_HIGHMEM,
#endif
ZONE_MOVABLE,
- MAX_NR_ZONES
+ __MAX_NR_ZONES
};
+#ifndef __GENERATING_BOUNDS_H
+
/*
* When a memory allocation must conform to specific limitations (such
* as being suitable for DMA) the caller will pass in hints to the
* match the requested limits. See gfp_zone() in include/linux/gfp.h
*/
-/*
- * Count the active zones. Note that the use of defined(X) outside
- * #if and family is not necessarily defined so ensure we cannot use
- * it later. Use __ZONE_COUNT to work out how many shift bits we need.
- */
-#define __ZONE_COUNT ( \
- defined(CONFIG_ZONE_DMA) \
- + defined(CONFIG_ZONE_DMA32) \
- + 1 \
- + defined(CONFIG_HIGHMEM) \
- + 1 \
-)
-#if __ZONE_COUNT < 2
+#if MAX_NR_ZONES < 2
#define ZONES_SHIFT 0
-#elif __ZONE_COUNT <= 2
+#elif MAX_NR_ZONES <= 2
#define ZONES_SHIFT 1
-#elif __ZONE_COUNT <= 4
+#elif MAX_NR_ZONES <= 4
#define ZONES_SHIFT 2
#else
#error ZONES_SHIFT -- too many zones configured adjust calculation
#endif
-#undef __ZONE_COUNT
+
+struct zone_reclaim_stat {
+ /*
+ * The pageout code in vmscan.c keeps track of how many of the
+ * mem/swap backed and file backed pages are refeferenced.
+ * The higher the rotated/scanned ratio, the more valuable
+ * that cache is.
+ *
+ * The anon LRU stats live in [0], file LRU stats in [1]
+ */
+ unsigned long recent_rotated[2];
+ unsigned long recent_scanned[2];
+};
struct zone {
/* Fields commonly accessed by the page allocator */
- unsigned long pages_min, pages_low, pages_high;
+
+ /* zone watermarks, access with *_wmark_pages(zone) macros */
+ unsigned long watermark[NR_WMARK];
+
/*
* We don't know if the memory that we're going to allocate will be freeable
* or/and it will be released eventually, so to avoid totally wasting several
#ifndef CONFIG_SPARSEMEM
/*
- * Flags for a MAX_ORDER_NR_PAGES block. See pageblock-flags.h.
+ * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
unsigned long *pageblock_flags;
/* Fields commonly accessed by the page reclaim scanner */
spinlock_t lru_lock;
- struct list_head active_list;
- struct list_head inactive_list;
- unsigned long nr_scan_active;
- unsigned long nr_scan_inactive;
- unsigned long pages_scanned; /* since last reclaim */
- int all_unreclaimable; /* All pages pinned */
+ struct zone_lru {
+ struct list_head list;
+ unsigned long nr_saved_scan; /* accumulated for batching */
+ } lru[NR_LRU_LISTS];
- /* A count of how many reclaimers are scanning this zone */
- atomic_t reclaim_in_progress;
+ struct zone_reclaim_stat reclaim_stat;
+
+ unsigned long pages_scanned; /* since last reclaim */
+ unsigned long flags; /* zone flags, see below */
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
*/
int prev_priority;
+ /*
+ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+ * this zone's LRU. Maintained by the pageout code.
+ */
+ unsigned int inactive_ratio;
+
ZONE_PADDING(_pad2_)
/* Rarely used or read-mostly fields */
const char *name;
} ____cacheline_internodealigned_in_smp;
+typedef enum {
+ ZONE_ALL_UNRECLAIMABLE, /* all pages pinned */
+ ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
+ ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */
+} zone_flags_t;
+
+static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
+{
+ set_bit(flag, &zone->flags);
+}
+
+static inline int zone_test_and_set_flag(struct zone *zone, zone_flags_t flag)
+{
+ return test_and_set_bit(flag, &zone->flags);
+}
+
+static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
+{
+ clear_bit(flag, &zone->flags);
+}
+
+static inline int zone_is_all_unreclaimable(const struct zone *zone)
+{
+ return test_bit(ZONE_ALL_UNRECLAIMABLE, &zone->flags);
+}
+
+static inline int zone_is_reclaim_locked(const struct zone *zone)
+{
+ return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
+}
+
+static inline int zone_is_oom_locked(const struct zone *zone)
+{
+ return test_bit(ZONE_OOM_LOCKED, &zone->flags);
+}
+
/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
* The NUMA zonelists are doubled becausse we need zonelists that restrict the
* allocations to a single node for GFP_THISNODE.
*
- * [0 .. MAX_NR_ZONES -1] : Zonelists with fallback
- * [MAZ_NR_ZONES ... MAZ_ZONELISTS -1] : No fallback (GFP_THISNODE)
+ * [0] : Zonelist with fallback
+ * [1] : No fallback (GFP_THISNODE)
*/
-#define MAX_ZONELISTS (2 * MAX_NR_ZONES)
+#define MAX_ZONELISTS 2
/*
unsigned long last_full_zap; /* when last zap'd (jiffies) */
};
#else
-#define MAX_ZONELISTS MAX_NR_ZONES
+#define MAX_ZONELISTS 1
struct zonelist_cache;
#endif
/*
+ * This struct contains information about a zone in a zonelist. It is stored
+ * here to avoid dereferences into large structures and lookups of tables
+ */
+struct zoneref {
+ struct zone *zone; /* Pointer to actual zone */
+ int zone_idx; /* zone_idx(zoneref->zone) */
+};
+
+/*
* One allocation request operates on a zonelist. A zonelist
* is a list of zones, the first one is the 'goal' of the
* allocation, the other zones are fallback zones, in decreasing
*
* If zlcache_ptr is not NULL, then it is just the address of zlcache,
* as explained above. If zlcache_ptr is NULL, there is no zlcache.
+ * *
+ * To speed the reading of the zonelist, the zonerefs contain the zone index
+ * of the entry being read. Helper functions to access information given
+ * a struct zoneref are
+ *
+ * zonelist_zone() - Return the struct zone * for an entry in _zonerefs
+ * zonelist_zone_idx() - Return the index of the zone for an entry
+ * zonelist_node_idx() - Return the index of the node for an entry
*/
-
struct zonelist {
struct zonelist_cache *zlcache_ptr; // NULL or &zlcache
- struct zone *zones[MAX_ZONES_PER_ZONELIST + 1]; // NULL delimited
+ struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
#ifdef CONFIG_NUMA
struct zonelist_cache zlcache; // optional ...
#endif
};
-#ifdef CONFIG_NUMA
-/*
- * Only custom zonelists like MPOL_BIND need to be filtered as part of
- * policies. As described in the comment for struct zonelist_cache, these
- * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
- * that to determine if the zonelists needs to be filtered or not.
- */
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
- return !zonelist->zlcache_ptr;
-}
-#else
-static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
-{
- return 0;
-}
-#endif /* CONFIG_NUMA */
-
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
struct node_active_region {
unsigned long start_pfn;
struct zone node_zones[MAX_NR_ZONES];
struct zonelist node_zonelists[MAX_ZONELISTS];
int nr_zones;
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+ struct page_cgroup *node_page_cgroup;
+#endif
#endif
struct bootmem_data *bdata;
#ifdef CONFIG_MEMORY_HOTPLUG
static inline int is_highmem(struct zone *zone)
{
#ifdef CONFIG_HIGHMEM
- int zone_idx = zone - zone->zone_pgdat->node_zones;
- return zone_idx == ZONE_HIGHMEM ||
- (zone_idx == ZONE_MOVABLE && zone_movable_is_highmem());
+ int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones;
+ return zone_off == ZONE_HIGHMEM * sizeof(*zone) ||
+ (zone_off == ZONE_MOVABLE * sizeof(*zone) &&
+ zone_movable_is_highmem());
#else
return 0;
#endif
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */
-#include <linux/topology.h>
-/* Returns the number of the current Node. */
-#ifndef numa_node_id
-#define numa_node_id() (cpu_to_node(raw_smp_processor_id()))
-#endif
-
#ifndef CONFIG_NEED_MULTIPLE_NODES
extern struct pglist_data contig_page_data;
#define NODE_DATA(nid) (&contig_page_data)
#define NODE_MEM_MAP(nid) mem_map
-#define MAX_NODES_SHIFT 1
#else /* CONFIG_NEED_MULTIPLE_NODES */
extern struct zone *next_zone(struct zone *zone);
/**
- * for_each_pgdat - helper macro to iterate over all nodes
+ * for_each_online_pgdat - helper macro to iterate over all online nodes
* @pgdat - pointer to a pg_data_t variable
*/
#define for_each_online_pgdat(pgdat) \
zone; \
zone = next_zone(zone))
-#ifdef CONFIG_SPARSEMEM
-#include <asm/sparsemem.h>
-#endif
+#define for_each_populated_zone(zone) \
+ for (zone = (first_online_pgdat())->node_zones; \
+ zone; \
+ zone = next_zone(zone)) \
+ if (!populated_zone(zone)) \
+ ; /* do nothing */ \
+ else
-#if BITS_PER_LONG == 32
-/*
- * with 32 bit page->flags field, we reserve 9 bits for node/zone info.
- * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes.
+static inline struct zone *zonelist_zone(struct zoneref *zoneref)
+{
+ return zoneref->zone;
+}
+
+static inline int zonelist_zone_idx(struct zoneref *zoneref)
+{
+ return zoneref->zone_idx;
+}
+
+static inline int zonelist_node_idx(struct zoneref *zoneref)
+{
+#ifdef CONFIG_NUMA
+ /* zone_to_nid not available in this context */
+ return zoneref->zone->node;
+#else
+ return 0;
+#endif /* CONFIG_NUMA */
+}
+
+/**
+ * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
+ * @z - The cursor used as a starting point for the search
+ * @highest_zoneidx - The zone index of the highest zone to return
+ * @nodes - An optional nodemask to filter the zonelist with
+ * @zone - The first suitable zone found is returned via this parameter
+ *
+ * This function returns the next zone at or below a given zone index that is
+ * within the allowed nodemask using a cursor as the starting point for the
+ * search. The zoneref returned is a cursor that represents the current zone
+ * being examined. It should be advanced by one before calling
+ * next_zones_zonelist again.
*/
-#define FLAGS_RESERVED 9
+struct zoneref *next_zones_zonelist(struct zoneref *z,
+ enum zone_type highest_zoneidx,
+ nodemask_t *nodes,
+ struct zone **zone);
-#elif BITS_PER_LONG == 64
-/*
- * with 64 bit flags field, there's plenty of room.
+/**
+ * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
+ * @zonelist - The zonelist to search for a suitable zone
+ * @highest_zoneidx - The zone index of the highest zone to return
+ * @nodes - An optional nodemask to filter the zonelist with
+ * @zone - The first suitable zone found is returned via this parameter
+ *
+ * This function returns the first zone at or below a given zone index that is
+ * within the allowed nodemask. The zoneref returned is a cursor that can be
+ * used to iterate the zonelist with next_zones_zonelist by advancing it by
+ * one before calling.
*/
-#define FLAGS_RESERVED 32
+static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
+ enum zone_type highest_zoneidx,
+ nodemask_t *nodes,
+ struct zone **zone)
+{
+ return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes,
+ zone);
+}
-#else
+/**
+ * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
+ * @zone - The current zone in the iterator
+ * @z - The current pointer within zonelist->zones being iterated
+ * @zlist - The zonelist being iterated
+ * @highidx - The zone index of the highest zone to return
+ * @nodemask - Nodemask allowed by the allocator
+ *
+ * This iterator iterates though all zones at or below a given zone index and
+ * within a given nodemask
+ */
+#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
+ for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \
+ zone; \
+ z = next_zones_zonelist(++z, highidx, nodemask, &zone)) \
-#error BITS_PER_LONG not defined
+/**
+ * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
+ * @zone - The current zone in the iterator
+ * @z - The current pointer within zonelist->zones being iterated
+ * @zlist - The zonelist being iterated
+ * @highidx - The zone index of the highest zone to return
+ *
+ * This iterator iterates though all zones at or below a given zone index.
+ */
+#define for_each_zone_zonelist(zone, z, zlist, highidx) \
+ for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
+#ifdef CONFIG_SPARSEMEM
+#include <asm/sparsemem.h>
#endif
#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
-#define early_pfn_to_nid(nid) (0UL)
+static inline unsigned long early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_FLATMEM
#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1))
#define SECTION_BLOCKFLAGS_BITS \
- ((1 << (PFN_SECTION_SHIFT - (MAX_ORDER-1))) * NR_PAGEBLOCK_BITS)
+ ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
#error Allocator MAX_ORDER exceeds SECTION_SIZE
#endif
struct page;
+struct page_cgroup;
struct mem_section {
/*
* This is, logically, a pointer to an array of struct
/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+ /*
+ * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
+ * section. (see memcontrol.h/page_cgroup.h about this.)
+ */
+ struct page_cgroup *page_cgroup;
+ unsigned long pad;
+#endif
};
#ifdef CONFIG_SPARSEMEM_EXTREME
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
extern int __section_nr(struct mem_section* ms);
+extern unsigned long usemap_size(void);
/*
* We use the lower bits of the mem_map pointer to store
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid))
+bool early_pfn_in_nid(unsigned long pfn, int nid);
#else
#define early_pfn_in_nid(pfn, nid) (1)
#endif
#define pfn_valid_within(pfn) (1)
#endif
+#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
+/*
+ * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
+ * associated with it or not. In FLATMEM, it is expected that holes always
+ * have valid memmap as long as there is valid PFNs either side of the hole.
+ * In SPARSEMEM, it is assumed that a valid section has a memmap for the
+ * entire section.
+ *
+ * However, an ARM, and maybe other embedded architectures in the future
+ * free memmap backing holes to save memory on the assumption the memmap is
+ * never used. The page_zone linkages are then broken even though pfn_valid()
+ * returns true. A walker of the full memmap must then do this additional
+ * check to ensure the memmap they are looking at is sane by making sure
+ * the zone and PFN linkages are still valid. This is expensive, but walkers
+ * of the full memmap are extremely rare.
+ */
+int memmap_valid_within(unsigned long pfn,
+ struct page *page, struct zone *zone);
+#else
+static inline int memmap_valid_within(unsigned long pfn,
+ struct page *page, struct zone *zone)
+{
+ return 1;
+}
+#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
+
+#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
-#endif /* __KERNEL__ */
#endif /* _LINUX_MMZONE_H */