Greg Thelen reported that Johannes's recent stack diet patch makes the kernel hang.
His test is as follows:
mount -t cgroup none /cgroups -o memory
mkdir /cgroups/cg1
echo $$ > /cgroups/cg1/tasks
dd bs=1024 count=1024 if=/dev/null of=/data/foo
echo $$ > /cgroups/tasks
echo 1 > /cgroups/cg1/memory.force_empty
Actually, this "try hard before declaring OOM" logic has been broken ever since
the following two-year-old patch:
commit
a41f24ea9fd6169b147c53c2392e2887cc1d9247
Author: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Tue Apr 29 00:58:25 2008 -0700
page allocator: smarter retry of costly-order allocations
The original intention was "return success if the system has shrinkable zones
even though priority==0 reclaim failed". But the above patch changed it to
"return nr_reclaimed if .....". That overlooked the fact that nr_reclaimed may
be 0 when priority==0 reclaim fails.
And Johannes's patch
0aeb2339e54e ("vmscan: remove all_unreclaimable scan
control") broke it further. Originally, a priority==0 reclaim failure
on memcg returned 0, but that patch changed it to return 1, which totally
confused memcg.
This patch fixes it completely.
Reported-by: Greg Thelen <gthelen@google.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Greg Thelen <gthelen@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*/
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*/
-static int shrink_zones(int priority, struct zonelist *zonelist,
+static bool shrink_zones(int priority, struct zonelist *zonelist,
struct scan_control *sc)
{
enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
struct zoneref *z;
struct zone *zone;
struct scan_control *sc)
{
enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
struct zoneref *z;
struct zone *zone;
+ bool all_unreclaimable = true;
for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
sc->nodemask) {
for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
sc->nodemask) {
}
shrink_zone(priority, zone, sc);
}
shrink_zone(priority, zone, sc);
+ all_unreclaimable = false;
+ return all_unreclaimable;
struct scan_control *sc)
{
int priority;
struct scan_control *sc)
{
int priority;
+ bool all_unreclaimable;
unsigned long total_scanned = 0;
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long lru_pages = 0;
unsigned long total_scanned = 0;
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long lru_pages = 0;
sc->nr_scanned = 0;
if (!priority)
disable_swap_token();
sc->nr_scanned = 0;
if (!priority)
disable_swap_token();
- ret = shrink_zones(priority, zonelist, sc);
+ all_unreclaimable = shrink_zones(priority, zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
* over limit cgroups
/*
* Don't shrink slabs when reclaiming memory from
* over limit cgroups
}
}
total_scanned += sc->nr_scanned;
}
}
total_scanned += sc->nr_scanned;
- if (sc->nr_reclaimed >= sc->nr_to_reclaim) {
- ret = sc->nr_reclaimed;
+ if (sc->nr_reclaimed >= sc->nr_to_reclaim)
/*
* Try to write back as many pages as we just scanned. This
/*
* Try to write back as many pages as we just scanned. This
priority < DEF_PRIORITY - 2)
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
priority < DEF_PRIORITY - 2)
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
- /* top priority shrink_zones still had more to do? don't OOM, then */
- if (ret && scanning_global_lru(sc))
- ret = sc->nr_reclaimed;
out:
/*
* Now that we've scanned all the zones at this priority level, note
out:
/*
* Now that we've scanned all the zones at this priority level, note
delayacct_freepages_end();
put_mems_allowed();
delayacct_freepages_end();
put_mems_allowed();
+ if (sc->nr_reclaimed)
+ return sc->nr_reclaimed;
+
+ /* top priority shrink_zones still had more to do? don't OOM, then */
+ if (scanning_global_lru(sc) && !all_unreclaimable)
+ return 1;
+
+ return 0;
}
unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
}
unsigned long try_to_free_pages(struct zonelist *zonelist, int order,