X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=mm%2Freadahead.c;h=bec83c15a78f61b58a1dfbb74a336b9848b5d876;hb=86e894899820f2b3094d5557124fc22743ae0fc7;hp=c094e4f5a25070e74e1119bdccc7560f57729655;hpb=122a21d11cbfda6d1e33cbc8ae9e4c4ee2f1886e;p=safe%2Fjmp%2Flinux-2.6 diff --git a/mm/readahead.c b/mm/readahead.c index c094e4f..bec83c1 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, Linus Torvalds * - * 09Apr2002 akpm@zip.com.au + * 09Apr2002 Andrew Morton * Initial version. */ @@ -15,22 +15,15 @@ #include #include #include +#include void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { } EXPORT_SYMBOL(default_unplug_io_fn); -/* - * Convienent macros for min/max read-ahead pages. - * Note that MAX_RA_PAGES is rounded down, while MIN_RA_PAGES is rounded up. - * The latter is necessary for systems with large page size(i.e. 64k). - */ -#define MAX_RA_PAGES (VM_MAX_READAHEAD*1024 / PAGE_CACHE_SIZE) -#define MIN_RA_PAGES DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE) - struct backing_dev_info default_backing_dev_info = { - .ra_pages = MAX_RA_PAGES, + .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, .state = 0, .capabilities = BDI_CAP_MAP_COPY, .unplug_io_fn = default_unplug_io_fn, @@ -45,86 +38,10 @@ void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) { ra->ra_pages = mapping->backing_dev_info->ra_pages; - ra->prev_index = -1; + ra->prev_pos = -1; } EXPORT_SYMBOL_GPL(file_ra_state_init); -/* - * Return max readahead size for this inode in number-of-pages. - */ -static inline unsigned long get_max_readahead(struct file_ra_state *ra) -{ - return ra->ra_pages; -} - -static inline unsigned long get_min_readahead(struct file_ra_state *ra) -{ - return MIN_RA_PAGES; -} - -static inline void reset_ahead_window(struct file_ra_state *ra) -{ - /* - * ... but preserve ahead_start + ahead_size value, - * see 'recheck:' label in page_cache_readahead(). - * Note: We never use ->ahead_size as rvalue without - * checking ->ahead_start != 0 first. - */ - ra->ahead_size += ra->ahead_start; - ra->ahead_start = 0; -} - -static inline void ra_off(struct file_ra_state *ra) -{ - ra->start = 0; - ra->flags = 0; - ra->size = 0; - reset_ahead_window(ra); - return; -} - -/* - * Set the initial window size, round to next power of 2 and square - * for small size, x 4 for medium, and x 2 for large - * for 128k (32 page) max ra - * 1-8 page = 32k initial, > 8 page = 128k initial - */ -static unsigned long get_init_ra_size(unsigned long size, unsigned long max) -{ - unsigned long newsize = roundup_pow_of_two(size); - - if (newsize <= max / 32) - newsize = newsize * 4; - else if (newsize <= max / 4) - newsize = newsize * 2; - else - newsize = max; - return newsize; -} - -/* - * Set the new window size, this is called only when I/O is to be submitted, - * not for each call to readahead. If a cache miss occured, reduce next I/O - * size, else increase depending on how close to max we are. - */ -static inline unsigned long get_next_ra_size(struct file_ra_state *ra) -{ - unsigned long max = get_max_readahead(ra); - unsigned long min = get_min_readahead(ra); - unsigned long cur = ra->size; - unsigned long newsize; - - if (ra->flags & RA_FLAG_MISS) { - ra->flags &= ~RA_FLAG_MISS; - newsize = max((cur - 2), min); - } else if (cur < max / 16) { - newsize = 4 * cur; - } else { - newsize = 2 * cur; - } - return min(newsize, max); -} - #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) /** @@ -141,28 +58,25 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, int (*filler)(void *, struct page *), void *data) { struct page *page; - struct pagevec lru_pvec; int ret = 0; - pagevec_init(&lru_pvec, 0); - while (!list_empty(pages)) { page = list_to_page(pages); list_del(&page->lru); - if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { + if (add_to_page_cache_lru(page, mapping, + page->index, GFP_KERNEL)) { page_cache_release(page); continue; } + page_cache_release(page); + ret = filler(data, page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - if (ret) { + if (unlikely(ret)) { put_pages_list(pages); break; } task_io_account_read(PAGE_CACHE_SIZE); } - pagevec_lru_add(&lru_pvec); return ret; } @@ -172,7 +86,6 @@ static int read_pages(struct address_space *mapping, struct file *filp, struct list_head *pages, unsigned nr_pages) { unsigned page_idx; - struct pagevec lru_pvec; int ret; if (mapping->a_ops->readpages) { @@ -182,85 +95,21 @@ static int read_pages(struct address_space *mapping, struct file *filp, goto out; } - pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_to_page(pages); list_del(&page->lru); - if (!add_to_page_cache(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) { mapping->a_ops->readpage(filp, page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else - page_cache_release(page); + } + page_cache_release(page); } - pagevec_lru_add(&lru_pvec); ret = 0; out: return ret; } /* - * Readahead design. - * - * The fields in struct file_ra_state represent the most-recently-executed - * readahead attempt: - * - * start: Page index at which we started the readahead - * size: Number of pages in that read - * Together, these form the "current window". - * Together, start and size represent the `readahead window'. - * prev_index: The page which the readahead algorithm most-recently inspected. - * It is mainly used to detect sequential file reading. - * If page_cache_readahead sees that it is again being called for - * a page which it just looked at, it can return immediately without - * making any state changes. - * offset: Offset in the prev_index where the last read ended - used for - * detection of sequential file reading. - * ahead_start, - * ahead_size: Together, these form the "ahead window". - * ra_pages: The externally controlled max readahead for this fd. - * - * When readahead is in the off state (size == 0), readahead is disabled. - * In this state, prev_index is used to detect the resumption of sequential I/O. - * - * The readahead code manages two windows - the "current" and the "ahead" - * windows. The intent is that while the application is walking the pages - * in the current window, I/O is underway on the ahead window. When the - * current window is fully traversed, it is replaced by the ahead window - * and the ahead window is invalidated. When this copying happens, the - * new current window's pages are probably still locked. So - * we submit a new batch of I/O immediately, creating a new ahead window. - * - * So: - * - * ----|----------------|----------------|----- - * ^start ^start+size - * ^ahead_start ^ahead_start+ahead_size - * - * ^ When this page is read, we submit I/O for the - * ahead window. - * - * A `readahead hit' occurs when a read request is made against a page which is - * the next sequential page. Ahead window calculations are done only when it - * is time to submit a new IO. The code ramps up the size agressively at first, - * but slow down as it approaches max_readhead. - * - * Any seek/ramdom IO will result in readahead being turned off. It will resume - * at the first sequential access. - * - * There is a special-case: if the first page which the application tries to - * read happens to be the first page of the file, it is assumed that a linear - * read is about to happen and the window is immediately set to the initial size - * based on I/O request size and the max_readahead. - * - * This function is to be called for every read request, rather than when - * it is time to perform readahead. It is called only once for the entire I/O - * regardless of size unless readahead is unable to start enough I/O to satisfy - * the request (I/O request > max_readahead). - */ - -/* * do_page_cache_readahead actually reads a chunk of disk. It allocates all * the pages first, then submits them all for I/O. This avoids the very bad * behaviour which would occur if page allocations are causing VM writeback. @@ -292,20 +141,19 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, /* * Preallocate as many pages as we will need. */ - read_lock_irq(&mapping->tree_lock); for (page_idx = 0; page_idx < nr_to_read; page_idx++) { pgoff_t page_offset = offset + page_idx; - + if (page_offset > end_index) break; + rcu_read_lock(); page = radix_tree_lookup(&mapping->page_tree, page_offset); + rcu_read_unlock(); if (page) continue; - read_unlock_irq(&mapping->tree_lock); page = page_cache_alloc_cold(mapping); - read_lock_irq(&mapping->tree_lock); if (!page) break; page->index = page_offset; @@ -314,7 +162,6 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, SetPageReadahead(page); ret++; } - read_unlock_irq(&mapping->tree_lock); /* * Now start the IO. We ignore I/O errors - if the page is not @@ -361,28 +208,6 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, } /* - * Check how effective readahead is being. If the amount of started IO is - * less than expected then the file is partly or fully in pagecache and - * readahead isn't helping. - * - */ -static inline int check_ra_success(struct file_ra_state *ra, - unsigned long nr_to_read, unsigned long actual) -{ - if (actual == 0) { - ra->cache_hit += nr_to_read; - if (ra->cache_hit >= VM_MAX_CACHE_HIT) { - ra_off(ra); - ra->flags |= RA_FLAG_INCACHE; - return 0; - } - } else { - ra->cache_hit=0; - } - return 1; -} - -/* * This version skips the IO if the queue is read-congested, and will tell the * block layer to abandon the readahead if request allocation would block. * @@ -399,233 +224,75 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, } /* - * Read 'nr_to_read' pages starting at page 'offset'. If the flag 'block' - * is set wait till the read completes. Otherwise attempt to read without - * blocking. - * Returns 1 meaning 'success' if read is successful without switching off - * readahead mode. Otherwise return failure. + * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a + * sensible upper limit. */ -static int -blockable_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read, - struct file_ra_state *ra, int block) +unsigned long max_sane_readahead(unsigned long nr) { - int actual; - - if (!block && bdi_read_congested(mapping->backing_dev_info)) - return 0; - - actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0); - - return check_ra_success(ra, nr_to_read, actual); + return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE) + + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); } -static int make_ahead_window(struct address_space *mapping, struct file *filp, - struct file_ra_state *ra, int force) +static int __init readahead_init(void) { - int block, ret; - - ra->ahead_size = get_next_ra_size(ra); - ra->ahead_start = ra->start + ra->size; - - block = force || (ra->prev_index >= ra->ahead_start); - ret = blockable_page_cache_readahead(mapping, filp, - ra->ahead_start, ra->ahead_size, ra, block); - - if (!ret && !force) { - /* A read failure in blocking mode, implies pages are - * all cached. So we can safely assume we have taken - * care of all the pages requested in this call. - * A read failure in non-blocking mode, implies we are - * reading more pages than requested in this call. So - * we safely assume we have taken care of all the pages - * requested in this call. - * - * Just reset the ahead window in case we failed due to - * congestion. The ahead window will any way be closed - * in case we failed due to excessive page cache hits. - */ - reset_ahead_window(ra); - } + int err; - return ret; -} + err = bdi_init(&default_backing_dev_info); + if (!err) + bdi_register(&default_backing_dev_info, NULL, "default"); -/** - * page_cache_readahead - generic adaptive readahead - * @mapping: address_space which holds the pagecache and I/O vectors - * @ra: file_ra_state which holds the readahead state - * @filp: passed on to ->readpage() and ->readpages() - * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units - * @req_size: hint: total size of the read which the caller is performing in - * PAGE_CACHE_SIZE units - * - * page_cache_readahead() is the main function. It performs the adaptive - * readahead window size management and submits the readahead I/O. - * - * Note that @filp is purely used for passing on to the ->readpage[s]() - * handler: it may refer to a different file from @mapping (so we may not use - * @filp->f_mapping or @filp->f_path.dentry->d_inode here). - * Also, @ra may not be equal to &@filp->f_ra. - * - */ -unsigned long -page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, - struct file *filp, pgoff_t offset, unsigned long req_size) -{ - unsigned long max, newsize; - int sequential; - - /* - * We avoid doing extra work and bogusly perturbing the readahead - * window expansion logic. - */ - if (offset == ra->prev_index && --req_size) - ++offset; - - /* Note that prev_index == -1 if it is a first read */ - sequential = (offset == ra->prev_index + 1); - ra->prev_index = offset; - ra->prev_offset = 0; - - max = get_max_readahead(ra); - newsize = min(req_size, max); - - /* No readahead or sub-page sized read or file already in cache */ - if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) - goto out; - - ra->prev_index += newsize - 1; - - /* - * Special case - first read at start of file. We'll assume it's - * a whole-file read and grow the window fast. Or detect first - * sequential access - */ - if (sequential && ra->size == 0) { - ra->size = get_init_ra_size(newsize, max); - ra->start = offset; - if (!blockable_page_cache_readahead(mapping, filp, offset, - ra->size, ra, 1)) - goto out; - - /* - * If the request size is larger than our max readahead, we - * at least want to be sure that we get 2 IOs in flight and - * we know that we will definitly need the new I/O. - * once we do this, subsequent calls should be able to overlap - * IOs,* thus preventing stalls. so issue the ahead window - * immediately. - */ - if (req_size >= max) - make_ahead_window(mapping, filp, ra, 1); - - goto out; - } - - /* - * Now handle the random case: - * partial page reads and first access were handled above, - * so this must be the next page otherwise it is random - */ - if (!sequential) { - ra_off(ra); - blockable_page_cache_readahead(mapping, filp, offset, - newsize, ra, 1); - goto out; - } - - /* - * If we get here we are doing sequential IO and this was not the first - * occurence (ie we have an existing window) - */ - if (ra->ahead_start == 0) { /* no ahead window yet */ - if (!make_ahead_window(mapping, filp, ra, 0)) - goto recheck; - } - - /* - * Already have an ahead window, check if we crossed into it. - * If so, shift windows and issue a new ahead window. - * Only return the #pages that are in the current window, so that - * we get called back on the first page of the ahead window which - * will allow us to submit more IO. - */ - if (ra->prev_index >= ra->ahead_start) { - ra->start = ra->ahead_start; - ra->size = ra->ahead_size; - make_ahead_window(mapping, filp, ra, 0); -recheck: - /* prev_index shouldn't overrun the ahead window */ - ra->prev_index = min(ra->prev_index, - ra->ahead_start + ra->ahead_size - 1); - } - -out: - return ra->prev_index + 1; + return err; } -EXPORT_SYMBOL_GPL(page_cache_readahead); +subsys_initcall(readahead_init); /* - * handle_ra_miss() is called when it is known that a page which should have - * been present in the pagecache (we just did some readahead there) was in fact - * not found. This will happen if it was evicted by the VM (readahead - * thrashing) - * - * Turn on the cache miss flag in the RA struct, this will cause the RA code - * to reduce the RA size on the next read. + * Submit IO for the read-ahead request in file_ra_state. */ -void handle_ra_miss(struct address_space *mapping, - struct file_ra_state *ra, pgoff_t offset) +static unsigned long ra_submit(struct file_ra_state *ra, + struct address_space *mapping, struct file *filp) { - ra->flags |= RA_FLAG_MISS; - ra->flags &= ~RA_FLAG_INCACHE; - ra->cache_hit = 0; -} + int actual; -/* - * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a - * sensible upper limit. - */ -unsigned long max_sane_readahead(unsigned long nr) -{ - return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) - + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); + actual = __do_page_cache_readahead(mapping, filp, + ra->start, ra->size, ra->async_size); + + return actual; } /* - * Submit IO for the read-ahead request in file_ra_state. + * Set the initial window size, round to next power of 2 and square + * for small size, x 4 for medium, and x 2 for large + * for 128k (32 page) max ra + * 1-8 page = 32k initial, > 8 page = 128k initial */ -unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, struct file *filp) +static unsigned long get_init_ra_size(unsigned long size, unsigned long max) { - unsigned long ra_size; - unsigned long la_size; - int actual; + unsigned long newsize = roundup_pow_of_two(size); - ra_size = ra_readahead_size(ra); - la_size = ra_lookahead_size(ra); - actual = __do_page_cache_readahead(mapping, filp, - ra->ra_index, ra_size, la_size); + if (newsize <= max / 32) + newsize = newsize * 4; + else if (newsize <= max / 4) + newsize = newsize * 2; + else + newsize = max; - return actual; + return newsize; } -EXPORT_SYMBOL_GPL(ra_submit); /* * Get the previous window size, ramp it up, and * return it as the new window size. */ -static unsigned long get_next_ra_size2(struct file_ra_state *ra, +static unsigned long get_next_ra_size(struct file_ra_state *ra, unsigned long max) { - unsigned long cur = ra->readahead_index - ra->ra_index; + unsigned long cur = ra->size; unsigned long newsize; if (cur < max / 16) - newsize = cur * 4; + newsize = 4 * cur; else - newsize = cur * 2; + newsize = 2 * cur; return min(newsize, max); } @@ -636,32 +303,25 @@ static unsigned long get_next_ra_size2(struct file_ra_state *ra, * The fields in struct file_ra_state represent the most-recently-executed * readahead attempt: * - * |-------- last readahead window -------->| - * |-- application walking here -->| - * ======#============|==================#=====================| - * ^la_index ^ra_index ^lookahead_index ^readahead_index - * - * [ra_index, readahead_index) represents the last readahead window. - * - * [la_index, lookahead_index] is where the application would be walking(in - * the common case of cache-cold sequential reads): the last window was - * established when the application was at la_index, and the next window will - * be bring in when the application reaches lookahead_index. + * |<----- async_size ---------| + * |------------------- size -------------------->| + * |==================#===========================| + * ^start ^page marked with PG_readahead * * To overlap application thinking time and disk I/O time, we do * `readahead pipelining': Do not wait until the application consumed all * readahead pages and stalled on the missing page at readahead_index; - * Instead, submit an asynchronous readahead I/O as early as the application - * reads on the page at lookahead_index. Normally lookahead_index will be - * equal to ra_index, for maximum pipelining. + * Instead, submit an asynchronous readahead I/O as soon as there are + * only async_size pages left in the readahead window. Normally async_size + * will be equal to size, for maximum pipelining. * * In interleaved sequential reads, concurrent streams on the same fd can * be invalidating each other's readahead state. So we flag the new readahead - * page at lookahead_index with PG_readahead, and use it as readahead + * page at (start+size-async_size) with PG_readahead, and use it as readahead * indicator. The flag won't be set on already cached pages, to avoid the * readahead-for-nothing fuss, saving pointless page cache lookups. * - * prev_index tracks the last visited page in the _previous_ read request. + * prev_pos tracks the last visited byte in the _previous_ read request. * It should be maintained by the caller, and will be used for detecting * small random reads. Note that the readahead algorithm checks loosely * for sequential patterns. Hence interleaved reads might be served as @@ -682,106 +342,142 @@ static unsigned long get_next_ra_size2(struct file_ra_state *ra, static unsigned long ondemand_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *filp, - struct page *page, pgoff_t offset, + bool hit_readahead_marker, pgoff_t offset, unsigned long req_size) { - unsigned long max; /* max readahead pages */ - pgoff_t ra_index; /* readahead index */ - unsigned long ra_size; /* readahead size */ - unsigned long la_size; /* lookahead size */ - int sequential; - - max = ra->ra_pages; - sequential = (offset - ra->prev_index <= 1UL) || (req_size > max); + int max = ra->ra_pages; /* max readahead pages */ + pgoff_t prev_offset; + int sequential; /* - * Lookahead/readahead hit, assume sequential access. + * It's the expected callback offset, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - if (offset && (offset == ra->lookahead_index || - offset == ra->readahead_index)) { - ra_index = ra->readahead_index; - ra_size = get_next_ra_size2(ra, max); - la_size = ra_size; - goto fill_ra; + if (offset && (offset == (ra->start + ra->size - ra->async_size) || + offset == (ra->start + ra->size))) { + ra->start += ra->size; + ra->size = get_next_ra_size(ra, max); + ra->async_size = ra->size; + goto readit; } + prev_offset = ra->prev_pos >> PAGE_CACHE_SHIFT; + sequential = offset - prev_offset <= 1UL || req_size > max; + /* * Standalone, small read. * Read as is, and do not pollute the readahead state. */ - if (!page && !sequential) { + if (!hit_readahead_marker && !sequential) { return __do_page_cache_readahead(mapping, filp, offset, req_size, 0); } /* + * Hit a marked page without valid readahead state. + * E.g. interleaved reads. + * Query the pagecache for async_size, which normally equals to + * readahead size. Ramp it up and use it as the new readahead size. + */ + if (hit_readahead_marker) { + pgoff_t start; + + rcu_read_lock(); + start = radix_tree_next_hole(&mapping->page_tree, offset,max+1); + rcu_read_unlock(); + + if (!start || start - offset > max) + return 0; + + ra->start = start; + ra->size = start - offset; /* old async_size */ + ra->size = get_next_ra_size(ra, max); + ra->async_size = ra->size; + goto readit; + } + + /* * It may be one of * - first read on start of file * - sequential cache miss * - oversize random read * Start readahead for it. */ - ra_index = offset; - ra_size = get_init_ra_size(req_size, max); - la_size = ra_size > req_size ? ra_size - req_size : ra_size; + ra->start = offset; + ra->size = get_init_ra_size(req_size, max); + ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size; - /* - * Hit on a lookahead page without valid readahead state. - * E.g. interleaved reads. - * Not knowing its readahead pos/size, bet on the minimal possible one. - */ - if (page) { - ra_index++; - ra_size = min(4 * ra_size, max); - } +readit: + return ra_submit(ra, mapping, filp); +} -fill_ra: - ra_set_index(ra, offset, ra_index); - ra_set_size(ra, ra_size, la_size); +/** + * page_cache_sync_readahead - generic file readahead + * @mapping: address_space which holds the pagecache and I/O vectors + * @ra: file_ra_state which holds the readahead state + * @filp: passed on to ->readpage() and ->readpages() + * @offset: start offset into @mapping, in pagecache page-sized units + * @req_size: hint: total size of the read which the caller is performing in + * pagecache pages + * + * page_cache_sync_readahead() should be called when a cache miss happened: + * it will submit the read. The readahead logic may decide to piggyback more + * pages onto the read request if access patterns suggest it will improve + * performance. + */ +void page_cache_sync_readahead(struct address_space *mapping, + struct file_ra_state *ra, struct file *filp, + pgoff_t offset, unsigned long req_size) +{ + /* no read-ahead */ + if (!ra->ra_pages) + return; - return ra_submit(ra, mapping, filp); + /* do read-ahead */ + ondemand_readahead(mapping, ra, filp, false, offset, req_size); } +EXPORT_SYMBOL_GPL(page_cache_sync_readahead); /** - * page_cache_readahead_ondemand - generic file readahead + * page_cache_async_readahead - file readahead for marked pages * @mapping: address_space which holds the pagecache and I/O vectors * @ra: file_ra_state which holds the readahead state * @filp: passed on to ->readpage() and ->readpages() - * @page: the page at @offset, or NULL if non-present - * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units + * @page: the page at @offset which has the PG_readahead flag set + * @offset: start offset into @mapping, in pagecache page-sized units * @req_size: hint: total size of the read which the caller is performing in - * PAGE_CACHE_SIZE units + * pagecache pages * - * page_cache_readahead_ondemand() is the entry point of readahead logic. - * This function should be called when it is time to perform readahead: - * 1) @page == NULL - * A cache miss happened, time for synchronous readahead. - * 2) @page != NULL && PageReadahead(@page) - * A look-ahead hit occured, time for asynchronous readahead. + * page_cache_async_ondemand() should be called when a page is used which + * has the PG_readahead flag; this is a marker to suggest that the application + * has used up enough of the readahead window that we should start pulling in + * more pages. */ -unsigned long -page_cache_readahead_ondemand(struct address_space *mapping, - struct file_ra_state *ra, struct file *filp, - struct page *page, pgoff_t offset, - unsigned long req_size) +void +page_cache_async_readahead(struct address_space *mapping, + struct file_ra_state *ra, struct file *filp, + struct page *page, pgoff_t offset, + unsigned long req_size) { /* no read-ahead */ if (!ra->ra_pages) - return 0; + return; - if (page) { - ClearPageReadahead(page); + /* + * Same bit is used for PG_readahead and PG_reclaim. + */ + if (PageWriteback(page)) + return; - /* - * Defer asynchronous read-ahead on IO congestion. - */ - if (bdi_read_congested(mapping->backing_dev_info)) - return 0; - } + ClearPageReadahead(page); + + /* + * Defer asynchronous read-ahead on IO congestion. + */ + if (bdi_read_congested(mapping->backing_dev_info)) + return; /* do read-ahead */ - return ondemand_readahead(mapping, ra, filp, page, - offset, req_size); + ondemand_readahead(mapping, ra, filp, true, offset, req_size); } -EXPORT_SYMBOL_GPL(page_cache_readahead_ondemand); +EXPORT_SYMBOL_GPL(page_cache_async_readahead);