From: Jens Axboe Date: Thu, 30 Mar 2006 13:16:46 +0000 (+0200) Subject: [PATCH] splice: add support for SPLICE_F_MOVE flag X-Git-Tag: v2.6.17-rc1~149 X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=commitdiff_plain;h=5abc97aa25b2c41413b3a520faee83f2282d9f18 [PATCH] splice: add support for SPLICE_F_MOVE flag This enables the caller to migrate pages from one address space page cache to another. In buzz word marketing, you can do zero-copy file copies! Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- diff --git a/fs/pipe.c b/fs/pipe.c index 2414bf2..109a102 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -121,11 +121,19 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer kunmap(buf->page); } +static int anon_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + buf->stolen = 1; + return 0; +} + static struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, .map = anon_pipe_buf_map, .unmap = anon_pipe_buf_unmap, .release = anon_pipe_buf_release, + .steal = anon_pipe_buf_steal, }; static ssize_t diff --git a/fs/splice.c b/fs/splice.c index efa47c1..4a026f9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -21,6 +21,7 @@ #include <linux/pagemap.h> #include <linux/pipe_fs_i.h> #include <linux/mm_inline.h> +#include <linux/swap.h> /* * Passed to the actors @@ -32,11 +33,37 @@ struct splice_desc { loff_t pos; /* file position */ }; +static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + struct page *page = buf->page; + + WARN_ON(!PageLocked(page)); + WARN_ON(!PageUptodate(page)); + + if (!remove_mapping(page_mapping(page), page)) + return 1; + + if (PageLRU(page)) { + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irq(&zone->lru_lock); + } + + buf->stolen = 1; + return 0; +} + static void page_cache_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_release(buf->page); 
buf->page = NULL; + buf->stolen = 0; } static void *page_cache_pipe_buf_map(struct file *file, @@ -63,7 +90,8 @@ static void *page_cache_pipe_buf_map(struct file *file, static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) { - unlock_page(buf->page); + if (!buf->stolen) + unlock_page(buf->page); kunmap(buf->page); } @@ -72,6 +100,7 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { .map = page_cache_pipe_buf_map, .unmap = page_cache_pipe_buf_unmap, .release = page_cache_pipe_buf_release, + .steal = page_cache_pipe_buf_steal, }; static ssize_t move_to_pipe(struct inode *inode, struct page **pages, @@ -336,8 +365,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, struct address_space *mapping = file->f_mapping; unsigned int offset; struct page *page; - char *src, *dst; pgoff_t index; + char *src; int ret; /* @@ -350,40 +379,54 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; -find_page: - ret = -ENOMEM; - page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); - if (!page) - goto out; - /* - * If the page is uptodate, it is also locked. If it isn't - * uptodate, we can mark it uptodate if we are filling the - * full page. Otherwise we need to read it in first... 
+ * reuse buf page, if SPLICE_F_MOVE is set */ - if (!PageUptodate(page)) { - if (sd->len < PAGE_CACHE_SIZE) { - ret = mapping->a_ops->readpage(file, page); - if (unlikely(ret)) - goto out; - - lock_page(page); - - if (!PageUptodate(page)) { - /* - * page got invalidated, repeat - */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto find_page; + if (sd->flags & SPLICE_F_MOVE) { + if (buf->ops->steal(info, buf)) + goto find_page; + + page = buf->page; + if (add_to_page_cache_lru(page, mapping, index, + mapping_gfp_mask(mapping))) + goto find_page; + } else { +find_page: + ret = -ENOMEM; + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + if (!page) + goto out; + + /* + * If the page is uptodate, it is also locked. If it isn't + * uptodate, we can mark it uptodate if we are filling the + * full page. Otherwise we need to read it in first... + */ + if (!PageUptodate(page)) { + if (sd->len < PAGE_CACHE_SIZE) { + ret = mapping->a_ops->readpage(file, page); + if (unlikely(ret)) + goto out; + + lock_page(page); + + if (!PageUptodate(page)) { + /* + * page got invalidated, repeat + */ + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + goto find_page; + } + ret = -EIO; + goto out; } - ret = -EIO; - goto out; + } else { + WARN_ON(!PageLocked(page)); + SetPageUptodate(page); } - } else { - WARN_ON(!PageLocked(page)); - SetPageUptodate(page); } } @@ -391,10 +434,13 @@ find_page: if (ret) goto out; - dst = kmap_atomic(page, KM_USER0); - memcpy(dst + offset, src + buf->offset, sd->len); - flush_dcache_page(page); - kunmap_atomic(dst, KM_USER0); + if (!buf->stolen) { + char *dst = kmap_atomic(page, KM_USER0); + + memcpy(dst + offset, src + buf->offset, sd->len); + flush_dcache_page(page); + kunmap_atomic(dst, KM_USER0); + } ret = mapping->a_ops->commit_write(file, page, 0, sd->len); if (ret < 0) @@ -405,7 +451,8 @@ find_page: out: if (ret < 0) unlock_page(page); - page_cache_release(page); + if 
(!buf->stolen) + page_cache_release(page); buf->ops->unmap(info, buf); return ret; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b12e59c..75c7f55 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,7 @@ struct pipe_buffer { struct page *page; unsigned int offset, len; struct pipe_buf_operations *ops; + unsigned int stolen; }; struct pipe_buf_operations { @@ -16,6 +17,7 @@ struct pipe_buf_operations { void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); + int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); }; struct pipe_inode_info { @@ -53,4 +55,10 @@ void pipe_wait(struct inode * inode); struct inode* pipe_new(struct inode* inode); void free_pipe_info(struct inode* inode); +/* + * splice is tied to pipes as a transport (at least for now), so we'll just + * add the splice flags here. + */ +#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ + #endif