*/
/*
- * This file implements VFS file and inode operations of regular files, device
+ * This file implements VFS file and inode operations for regular files, device
* nodes and symlinks as well as address space operations.
*
- * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
- * page is dirty and is used for budgeting purposes - dirty pages should not be
- * budgeted. The PG_checked flag is set if full budgeting is required for the
- * page e.g., when it corresponds to a file hole or it is just beyond the file
- * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
- * fail in this function, and the budget is released in 'ubifs_write_end()'. So
- * the PG_private and PG_checked flags carry the information about how the page
- * was budgeted, to make it possible to release the budget properly.
+ * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
+ * the page is dirty and is used for optimization purposes - dirty pages are
+ * not budgeted so the flag shows that 'ubifs_write_end()' should not release
+ * the budget for this page. The @PG_checked flag is set if full budgeting is
+ * required for the page e.g., when it corresponds to a file hole or it is
+ * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
+ * it is OK to fail in this function, and the budget is released in
+ * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
+ * information about how the page was budgeted, to make it possible to release
+ * the budget properly.
*
- * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
- * we implement. However, this is not true for '->writepage()', which might be
- * called with 'i_mutex' unlocked. For example, when pdflush is performing
- * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
- * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
- * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
- * path'. So, in '->writepage()' we are only guaranteed that the page is
- * locked.
+ * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
+ * implement. However, this is not true for 'ubifs_writepage()', which may be
+ * called with @i_mutex unlocked. For example, when pdflush is doing background
+ * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal"
+ * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
+ * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
+ * we are only guaranteed that the page is locked.
*
- * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
- * readahead path does not have it locked ("sys_read -> generic_file_aio_read
- * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
- * not set as well. However, UBIFS disables readahead.
- *
- * This, for example means that there might be 2 concurrent '->writepage()'
- * calls for the same inode, but different inode dirty pages.
+ * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
+ * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
+ * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
+ * set as well. However, UBIFS disables readahead.
*/
#include "ubifs.h"
return err;
}
- ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum);
-
+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+ ubifs_inode(inode)->creat_sqnum);
len = le32_to_cpu(dn->size);
if (len <= 0 || len > UBIFS_BLOCK_SIZE)
goto dump;
}
static int write_begin_slow(struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep)
+ loff_t pos, unsigned len, struct page **pagep,
+ unsigned flags)
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
if (unlikely(err))
return err;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page)) {
ubifs_release_budget(c, &req);
return -ENOMEM;
}
if (!PageUptodate(page)) {
- if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
SetPageChecked(page);
else {
err = do_readpage(page);
struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+ int skipped_read = 0;
struct page *page;
-
ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
if (unlikely(c->ro_media))
return -EROFS;
/* Try out the fast-path part first */
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page))
return -ENOMEM;
if (!PageUptodate(page)) {
/* The page is not loaded from the flash */
- if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
/*
* We change whole page so no need to load it. But we
* have to set the @PG_checked flag to make the further
- * code the page is new. This might be not true, but it
- * is better to budget more that to read the page from
- * the media.
+ * code know that the page is new. This might be not
+ * true, but it is better to budget more than to read
+ * the page from the media.
*/
SetPageChecked(page);
- else {
+ skipped_read = 1;
+ } else {
err = do_readpage(page);
if (err) {
unlock_page(page);
if (unlikely(err)) {
ubifs_assert(err == -ENOSPC);
/*
+ * If we skipped reading the page because we were going to
+ * write all of it, then it is not up to date.
+ */
+ if (skipped_read) {
+ ClearPageChecked(page);
+ ClearPageUptodate(page);
+ }
+ /*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
* request. Write-back cannot be done now, while we have the
unlock_page(page);
page_cache_release(page);
- return write_begin_slow(mapping, pos, len, pagep);
+ return write_begin_slow(mapping, pos, len, pagep, flags);
}
/*
- * Whee, we aquired budgeting quickly - without involving
- * garbage-collection, committing or forceing write-back. We return
+ * Whee, we acquired budgeting quickly - without involving
+ * garbage-collection, committing or forcing write-back. We return
* with @ui->ui_mutex locked if we are appending pages, and unlocked
* otherwise. This is an optimization (slightly hacky though).
*/
/*
* Return 0 to force VFS to repeat the whole operation, or the
- * error code if 'do_readpage()' failes.
+ * error code if 'do_readpage()' fails.
*/
copied = do_readpage(page);
goto out;
dn = bu->buf + (bu->zbranch[nn].offs - offs);
- ubifs_assert(dn->ch.sqnum >
+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
ubifs_inode(inode)->creat_sqnum);
len = le32_to_cpu(dn->size);
/**
* ubifs_do_bulk_read - do bulk-read.
* @c: UBIFS file-system description object
- * @page1: first page
+ * @bu: bulk-read information
+ * @page1: first page to read
*
* This function returns %1 if the bulk-read is done, otherwise %0 is returned.
*/
-static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
+static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
+ struct page *page1)
{
pgoff_t offset = page1->index, end_index;
struct address_space *mapping = page1->mapping;
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
- struct bu_info *bu;
int err, page_idx, page_cnt, ret = 0, n = 0;
+ int allocate = bu->buf ? 0 : 1;
loff_t isize;
- bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
- if (!bu)
- return 0;
-
- bu->buf_len = c->bulk_read_buf_size;
- bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
- if (!bu->buf)
- goto out_free;
-
- data_key_init(c, &bu->key, inode->i_ino,
- offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
-
err = ubifs_tnc_get_bu_keys(c, bu);
if (err)
goto out_warn;
* together. If all the pages were like this, bulk-read would
* reduce performance, so we turn it off for a while.
*/
- ui->read_in_a_row = 0;
- ui->bulk_read = 0;
- goto out_free;
+ goto out_bu_off;
}
if (bu->cnt) {
+ if (allocate) {
+ /*
+ * Allocate bulk-read buffer depending on how many data
+ * nodes we are going to read.
+ */
+ bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
+ bu->zbranch[bu->cnt - 1].len -
+ bu->zbranch[0].offs;
+ ubifs_assert(bu->buf_len > 0);
+ ubifs_assert(bu->buf_len <= c->leb_size);
+ bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
+ if (!bu->buf)
+ goto out_bu_off;
+ }
+
err = ubifs_tnc_bulk_read(c, bu);
if (err)
goto out_warn;
ui->last_page_read = offset + page_idx - 1;
out_free:
- kfree(bu->buf);
- kfree(bu);
+ if (allocate)
+ kfree(bu->buf);
return ret;
out_warn:
ubifs_warn("ignoring error %d and skipping bulk-read", err);
goto out_free;
+
+out_bu_off:
+ ui->read_in_a_row = ui->bulk_read = 0;
+ goto out_free;
}
/**
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = page->index, last_page_read = ui->last_page_read;
- int ret = 0;
+ struct bu_info *bu;
+ int err = 0, allocated = 0;
ui->last_page_read = index;
-
if (!c->bulk_read)
return 0;
+
/*
- * Bulk-read is protected by ui_mutex, but it is an optimization, so
- * don't bother if we cannot lock the mutex.
+ * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
+ * so don't bother if we cannot lock the mutex.
*/
if (!mutex_trylock(&ui->ui_mutex))
return 0;
+
if (index != last_page_read + 1) {
/* Turn off bulk-read if we stop reading sequentially */
ui->read_in_a_row = 1;
ui->bulk_read = 0;
goto out_unlock;
}
+
if (!ui->bulk_read) {
ui->read_in_a_row += 1;
if (ui->read_in_a_row < 3)
/* Three reads in a row, so switch on bulk-read */
ui->bulk_read = 1;
}
- ret = ubifs_do_bulk_read(c, page);
+
+ /*
+ * If possible, try to use pre-allocated bulk-read information, which
+ * is protected by @c->bu_mutex.
+ */
+ if (mutex_trylock(&c->bu_mutex))
+ bu = &c->bu;
+ else {
+ bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
+ if (!bu)
+ goto out_unlock;
+
+ bu->buf = NULL;
+ allocated = 1;
+ }
+
+ bu->buf_len = c->max_bu_buf_len;
+ data_key_init(c, &bu->key, inode->i_ino,
+ page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+ err = ubifs_do_bulk_read(c, bu, page);
+
+ if (!allocated)
+ mutex_unlock(&c->bu_mutex);
+ else
+ kfree(bu);
+
out_unlock:
mutex_unlock(&ui->ui_mutex);
- return ret;
+ return err;
}
static int ubifs_readpage(struct file *file, struct page *page)
* whole index and correct all inode sizes, which is long an unacceptable.
*
* To prevent situations like this, UBIFS writes pages back only if they are
- * within last synchronized inode size, i.e. the the size which has been
+ * within the last synchronized inode size, i.e. the size which has been
* written to the flash media last time. Otherwise, UBIFS forces inode
* write-back, thus making sure the on-flash inode contains current inode size,
* and then keeps writing pages back.
ui->ui_size = inode->i_size;
/* Truncation changes inode [mc]time */
inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
- /* The other attributes may be changed at the same time as well */
+ /* Other attributes may be changed at the same time as well */
do_attr_changes(inode, attr);
-
err = ubifs_jnl_truncate(c, inode, old_size, new_size);
mutex_unlock(&ui->ui_mutex);
+
out_budg:
if (budgeted)
ubifs_release_budget(c, &req);
unsigned long nr_segs, loff_t pos)
{
int err;
- ssize_t ret;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
if (err)
return err;
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
- if (ret < 0)
- return ret;
-
- if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
- err = ubifs_sync_wbufs_by_inode(c, inode);
- if (err)
- return err;
- }
-
- return ret;
+ return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
static int ubifs_set_page_dirty(struct page *page)
* mmap()d file has taken write protection fault and is being made
* writable. UBIFS must ensure page is budgeted for.
*/
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
+ struct page *page = vmf->page;
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec now = ubifs_current_time(inode);
ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
if (unlikely(c->ro_media))
- return -EROFS;
+ return VM_FAULT_SIGBUS; /* -EROFS */
/*
* We have not locked @page so far so we may budget for changing the
if (err == -ENOSPC)
ubifs_warn("out of space for mmapped file "
"(inode number %lu)", inode->i_ino);
- return err;
+ return VM_FAULT_SIGBUS;
}
lock_page(page);
out_unlock:
unlock_page(page);
ubifs_release_budget(c, &req);
+ if (err)
+ err = VM_FAULT_SIGBUS;
return err;
}
-static struct vm_operations_struct ubifs_file_vm_ops = {
+static const struct vm_operations_struct ubifs_file_vm_ops = {
.fault = filemap_fault,
.page_mkwrite = ubifs_vm_page_mkwrite,
};
return 0;
}
-struct address_space_operations ubifs_file_address_operations = {
+const struct address_space_operations ubifs_file_address_operations = {
.readpage = ubifs_readpage,
.writepage = ubifs_writepage,
.write_begin = ubifs_write_begin,
.releasepage = ubifs_releasepage,
};
-struct inode_operations ubifs_file_inode_operations = {
+const struct inode_operations ubifs_file_inode_operations = {
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
#ifdef CONFIG_UBIFS_FS_XATTR
#endif
};
-struct inode_operations ubifs_symlink_inode_operations = {
+const struct inode_operations ubifs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ubifs_follow_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
};
-struct file_operations ubifs_file_operations = {
+const struct file_operations ubifs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,