X-Git-Url: http://ftp.safe.ca/?p=safe%2Fjmp%2Flinux-2.6;a=blobdiff_plain;f=fs%2Fread_write.c;h=400fe81c973e90cccdb94c814325bc3ce4e39d19;hp=4ed839bcb91caa3301f13245cf725bc0c46873bf;hb=76a67ec6fb79ff3570dcb5342142c16098299911;hpb=543ade1fc901db4c3dbe9fb27241fb977f1f3eea diff --git a/fs/read_write.c b/fs/read_write.c index 4ed839b..400fe81 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "read_write.h" #include @@ -25,62 +26,76 @@ const struct file_operations generic_ro_fops = { .read = do_sync_read, .aio_read = generic_file_aio_read, .mmap = generic_file_readonly_mmap, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, }; EXPORT_SYMBOL(generic_ro_fops); -loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) +/** + * generic_file_llseek_unlocked - lockless generic llseek implementation + * @file: file structure to seek on + * @offset: file offset to seek to + * @origin: type of seek + * + * Updates the file offset to the value specified by @offset and @origin. + * Locking must be provided by the caller. + */ +loff_t +generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) { - long long retval; struct inode *inode = file->f_mapping->host; - mutex_lock(&inode->i_mutex); switch (origin) { - case 2: - offset += inode->i_size; - break; - case 1: - offset += file->f_pos; - } - retval = -EINVAL; - if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - retval = offset; + case SEEK_END: + offset += inode->i_size; + break; + case SEEK_CUR: + /* + * Here we special-case the lseek(fd, 0, SEEK_CUR) + * position-querying operation. Avoid rewriting the "same" + * f_pos value back to the file because a concurrent read(), + * write() or lseek() might have altered it + */ + if (offset == 0) + return file->f_pos; + offset += file->f_pos; + break; + } + + if (offset < 0 || offset > inode->i_sb->s_maxbytes) + return -EINVAL; + + /* Special lock needed here? */ + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; } - mutex_unlock(&inode->i_mutex); - return retval; -} -EXPORT_SYMBOL(generic_file_llseek); + return offset; +} +EXPORT_SYMBOL(generic_file_llseek_unlocked); -loff_t remote_llseek(struct file *file, loff_t offset, int origin) +/** + * generic_file_llseek - generic llseek implementation for regular files + * @file: file structure to seek on + * @offset: file offset to seek to + * @origin: type of seek + * + * This is a generic implemenation of ->llseek useable for all normal local + * filesystems. It just updates the file offset to the value specified by + * @offset and @origin under i_mutex. + */ +loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) { - long long retval; + loff_t rval; - lock_kernel(); - switch (origin) { - case 2: - offset += i_size_read(file->f_dentry->d_inode); - break; - case 1: - offset += file->f_pos; - } - retval = -EINVAL; - if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) { - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - retval = offset; - } - unlock_kernel(); - return retval; + mutex_lock(&file->f_dentry->d_inode->i_mutex); + rval = generic_file_llseek_unlocked(file, offset, origin); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); + + return rval; } -EXPORT_SYMBOL(remote_llseek); +EXPORT_SYMBOL(generic_file_llseek); loff_t no_llseek(struct file *file, loff_t offset, int origin) { @@ -90,14 +105,18 @@ EXPORT_SYMBOL(no_llseek); loff_t default_llseek(struct file *file, loff_t offset, int origin) { - long long retval; + loff_t retval; lock_kernel(); switch (origin) { - case 2: - offset += i_size_read(file->f_dentry->d_inode); + case SEEK_END: + offset += i_size_read(file->f_path.dentry->d_inode); break; - case 1: + case SEEK_CUR: + if (offset == 0) { + retval = file->f_pos; + goto out; + } offset += file->f_pos; } retval = -EINVAL; @@ -108,6 +127,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) } retval = offset; } +out: unlock_kernel(); return retval; } @@ -127,7 +147,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin) } EXPORT_SYMBOL(vfs_llseek); -asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) +SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) { off_t retval; struct file * file; @@ -139,7 +159,7 @@ asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) goto bad; retval = -EINVAL; - if (origin <= 2) { + if (origin <= SEEK_MAX) { loff_t res = vfs_llseek(file, offset, origin); retval = res; if (res != (loff_t)retval) @@ -151,9 +171,9 @@ bad: } #ifdef __ARCH_WANT_SYS_LLSEEK -asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, - unsigned long offset_low, loff_t __user * result, - unsigned int origin) +SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, + unsigned long, offset_low, loff_t __user *, result, + unsigned int, origin) { int retval; struct file * file; @@ -166,7 +186,7 @@ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, goto bad; retval = -EINVAL; - if (origin > 2) + if (origin > SEEK_MAX) goto out_putf; offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, @@ -196,25 +216,27 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count { struct inode *inode; loff_t pos; + int retval = -EINVAL; + inode = file->f_path.dentry->d_inode; if (unlikely((ssize_t) count < 0)) - goto Einval; + return retval; pos = *ppos; if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) - goto Einval; + return retval; - inode = file->f_dentry->d_inode; - if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) { - int retval = locks_mandatory_area( + if (unlikely(inode->i_flock && mandatory_lock(inode))) { + retval = locks_mandatory_area( read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); if (retval < 0) return retval; } + retval = security_file_permission(file, + read_write == READ ? MAY_READ : MAY_WRITE); + if (retval) + return retval; return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; - -Einval: - return -EINVAL; } static void wait_on_retry_sync_kiocb(struct kiocb *iocb) @@ -266,18 +288,15 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) ret = rw_verify_area(READ, file, pos, count); if (ret >= 0) { count = ret; - ret = security_file_permission (file, MAY_READ); - if (!ret) { - if (file->f_op->read) - ret = file->f_op->read(file, buf, count, pos); - else - ret = do_sync_read(file, buf, count, pos); - if (ret > 0) { - fsnotify_access(file->f_dentry); - current->rchar += ret; - } - current->syscr++; + if (file->f_op->read) + ret = file->f_op->read(file, buf, count, pos); + else + ret = do_sync_read(file, buf, count, pos); + if (ret > 0) { + fsnotify_access(file->f_path.dentry); + add_rchar(current, ret); } + inc_syscr(current); } return ret; @@ -324,18 +343,15 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ ret = rw_verify_area(WRITE, file, pos, count); if (ret >= 0) { count = ret; - ret = security_file_permission (file, MAY_WRITE); - if (!ret) { - if (file->f_op->write) - ret = file->f_op->write(file, buf, count, pos); - else - ret = do_sync_write(file, buf, count, pos); - if (ret > 0) { - fsnotify_modify(file->f_dentry); - current->wchar += ret; - } - current->syscw++; + if (file->f_op->write) + ret = file->f_op->write(file, buf, count, pos); + else + ret = do_sync_write(file, buf, count, pos); + if (ret > 0) { + fsnotify_modify(file->f_path.dentry); + add_wchar(current, ret); } + inc_syscw(current); } return ret; @@ -353,7 +369,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) file->f_pos = pos; } -asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) +SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { struct file *file; ssize_t ret = -EBADF; @@ -369,9 +385,9 @@ asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) return ret; } -EXPORT_SYMBOL_GPL(sys_read); -asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) +SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, + size_t, count) { struct file *file; ssize_t ret = -EBADF; @@ -388,8 +404,8 @@ asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t co return ret; } -asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, - size_t count, loff_t pos) +SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, + size_t count, loff_t pos) { struct file *file; ssize_t ret = -EBADF; @@ -408,9 +424,17 @@ asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) +{ + return SYSC_pread64((unsigned int) fd, (char __user *) buf, + (size_t) count, pos); +} +SYSCALL_ALIAS(sys_pread64, SyS_pread64); +#endif -asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, - size_t count, loff_t pos) +SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, + size_t count, loff_t pos) { struct file *file; ssize_t ret = -EBADF; @@ -429,6 +453,14 @@ asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) +{ + return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, + (size_t) count, pos); +} +SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); +#endif /* * Reduce an iovec's length in-place. Return the resulting number of segments @@ -449,8 +481,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) } return seg; } - -EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ +EXPORT_SYMBOL(iov_shorten); ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) @@ -511,6 +542,74 @@ ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) +ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, + struct iovec **ret_pointer) + { + unsigned long seg; + ssize_t ret; + struct iovec *iov = fast_pointer; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) { + ret = 0; + goto out; + } + + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ + if (nr_segs > UIO_MAXIOV) { + ret = -EINVAL; + goto out; + } + if (nr_segs > fast_segs) { + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + if (iov == NULL) { + ret = -ENOMEM; + goto out; + } + } + if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { + ret = -EFAULT; + goto out; + } + + /* + * According to the Single Unix Specification we should return EINVAL + * if an element length is < 0 when cast to ssize_t or if the + * total length would overflow the ssize_t return value of the + * system call. + */ + ret = 0; + for (seg = 0; seg < nr_segs; seg++) { + void __user *buf = iov[seg].iov_base; + ssize_t len = (ssize_t)iov[seg].iov_len; + + /* see if we we're about to use an invalid len or if + * it's about to overflow ssize_t */ + if (len < 0 || (ret + len < ret)) { + ret = -EINVAL; + goto out; + } + if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { + ret = -EFAULT; + goto out; + } + + ret += len; + } +out: + *ret_pointer = iov; + return ret; +} + static ssize_t do_readv_writev(int type, struct file *file, const struct iovec __user * uvector, unsigned long nr_segs, loff_t *pos) @@ -519,70 +618,23 @@ static ssize_t do_readv_writev(int type, struct file *file, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; ssize_t ret; - int seg; io_fn_t fn; iov_fn_t fnv; - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - ret = 0; - if (nr_segs == 0) + if (!file->f_op) { + ret = -EINVAL; goto out; - - /* - * First get the "struct iovec" from user memory and - * verify all the pointers - */ - ret = -EINVAL; - if (nr_segs > UIO_MAXIOV) - goto out; - if (!file->f_op) - goto out; - if (nr_segs > UIO_FASTIOV) { - ret = -ENOMEM; - iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - goto out; } - ret = -EFAULT; - if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) - goto out; - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. The total length is fitting an ssize_t - * - * Be careful here because iov_len is a size_t not an ssize_t - */ - tot_len = 0; - ret = -EINVAL; - for (seg = 0; seg < nr_segs; seg++) { - void __user *buf = iov[seg].iov_base; - ssize_t len = (ssize_t)iov[seg].iov_len; - - if (len < 0) /* size_t not fitting an ssize_t .. */ - goto out; - if (unlikely(!access_ok(vrfy_dir(type), buf, len))) - goto Efault; - tot_len += len; - if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ - goto out; - } - if (tot_len == 0) { - ret = 0; + ret = rw_copy_check_uvector(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), iovstack, &iov); + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; - ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); - if (ret) - goto out; fnv = NULL; if (type == READ) { @@ -604,14 +656,11 @@ out: kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) - fsnotify_access(file->f_dentry); + fsnotify_access(file->f_path.dentry); else - fsnotify_modify(file->f_dentry); + fsnotify_modify(file->f_path.dentry); } return ret; -Efault: - ret = -EFAULT; - goto out; } ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, @@ -640,8 +689,8 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); -asmlinkage ssize_t -sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) +SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen) { struct file *file; ssize_t ret = -EBADF; @@ -656,13 +705,13 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) } if (ret > 0) - current->rchar += ret; - current->syscr++; + add_rchar(current, ret); + inc_syscr(current); return ret; } -asmlinkage ssize_t -sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) +SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen) { struct file *file; ssize_t ret = -EBADF; @@ -677,8 +726,8 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) } if (ret > 0) - current->wchar += ret; - current->syscw++; + add_wchar(current, ret); + inc_syscw(current); return ret; } @@ -689,7 +738,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, struct inode * in_inode, * out_inode; loff_t pos; ssize_t retval; - int fput_needed_in, fput_needed_out; + int fput_needed_in, fput_needed_out, fl; /* * Get input file, and verify that it is ok.. @@ -701,10 +750,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, if (!(in_file->f_mode & FMODE_READ)) goto fput_in; retval = -EINVAL; - in_inode = in_file->f_dentry->d_inode; + in_inode = in_file->f_path.dentry->d_inode; if (!in_inode) goto fput_in; - if (!in_file->f_op || !in_file->f_op->sendfile) + if (!in_file->f_op || !in_file->f_op->splice_read) goto fput_in; retval = -ESPIPE; if (!ppos) @@ -717,10 +766,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, goto fput_in; count = retval; - retval = security_file_permission (in_file, MAY_READ); - if (retval) - goto fput_in; - /* * Get output file, and verify that it is ok.. */ @@ -733,16 +778,12 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, retval = -EINVAL; if (!out_file->f_op || !out_file->f_op->sendpage) goto fput_out; - out_inode = out_file->f_dentry->d_inode; + out_inode = out_file->f_path.dentry->d_inode; retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); if (retval < 0) goto fput_out; count = retval; - retval = security_file_permission (out_file, MAY_WRITE); - if (retval) - goto fput_out; - if (!max) max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); @@ -757,15 +798,26 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, count = max - pos; } - retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); + fl = 0; +#if 0 + /* + * We need to debate whether we can enable this or not. The + * man page documents EAGAIN return for the output at least, + * and the application is arguably buggy if it doesn't expect + * EAGAIN on a non-blocking file descriptor. + */ + if (in_file->f_flags & O_NONBLOCK) + fl = SPLICE_F_NONBLOCK; +#endif + retval = do_splice_direct(in_file, ppos, out_file, count, fl); if (retval > 0) { - current->rchar += retval; - current->wchar += retval; + add_rchar(current, retval); + add_wchar(current, retval); } - current->syscr++; - current->syscw++; + inc_syscr(current); + inc_syscw(current); if (*ppos > max) retval = -EOVERFLOW; @@ -777,7 +829,7 @@ out: return retval; } -asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) +SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) { loff_t pos; off_t off; @@ -796,7 +848,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, siz return do_sendfile(out_fd, in_fd, NULL, count, 0); } -asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) +SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) { loff_t pos; ssize_t ret;