/* Fixed constants first: */
#undef NR_OPEN
-#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */
+extern int sysctl_nr_open;
#define INR_OPEN 1024 /* Initial setting for nfile rlimits */
#define BLOCK_SIZE_BITS 10
#define READ_SYNC (READ | (1 << BIO_RW_SYNC))
#define READ_META (READ | (1 << BIO_RW_META))
#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
+#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC))
#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
#define SEL_IN 1
#define MS_SLAVE (1<<19) /* change to slave */
#define MS_SHARED (1<<20) /* change to shared */
#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)
((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
+#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
#include <linux/init.h>
#include <linux/pid.h>
#include <linux/mutex.h>
-#include <linux/sysctl.h>
#include <linux/capability.h>
+#include <linux/semaphore.h>
#include <asm/atomic.h>
-#include <asm/semaphore.h>
#include <asm/byteorder.h>
struct export_operations;
struct vm_area_struct;
struct vfsmount;
-extern void __init inode_init(unsigned long);
+extern void __init inode_init(void);
extern void __init inode_init_early(void);
-extern void __init mnt_init(unsigned long);
extern void __init files_init(unsigned long);
struct buffer_head;
#define ATTR_KILL_SUID 2048
#define ATTR_KILL_SGID 4096
#define ATTR_FILE 8192
+#define ATTR_KILL_PRIV 16384
+#define ATTR_OPEN 32768 /* Truncating from open(O_TRUNC) */
/*
* This is the Inode Attributes structure, used for notify_change(). It
* trying again. The aop will be taking reasonable
* precautions not to livelock. If the caller held a page
* reference, it should drop it before retrying. Returned
- * by readpage(), prepare_write(), and commit_write().
+ * by readpage().
*
* address_space_operation functions return these large constants to indicate
* special semantics to the caller. These are much larger than the bytes in a
};
#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
+#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
/*
* oh the beauties of C type declarations.
int (*releasepage) (struct page *, gfp_t);
ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
loff_t offset, unsigned long nr_segs);
- struct page* (*get_xip_page)(struct address_space *, sector_t,
- int);
+ int (*get_xip_mem)(struct address_space *, pgoff_t, int,
+ void **, unsigned long *);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct address_space *,
struct page *, struct page *);
uid_t i_uid;
gid_t i_gid;
dev_t i_rdev;
- unsigned long i_version;
+ u64 i_version;
loff_t i_size;
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
index < ra->start + ra->size);
}
+#define FILE_MNT_WRITE_TAKEN 1
+#define FILE_MNT_WRITE_RELEASED 2
+
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
unsigned int f_uid, f_gid;
struct file_ra_state f_ra;
- unsigned long f_version;
+ u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
+#ifdef CONFIG_DEBUG_WRITECOUNT
+ unsigned long f_mnt_write_state;
+#endif
};
extern spinlock_t files_lock;
#define file_list_lock() spin_lock(&files_lock);
#define get_file(x) atomic_inc(&(x)->f_count)
#define file_count(x) atomic_read(&(x)->f_count)
+#ifdef CONFIG_DEBUG_WRITECOUNT
+static inline void file_take_write(struct file *f)
+{
+ WARN_ON(f->f_mnt_write_state != 0);
+ f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
+}
+static inline void file_release_write(struct file *f)
+{
+ f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
+}
+static inline void file_reset_write(struct file *f)
+{
+ f->f_mnt_write_state = 0;
+}
+static inline void file_check_state(struct file *f)
+{
+ /*
+ * At this point, either both or neither of these bits
+ * should be set.
+ */
+ WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
+ WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
+}
+static inline int file_check_writeable(struct file *f)
+{
+ if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
+ return 0;
+ printk(KERN_WARNING "writeable file with no "
+ "mnt_want_write()\n");
+ WARN_ON(1);
+ return -EINVAL;
+}
+#else /* !CONFIG_DEBUG_WRITECOUNT */
+static inline void file_take_write(struct file *filp) {}
+static inline void file_release_write(struct file *filp) {}
+static inline void file_reset_write(struct file *filp) {}
+static inline void file_check_state(struct file *filp) {}
+static inline int file_check_writeable(struct file *filp)
+{
+ return 0;
+}
+#endif /* CONFIG_DEBUG_WRITECOUNT */
+
#define MAX_NON_LFS ((1UL<<31) - 1)
/* Page cache limit. The filesystems should put that into their s_maxbytes
typedef struct files_struct *fl_owner_t;
struct file_lock_operations {
- void (*fl_insert)(struct file_lock *); /* lock insertion callback */
- void (*fl_remove)(struct file_lock *); /* lock removal callback */
void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
void (*fl_release_private)(struct file_lock *);
};
struct list_head fl_block; /* circular list of blocked processes */
fl_owner_t fl_owner;
unsigned int fl_pid;
+ struct pid *fl_nspid;
wait_queue_head_t fl_wait;
struct file *fl_file;
unsigned char fl_flags;
/* fs/locks.c */
extern void locks_init_lock(struct file_lock *);
extern void locks_copy_lock(struct file_lock *, struct file_lock *);
+extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
extern void locks_remove_posix(struct file *, fl_owner_t);
extern void locks_remove_flock(struct file *);
extern void posix_test_lock(struct file *, struct file_lock *);
extern struct list_head super_blocks;
extern spinlock_t sb_lock;
-#define sb_entry(list) list_entry((list), struct super_block, s_list)
#define S_BIAS (1<<30)
struct super_block {
struct list_head s_list; /* Keep this first */
const struct super_operations *s_op;
struct dquot_operations *dq_op;
struct quotactl_ops *s_qcop;
- struct export_operations *s_export_op;
+ const struct export_operations *s_export_op;
unsigned long s_flags;
unsigned long s_magic;
struct dentry *s_root;
struct list_head s_inodes; /* all inodes */
struct list_head s_dirty; /* dirty inodes */
struct list_head s_io; /* parked for writeback */
+ struct list_head s_more_io; /* parked for more writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
struct list_head s_files;
* in /proc/mounts will be "type.subtype"
*/
char *s_subtype;
+
+ /*
+ * Saved mount options for lazy filesystems using
+ * generic_show_options()
+ */
+ char *s_options;
};
extern struct timespec current_fs_time(struct super_block *sb);
int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
long (*unlocked_ioctl) (struct file *, unsigned, unsigned long);
long (*compat_ioctl) (struct file *, unsigned, unsigned long);
- int (*direct_access) (struct block_device *, sector_t, unsigned long *);
+ int (*direct_access) (struct block_device *, sector_t,
+ void **, unsigned long *);
int (*media_changed) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
int (*getgeo)(struct block_device *, struct hd_geometry *);
extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
unsigned long, loff_t *);
-/*
- * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
- * without the big kernel lock held in all filesystems.
- */
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
- void (*read_inode) (struct inode *);
-
void (*dirty_inode) (struct inode *);
int (*write_inode) (struct inode *, int);
- void (*put_inode) (struct inode *);
void (*drop_inode) (struct inode *);
void (*delete_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
void (*clear_inode) (struct inode *);
- void (*umount_begin) (struct vfsmount *, int);
+ void (*umount_begin) (struct super_block *);
int (*show_options)(struct seq_file *, struct vfsmount *);
int (*show_stats)(struct seq_file *, struct vfsmount *);
#endif
};
-/* Inode state bits. Protected by inode_lock. */
-#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */
-#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */
-#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */
-#define __I_LOCK 3
+/*
+ * Inode state bits. Protected by inode_lock.
+ *
+ * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
+ * I_DIRTY_DATASYNC and I_DIRTY_PAGES.
+ *
+ * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
+ * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
+ * various stages of removing an inode.
+ *
+ * Two bits are used for locking and completion notification, I_LOCK and I_SYNC.
+ *
+ * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
+ * fdatasync(). i_atime is the usual cause.
+ * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
+ * these changes separately from I_DIRTY_SYNC so that we
+ * don't have to write inode on fdatasync() when only
+ * mtime has changed in it.
+ * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
+ * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both
+ * are cleared by unlock_new_inode(), called from iget().
+ * I_WILL_FREE Must be set when calling write_inode_now() if i_count
+ * is zero. I_FREEING must be set when I_WILL_FREE is
+ * cleared.
+ * I_FREEING Set when inode is about to be freed but still has dirty
+ * pages or buffers attached or the inode itself is still
+ * dirty.
+ * I_CLEAR Set by clear_inode(). In this state the inode is clean
+ * and can be destroyed.
+ *
+ * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
+ * prohibited for many purposes. iget() must wait for
+ * the inode to be completely released, then create it
+ * anew. Other functions will just ignore such inodes,
+ * if appropriate. I_LOCK is used for waiting.
+ *
+ * I_LOCK Serves as both a mutex and completion notification.
+ * New inodes set I_LOCK. If two processes both create
+ * the same inode, one of them will release its inode and
+ * wait for I_LOCK to be released before returning.
+ * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
+ * also cause waiting on I_LOCK, without I_LOCK actually
+ * being set. find_inode() uses this to prevent returning
+ * nearly-dead inodes.
+ * I_SYNC Similar to I_LOCK, but limited in scope to writeback
+ * of inode dirty data. Having a separate lock for this
+ * purpose reduces latency and prevents some filesystem-
+ * specific deadlocks.
+ *
+ * Q: What is the difference between I_WILL_FREE and I_FREEING?
+ * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on
+ * I_CLEAR? If not, why?
+ */
+#define I_DIRTY_SYNC 1
+#define I_DIRTY_DATASYNC 2
+#define I_DIRTY_PAGES 4
+#define I_NEW 8
+#define I_WILL_FREE 16
+#define I_FREEING 32
+#define I_CLEAR 64
+#define __I_LOCK 7
#define I_LOCK (1 << __I_LOCK)
-#define I_FREEING 16
-#define I_CLEAR 32
-#define I_NEW 64
-#define I_WILL_FREE 128
+#define __I_SYNC 8
+#define I_SYNC (1 << __I_SYNC)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
mark_inode_dirty(inode);
}
+/**
+ * inode_inc_iversion - increments i_version
+ * @inode: inode that need to be updated
+ *
+ * Every time the inode is modified, the i_version field will be incremented.
+ * The filesystem has to be mounted with i_version flag
+ */
+
+static inline void inode_inc_iversion(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ inode->i_version++;
+ spin_unlock(&inode->i_lock);
+}
+
extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry);
static inline void file_accessed(struct file *file)
{
const struct super_operations *ops, unsigned long,
struct vfsmount *mnt);
extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
-int __put_super(struct super_block *sb);
int __put_super_and_need_restart(struct super_block *sb);
void unnamed_dev_init(void);
extern int register_filesystem(struct file_system_type *);
extern int unregister_filesystem(struct file_system_type *);
-extern struct vfsmount *kern_mount(struct file_system_type *);
+extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
+#define kern_mount(type) kern_mount_data(type, NULL)
extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
-extern void umount_tree(struct vfsmount *, int, struct list_head *);
-extern void release_mounts(struct list_head *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
-extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
-extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
- struct vfsmount *);
+extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *);
+extern void drop_collected_mounts(struct vfsmount *);
extern int vfs_statfs(struct dentry *, struct kstatfs *);
/* /sys/fs */
-extern struct kset fs_subsys;
+extern struct kobject *fs_kobj;
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
struct file *filp);
-extern long do_sys_open(int fdf, const char __user *filename, int flags,
+extern long do_sys_open(int dfd, const char __user *filename, int flags,
int mode);
extern struct file *filp_open(const char *, int, int);
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
extern struct block_device *open_by_devnum(dev_t, unsigned);
-extern const struct address_space_operations def_blk_aops;
#else
static inline void bd_forget(struct inode *inode) {}
#endif
const struct file_operations *);
extern void unregister_chrdev(unsigned int, const char *);
extern void unregister_chrdev_region(dev_t, unsigned);
-extern int chrdev_open(struct inode *, struct file *);
extern void chrdev_show(struct seq_file *,off_t);
/* fs/block_dev.c */
extern struct file *create_write_pipe(void);
extern void free_write_pipe(struct file *);
-extern int open_namei(int dfd, const char *, int, int, struct nameidata *);
+extern struct file *do_filp_open(int dfd, const char *pathname,
+ int open_flag, int mode);
extern int may_open(struct nameidata *, int, int);
extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
extern struct inode * iget_locked(struct super_block *, unsigned long);
extern void unlock_new_inode(struct inode *);
-static inline struct inode *iget(struct super_block *sb, unsigned long ino)
-{
- struct inode *inode = iget_locked(sb, ino);
-
- if (inode && (inode->i_state & I_NEW)) {
- sb->s_op->read_inode(inode);
- unlock_new_inode(inode);
- }
-
- return inode;
-}
-
extern void __iget(struct inode * inode);
+extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
-extern int __remove_suid(struct dentry *, int);
extern int should_remove_suid(struct dentry *);
extern int remove_suid(struct dentry *);
extern int set_blocksize(struct block_device *, int);
extern int sb_set_blocksize(struct super_block *, int);
extern int sb_min_blocksize(struct super_block *, int);
+extern int sb_has_dirty_inodes(struct super_block *);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
unsigned long, loff_t, loff_t *, size_t, ssize_t);
extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern void do_generic_mapping_read(struct address_space *mapping,
- struct file_ra_state *, struct file *,
- loff_t *, read_descriptor_t *, read_actor_t);
extern int generic_segment_checks(const struct iovec *iov,
unsigned long *nr_segs, size_t *count, int access_flags);
}
#endif
-static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
- read_descriptor_t * desc,
- read_actor_t actor)
-{
- do_generic_mapping_read(filp->f_mapping,
- &filp->f_ra,
- filp,
- ppos,
- desc,
- actor);
-}
-
#ifdef CONFIG_BLOCK
ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *);
extern int vfs_fstat(unsigned int, struct kstat *);
-extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
+extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+extern void get_filesystem(struct file_system_type *fs);
+extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
extern struct super_block *get_super(struct block_device *);
extern struct super_block *user_get_super(dev_t);
extern int simple_readpage(struct file *file, struct page *page);
extern int simple_prepare_write(struct file *file, struct page *page,
unsigned offset, unsigned to);
-extern int simple_commit_write(struct file *file, struct page *page,
- unsigned offset, unsigned to);
extern int simple_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
extern void simple_release_fs(struct vfsmount **mount, int *count);
-extern ssize_t simple_read_from_buffer(void __user *, size_t, loff_t *, const void *, size_t);
+extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
+ loff_t *ppos, const void *from, size_t available);
+extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
+ const void *from, size_t available);
#ifdef CONFIG_MIGRATION
extern int buffer_migrate_page(struct address_space *,
extern void file_update_time(struct file *file);
+extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt);
+extern void save_mount_options(struct super_block *sb, char *options);
+
static inline ino_t parent_ino(struct dentry *dentry)
{
ino_t res;
return res;
}
-/* kernel/fork.c */
-extern int unshare_files(void);
-
/* Transaction based IO helpers */
/*
static struct file_operations __fops = { \
.owner = THIS_MODULE, \
.open = __fops ## _open, \
- .release = simple_attr_close, \
+ .release = simple_attr_release, \
.read = simple_attr_read, \
.write = simple_attr_write, \
};
}
int simple_attr_open(struct inode *inode, struct file *file,
- u64 (*get)(void *), void (*set)(void *, u64),
+ int (*get)(void *, u64 *), int (*set)(void *, u64),
const char *fmt);
-int simple_attr_close(struct inode *inode, struct file *file);
+int simple_attr_release(struct inode *inode, struct file *file);
ssize_t simple_attr_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos);
ssize_t simple_attr_write(struct file *file, const char __user *buf,
{ }
#endif /* CONFIG_SECURITY */
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+struct ctl_table;
+int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos);
+int get_filesystem_list(char * buf);
#endif /* __KERNEL__ */
#endif /* _LINUX_FS_H */