Skip to content

Commit

Permalink
truncate: new helpers
Browse files Browse the repository at this point in the history
Introduce new truncate helpers truncate_pagecache and inode_newsize_ok.
vmtruncate is also consolidated from mm/memory.c and mm/nommu.c and
into mm/truncate.c.

Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Nick Piggin <[email protected]>
Signed-off-by: Al Viro <[email protected]>
  • Loading branch information
[email protected] authored and al committed Sep 24, 2009
1 parent eca6f53 commit 25d9e2d
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 108 deletions.
2 changes: 1 addition & 1 deletion Documentation/vm/locking
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
mm start up ... this is a loose form of stability on mm_users. For
example, it is used in copy_mm to protect against a racing tlb_gather_mmu
single address space optimization, so that the zap_page_range (from
vmtruncate) does not lose sending ipi's to cloned threads that might
truncate) does not lose sending ipi's to cloned threads that might
be spawned underneath it and go to user mode to drag in pte's into tlbs.

swap_lock
Expand Down
46 changes: 44 additions & 2 deletions fs/attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/* Taken over from the old code... */

/* POSIX UID/GID verification for setting inode attributes. */
int inode_change_ok(struct inode *inode, struct iattr *attr)
int inode_change_ok(const struct inode *inode, struct iattr *attr)
{
int retval = -EPERM;
unsigned int ia_valid = attr->ia_valid;
Expand Down Expand Up @@ -60,9 +60,51 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
error:
return retval;
}

EXPORT_SYMBOL(inode_change_ok);

/**
* inode_newsize_ok - may this inode be truncated to a given size
* @inode: the inode to be truncated
* @offset: the new size to assign to the inode
* @Returns: 0 on success, -ve errno on failure
*
* inode_newsize_ok will check filesystem limits and ulimits to check that the
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
* when necessary. Caller must not proceed with inode size change if failure is
* returned. @inode must be a file (not directory), with appropriate
* permissions to allow truncate (inode_newsize_ok does NOT check these
* conditions).
*
* inode_newsize_ok must be called with i_mutex held.
*/
int inode_newsize_ok(const struct inode *inode, loff_t offset)
{
if (inode->i_size < offset) {
unsigned long limit;

limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
goto out_big;
} else {
/*
* truncation of in-use swapfiles is disallowed - it would
* cause subsequent swapout to scribble on the now-freed
* blocks.
*/
if (IS_SWAPFILE(inode))
return -ETXTBSY;
}

return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
out_big:
return -EFBIG;
}
EXPORT_SYMBOL(inode_newsize_ok);

int inode_setattr(struct inode * inode, struct iattr * attr)
{
unsigned int ia_valid = attr->ia_valid;
Expand Down
3 changes: 2 additions & 1 deletion include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *,
#define buffer_migrate_page NULL
#endif

extern int inode_change_ok(struct inode *, struct iattr *);
extern int inode_change_ok(const struct inode *, struct iattr *);
extern int inode_newsize_ok(const struct inode *, loff_t offset);
extern int __must_check inode_setattr(struct inode *, struct iattr *);

extern void file_update_time(struct file *file);
Expand Down
5 changes: 3 additions & 2 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
unmap_mapping_range(mapping, holebegin, holelen, 0);
}

extern int vmtruncate(struct inode * inode, loff_t offset);
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
extern int vmtruncate(struct inode *inode, loff_t offset);
extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);

#ifdef CONFIG_MMU
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
Expand Down
2 changes: 1 addition & 1 deletion mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
/*
* Lock ordering:
*
* ->i_mmap_lock (vmtruncate)
* ->i_mmap_lock (truncate_pagecache)
* ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_lock (exclusive_swap_page, others)
* ->mapping->tree_lock
Expand Down
62 changes: 3 additions & 59 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long addr = vma->vm_start;

/*
* Hide vma from rmap and vmtruncate before freeing pgtables
* Hide vma from rmap and truncate_pagecache before freeing
* pgtables
*/
anon_vma_unlink(vma);
unlink_file_vma(vma);
Expand Down Expand Up @@ -2407,7 +2408,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
* boundary. Note that this is different from vmtruncate(), which
* boundary. Note that this is different from truncate_pagecache(), which
* must keep the partial page. In contrast, we must get rid of
* partial pages.
* @holelen: size of prospective hole in bytes. This will be rounded
Expand Down Expand Up @@ -2458,63 +2459,6 @@ void unmap_mapping_range(struct address_space *mapping,
}
EXPORT_SYMBOL(unmap_mapping_range);

/**
* vmtruncate - unmap mappings "freed" by truncate() syscall
* @inode: inode of the file used
* @offset: file offset to start truncating
*
* NOTE! We have to be ready to update the memory sharing
* between the file and the memory map for a potential last
* incomplete page. Ugly, but necessary.
*/
int vmtruncate(struct inode * inode, loff_t offset)
{
if (inode->i_size < offset) {
unsigned long limit;

limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
goto out_big;
i_size_write(inode, offset);
} else {
struct address_space *mapping = inode->i_mapping;

/*
* truncation of in-use swapfiles is disallowed - it would
* cause subsequent swapout to scribble on the now-freed
* blocks.
*/
if (IS_SWAPFILE(inode))
return -ETXTBSY;
i_size_write(inode, offset);

/*
* unmap_mapping_range is called twice, first simply for
* efficiency so that truncate_inode_pages does fewer
* single-page unmaps. However after this first call, and
* before truncate_inode_pages finishes, it is possible for
* private pages to be COWed, which remain after
* truncate_inode_pages finishes, hence the second
* unmap_mapping_range call must be made for correctness.
*/
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, offset);
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}

if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;

out_sig:
send_sig(SIGXFSZ, current, 0);
out_big:
return -EFBIG;
}
EXPORT_SYMBOL(vmtruncate);

int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
{
struct address_space *mapping = inode->i_mapping;
Expand Down
4 changes: 2 additions & 2 deletions mm/mremap.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
if (vma->vm_file) {
/*
* Subtle point from Rajesh Venkatasubramanian: before
* moving file-based ptes, we must lock vmtruncate out,
* since it might clean the dst vma before the src vma,
* moving file-based ptes, we must lock truncate_pagecache
* out, since it might clean the dst vma before the src vma,
* and we propagate stale pages into the dst afterward.
*/
mapping = vma->vm_file->f_mapping;
Expand Down
40 changes: 0 additions & 40 deletions mm/nommu.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem);
struct vm_operations_struct generic_file_vm_ops = {
};

/*
* Handle all mappings that got truncated by a "truncate()"
* system call.
*
* NOTE! We have to be ready to update the memory sharing
* between the file and the memory map for a potential last
* incomplete page. Ugly, but necessary.
*/
int vmtruncate(struct inode *inode, loff_t offset)
{
struct address_space *mapping = inode->i_mapping;
unsigned long limit;

if (inode->i_size < offset)
goto do_expand;
i_size_write(inode, offset);

truncate_inode_pages(mapping, offset);
goto out_truncate;

do_expand:
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
goto out;
i_size_write(inode, offset);

out_truncate:
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
out:
return -EFBIG;
}

EXPORT_SYMBOL(vmtruncate);

/*
* Return the total memory allocated for this pointer, not
* just what the caller asked for.
Expand Down
64 changes: 64 additions & 0 deletions mm/truncate.c
Original file line number Diff line number Diff line change
Expand Up @@ -465,3 +465,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
return invalidate_inode_pages2_range(mapping, 0, -1);
}
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);

/**
* truncate_pagecache - unmap and remove pagecache that has been truncated
* @inode: inode
* @old: old file offset
* @new: new file offset
*
* inode's new i_size must already be written before truncate_pagecache
* is called.
*
* This function should typically be called before the filesystem
* releases resources associated with the freed range (eg. deallocates
* blocks). This way, pagecache will always stay logically coherent
* with on-disk format, and the filesystem would not have to deal with
* situations such as writepage being called for a page that has already
* had its underlying blocks deallocated.
*/
void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
{
if (new < old) {
struct address_space *mapping = inode->i_mapping;

/*
* unmap_mapping_range is called twice, first simply for
* efficiency so that truncate_inode_pages does fewer
* single-page unmaps. However after this first call, and
* before truncate_inode_pages finishes, it is possible for
* private pages to be COWed, which remain after
* truncate_inode_pages finishes, hence the second
* unmap_mapping_range call must be made for correctness.
*/
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, new);
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
}
}
EXPORT_SYMBOL(truncate_pagecache);

/**
* vmtruncate - unmap mappings "freed" by truncate() syscall
* @inode: inode of the file used
* @offset: file offset to start truncating
*
* NOTE! We have to be ready to update the memory sharing
* between the file and the memory map for a potential last
* incomplete page. Ugly, but necessary.
*/
int vmtruncate(struct inode *inode, loff_t offset)
{
loff_t oldsize;
int error;

error = inode_newsize_ok(inode, offset);
if (error)
return error;
oldsize = inode->i_size;
i_size_write(inode, offset);
truncate_pagecache(inode, oldsize, offset);
if (inode->i_op->truncate)
inode->i_op->truncate(inode);

return error;
}
EXPORT_SYMBOL(vmtruncate);

0 comments on commit 25d9e2d

Please sign in to comment.