hugetlb: really allocate vma lock for all sharable vmas

Commit bbff39cc6c ("hugetlb: allocate vma lock for all sharable vmas")
removed the pmd sharable checks in the vma lock helper routines.  However,
it left the functional version of helper routines behind #ifdef
CONFIG_ARCH_WANT_HUGE_PMD_SHARE.  Therefore, the vma lock is not being
used for sharable vmas on architectures that do not support pmd sharing. 
On these architectures, a potential fault/truncation race is exposed that
could leave pages in a hugetlb file past i_size until the file is removed.

Move the functional vma lock helpers outside the ifdef, and remove the
non-functional stubs.  Since the vma lock is not just for pmd sharing,
rename the routine __vma_shareable_flags_pmd.
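
To make the fault/truncation interaction concrete, the following is a minimal,
stand-alone user-space sketch of the locking protocol (an analogy only, not
kernel code; everything below is made up for illustration except the
hugetlb_vma_lock_*() helpers named in the comments, which are the routines
this patch makes functional everywhere):

	/*
	 * Illustrative user-space analogy only -- not kernel code.  "Faults"
	 * take the lock shared (as hugetlb_vma_lock_read() does) and re-check
	 * i_size under it; "truncate" takes the lock exclusive
	 * (hugetlb_vma_lock_write()) while shrinking i_size and dropping pages
	 * past it.  With non-functional stub helpers, nothing orders the two
	 * paths and a fault can leave a page in the file past i_size.
	 */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define NR_PAGES	16

	static pthread_rwlock_t vma_lock = PTHREAD_RWLOCK_INITIALIZER;
	static unsigned long i_size = NR_PAGES;	/* file size, in pages */
	static bool page_present[NR_PAGES];

	static void fault_in(unsigned long index)
	{
		pthread_rwlock_rdlock(&vma_lock);	/* hugetlb_vma_lock_read() */
		if (index < i_size)			/* re-check size under the lock */
			page_present[index] = true;
		pthread_rwlock_unlock(&vma_lock);	/* hugetlb_vma_unlock_read() */
	}

	static void truncate_to(unsigned long new_size)
	{
		pthread_rwlock_wrlock(&vma_lock);	/* hugetlb_vma_lock_write() */
		i_size = new_size;
		for (unsigned long i = new_size; i < NR_PAGES; i++)
			page_present[i] = false;	/* remove pages past i_size */
		pthread_rwlock_unlock(&vma_lock);	/* hugetlb_vma_unlock_write() */
	}

	int main(void)
	{
		fault_in(8);
		truncate_to(4);
		printf("page 8 present after truncate: %d\n", page_present[8]);
		return 0;
	}

Built with cc -pthread, the truncate removes page 8 under the exclusive lock,
so the final check prints 0; with empty stub helpers there is no such
exclusion between the two paths.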

Link: https://lkml.kernel.org/r/20221212235042.178355-1-mike.kravetz@oracle.com
Fixes: bbff39cc6c ("hugetlb: allocate vma lock for all sharable vmas")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Mike Kravetz 2022-12-12 15:50:41 -08:00 committed by Andrew Morton
parent 7ba594d700
commit e700898fa0
1 changed file with 148 additions and 185 deletions


@@ -255,6 +255,152 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
	return subpool_inode(file_inode(vma->vm_file));
}

/*
 * hugetlb vma_lock helper routines
 */
static bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
		vma->vm_private_data;
}
void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_write(&vma_lock->rw_sema);
	}
}

int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	if (!__vma_shareable_lock(vma))
		return 1;

	return down_write_trylock(&vma_lock->rw_sema);
}

void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		lockdep_assert_held(&vma_lock->rw_sema);
	}
}
void hugetlb_vma_lock_release(struct kref *kref)
{
	struct hugetlb_vma_lock *vma_lock = container_of(kref,
			struct hugetlb_vma_lock, refs);

	kfree(vma_lock);
}

static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
{
	struct vm_area_struct *vma = vma_lock->vma;

	/*
	 * vma_lock structure may or not be released as a result of put,
	 * it certainly will no longer be attached to vma so clear pointer.
	 * Semaphore synchronizes access to vma_lock->vma field.
	 */
	vma_lock->vma = NULL;
	vma->vm_private_data = NULL;
	up_write(&vma_lock->rw_sema);
	kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
}

static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}

static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
{
	/*
	 * Only present in sharable vmas.
	 */
	if (!vma || !__vma_shareable_lock(vma))
		return;

	if (vma->vm_private_data) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock;

	/* Only establish in (flags) sharable vmas */
	if (!vma || !(vma->vm_flags & VM_MAYSHARE))
		return;

	/* Should never get here with non-NULL vm_private_data */
	if (vma->vm_private_data)
		return;

	vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
	if (!vma_lock) {
		/*
		 * If we can not allocate structure, then vma can not
		 * participate in pmd sharing.  This is only a possible
		 * performance enhancement and memory saving issue.
		 * However, the lock is also used to synchronize page
		 * faults with truncation.  If the lock is not present,
		 * unlikely races could leave pages in a file past i_size
		 * until the file is removed.  Warn in the unlikely case of
		 * allocation failure.
		 */
		pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
		return;
	}

	kref_init(&vma_lock->refs);
	init_rwsem(&vma_lock->rw_sema);
	vma_lock->vma = vma;
	vma->vm_private_data = vma_lock;
}

/* Helper that removes a struct file_region from the resv_map cache and returns
 * it for use.
 */
@@ -6613,7 +6759,8 @@ bool hugetlb_reserve_pages(struct inode *inode,
	}

	/*
	 * vma specific semaphore used for pmd sharing synchronization
	 * vma specific semaphore used for pmd sharing and fault/truncation
	 * synchronization
	 */
	hugetlb_vma_lock_alloc(vma);
@@ -6869,149 +7016,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
		*end = ALIGN(*end, PUD_SIZE);
}

static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma)
{
	return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
		vma->vm_private_data;
}
void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_write(&vma_lock->rw_sema);
	}
}

int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	if (!__vma_shareable_flags_pmd(vma))
		return 1;

	return down_write_trylock(&vma_lock->rw_sema);
}

void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		lockdep_assert_held(&vma_lock->rw_sema);
	}
}
void hugetlb_vma_lock_release(struct kref *kref)
{
	struct hugetlb_vma_lock *vma_lock = container_of(kref,
			struct hugetlb_vma_lock, refs);

	kfree(vma_lock);
}

static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
{
	struct vm_area_struct *vma = vma_lock->vma;

	/*
	 * vma_lock structure may or not be released as a result of put,
	 * it certainly will no longer be attached to vma so clear pointer.
	 * Semaphore synchronizes access to vma_lock->vma field.
	 */
	vma_lock->vma = NULL;
	vma->vm_private_data = NULL;
	up_write(&vma_lock->rw_sema);
	kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
}

static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}

static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
{
	/*
	 * Only present in sharable vmas.
	 */
	if (!vma || !__vma_shareable_flags_pmd(vma))
		return;

	if (vma->vm_private_data) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock;

	/* Only establish in (flags) sharable vmas */
	if (!vma || !(vma->vm_flags & VM_MAYSHARE))
		return;

	/* Should never get here with non-NULL vm_private_data */
	if (vma->vm_private_data)
		return;

	vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
	if (!vma_lock) {
		/*
		 * If we can not allocate structure, then vma can not
		 * participate in pmd sharing.  This is only a possible
		 * performance enhancement and memory saving issue.
		 * However, the lock is also used to synchronize page
		 * faults with truncation.  If the lock is not present,
		 * unlikely races could leave pages in a file past i_size
		 * until the file is removed.  Warn in the unlikely case of
		 * allocation failure.
		 */
		pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
		return;
	}

	kref_init(&vma_lock->refs);
	init_rwsem(&vma_lock->rw_sema);
	vma_lock->vma = vma;
	vma->vm_private_data = vma_lock;
}

/*
 * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
 * and returns the corresponding pte. While this is not necessary for the
@@ -7100,47 +7104,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */

void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}

void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
}

void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
}

void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
}

int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}

void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}

void hugetlb_vma_lock_release(struct kref *kref)
{
}

static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
{
}

static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
{
}

static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
}

pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, pud_t *pud)
{