@@ -580,8 +580,9 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
  * the VMA that is created to span the source and destination of the move,
  * so we make an exception for it.
  */
-static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_align,
-                           unsigned long mask, bool for_stack)
+static bool can_align_down(struct pagetable_move_control *pmc,
+                           struct vm_area_struct *vma, unsigned long addr_to_align,
+                           unsigned long mask)
 {
         unsigned long addr_masked = addr_to_align & mask;
 
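Note: the new pmc parameter bundles the move state that used to be passed as separate arguments. The definition of struct pagetable_move_control is not part of this hunk; the sketch below is only inferred from how its fields are used across these hunks, so treat the layout, the comments and the PAGETABLE_MOVE() initialiser as assumptions rather than the actual definition:

#include <stdbool.h>

struct vm_area_struct;                  /* Opaque here; used only via pointers. */

/* Inferred sketch of the control structure threaded through these hunks. */
struct pagetable_move_control {
        struct vm_area_struct *old;     /* Source VMA. */
        struct vm_area_struct *new;     /* Destination VMA. */
        unsigned long old_addr;         /* Start of the range being moved. */
        unsigned long old_end;          /* Exclusive end of the old range. */
        unsigned long new_addr;         /* Destination start address. */
        unsigned long len_in;           /* Length requested by the caller. */
        bool need_rmap_locks;           /* Take rmap locks during the move? */
        bool for_stack;                 /* Early stack move during exec? */
};

/* Assumed initialiser matching the PAGETABLE_MOVE(pmc, ...) uses further down. */
#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)   \
        struct pagetable_move_control name = {                         \
                .old = (old_),                                         \
                .new = (new_),                                         \
                .old_addr = (old_addr_),                               \
                .new_addr = (new_addr_),                               \
                .old_end = (old_addr_) + (len_),                       \
                .len_in = (len_),                                      \
        }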
@@ -590,11 +591,11 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali
          * of the corresponding VMA, we can't align down or we will destroy part
          * of the current mapping.
          */
-        if (!for_stack && vma->vm_start != addr_to_align)
+        if (!pmc->for_stack && vma->vm_start != addr_to_align)
                 return false;
 
         /* In the stack case we explicitly permit in-VMA alignment. */
-        if (for_stack && addr_masked >= vma->vm_start)
+        if (pmc->for_stack && addr_masked >= vma->vm_start)
                 return true;
 
         /*
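As a concrete reading of the two checks above: in the non-stack case only the very start of the VMA may be aligned down, and even then only if nothing else is mapped between the rounded-down address and the VMA start; in the stack case any in-VMA address may be aligned down. A minimal user-space sketch of that decision, assuming a 2 MiB PMD span, hypothetical addresses, and find_vma_intersection() stubbed out as an empty gap:

#include <stdbool.h>
#include <stdio.h>

#define SPAN 0x200000UL                 /* Assumed 2 MiB PMD span. */
#define MASK (~(SPAN - 1))              /* Stand-in for PMD_MASK. */

/* Stand-in for find_vma_intersection(): is anything mapped in [start, end)? */
static bool range_occupied(unsigned long start, unsigned long end)
{
        (void)start;
        (void)end;
        return false;   /* Assume an empty gap below the VMA for this demo. */
}

/* User-space rendering of the decision made by can_align_down() above. */
static bool can_align_down_demo(unsigned long vm_start,
                                unsigned long addr_to_align, bool for_stack)
{
        unsigned long addr_masked = addr_to_align & MASK;

        if (!for_stack && vm_start != addr_to_align)
                return false;
        if (for_stack && addr_masked >= vm_start)
                return true;
        return !range_occupied(addr_masked, vm_start);
}

int main(void)
{
        /* Hypothetical VMA at 0x2ff000: align its own start down to 0x200000. */
        printf("non-stack, VMA start: %d\n",
               can_align_down_demo(0x2ff000UL, 0x2ff000UL, false));
        /* Stack case: an address inside the VMA may be aligned down in place. */
        printf("stack, in-VMA address: %d\n",
               can_align_down_demo(0x1ff000UL, 0x2ff000UL, true));
        return 0;
}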
@@ -604,54 +605,131 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali
         return find_vma_intersection(vma->vm_mm, addr_masked, vma->vm_start) == NULL;
 }
 
-/* Opportunistically realign to specified boundary for faster copy. */
-static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old_vma,
-                             unsigned long *new_addr, struct vm_area_struct *new_vma,
-                             unsigned long mask, bool for_stack)
+/*
+ * Determine if we are in fact able to realign for efficiency to a higher page
+ * table boundary.
+ */
+static bool can_realign_addr(struct pagetable_move_control *pmc,
+                             unsigned long pagetable_mask)
 {
+        unsigned long align_mask = ~pagetable_mask;
+        unsigned long old_align = pmc->old_addr & align_mask;
+        unsigned long new_align = pmc->new_addr & align_mask;
+        unsigned long pagetable_size = align_mask + 1;
+        unsigned long old_align_next = pagetable_size - old_align;
+
+        /*
+         * We don't want to have to go hunting for VMAs from the end of the old
+         * VMA to the next page table boundary, and we also want to make sure
+         * the operation is worthwhile.
+         *
+         * So ensure that we only perform this realignment if the end of the
+         * range being copied reaches or crosses the page table boundary.
+         *
+         * boundary                        boundary
+         *  .<- old_align ->                .
+         *  .              |----------------.-----------|
+         *  .              |          vma   .           |
+         *  .              |----------------.-----------|
+         *  .              <----------------.----------->
+         *  .                         len_in.
+         *  <------------------------------->
+         *  .         pagetable_size        .
+         *  .              <---------------->
+         *  .              old_align_next   .
+         */
+        if (pmc->len_in < old_align_next)
+                return false;
+
         /* Skip if the addresses are already aligned. */
-        if ((*old_addr & ~mask) == 0)
-                return;
+        if (old_align == 0)
+                return false;
 
         /* Only realign if the new and old addresses are mutually aligned. */
-        if ((*old_addr & ~mask) != (*new_addr & ~mask))
-                return;
+        if (old_align != new_align)
+                return false;
 
         /* Ensure realignment doesn't cause overlap with existing mappings. */
-        if (!can_align_down(old_vma, *old_addr, mask, for_stack) ||
-            !can_align_down(new_vma, *new_addr, mask, for_stack))
+        if (!can_align_down(pmc, pmc->old, pmc->old_addr, pagetable_mask) ||
+            !can_align_down(pmc, pmc->new, pmc->new_addr, pagetable_mask))
+                return false;
+
+        return true;
+}
+
+/*
+ * Opportunistically realign to specified boundary for faster copy.
+ *
+ * Consider an mremap() of a VMA with page table boundaries as below, and no
+ * preceding VMAs from the lower page table boundary to the start of the VMA,
+ * with the end of the range reaching or crossing the page table boundary.
+ *
+ *   boundary                        boundary
+ *   .       |----------------------.-----------|
+ *   .       |            vma       .           |
+ *   .       |----------------------.-----------|
+ *   .       pmc->old_addr          .          pmc->old_end
+ *   .       <---------------------------------->
+ *   .             move these page tables
+ *
+ * If we proceed with moving page tables in this scenario, we will have a lot of
+ * work to do traversing old page tables and establishing new ones in the
+ * destination across multiple lower level page tables.
+ *
+ * The idea here is simply to align pmc->old_addr, pmc->new_addr down to the
+ * page table boundary, so we can simply copy a single page table entry for the
+ * aligned portion of the VMA instead:
+ *
+ *   boundary                        boundary
+ *   .       |----------------------.-----------|
+ *   .       |            vma       .           |
+ *   .       |----------------------.-----------|
+ * pmc->old_addr                    .          pmc->old_end
+ *   <-------------------------------------------->
+ *   .          move these page tables
+ */
+static void try_realign_addr(struct pagetable_move_control *pmc,
+                             unsigned long pagetable_mask)
+{
+
+        if (!can_realign_addr(pmc, pagetable_mask))
                 return;
 
-        *old_addr = *old_addr & mask;
-        *new_addr = *new_addr & mask;
+        /*
+         * Simply align to page table boundaries. Note that we do NOT update the
+         * pmc->old_end value, and since the move_page_tables() operation spans
+         * from [old_addr, old_end) (offsetting new_addr as it is performed),
+         * this simply changes the start of the copy, not the end.
+         */
+        pmc->old_addr &= pagetable_mask;
+        pmc->new_addr &= pagetable_mask;
 }
 
-unsigned long move_page_tables(struct vm_area_struct *vma,
-                unsigned long old_addr, struct vm_area_struct *new_vma,
-                unsigned long new_addr, unsigned long len,
-                bool need_rmap_locks, bool for_stack)
+unsigned long move_page_tables(struct pagetable_move_control *pmc)
 {
         unsigned long extent, old_end;
         struct mmu_notifier_range range;
         pmd_t *old_pmd, *new_pmd;
         pud_t *old_pud, *new_pud;
+        unsigned long old_addr, new_addr;
+        struct vm_area_struct *vma = pmc->old;
 
-        if (!len)
+        if (!pmc->len_in)
                 return 0;
 
-        old_end = old_addr + len;
-
         if (is_vm_hugetlb_page(vma))
-                return move_hugetlb_page_tables(vma, new_vma, old_addr,
-                                                new_addr, len);
+                return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr,
+                                                pmc->new_addr, pmc->len_in);
 
+        old_end = pmc->old_end;
         /*
          * If possible, realign addresses to PMD boundary for faster copy.
          * Only realign if the mremap copying hits a PMD boundary.
          */
-        if (len >= PMD_SIZE - (old_addr & ~PMD_MASK))
-                try_realign_addr(&old_addr, vma, &new_addr, new_vma, PMD_MASK,
-                                 for_stack);
+        try_realign_addr(pmc, PMD_MASK);
+        /* These may have been changed. */
+        old_addr = pmc->old_addr;
+        new_addr = pmc->new_addr;
 
         flush_cache_range(vma, old_addr, old_end);
         mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
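To make the realignment condition in can_realign_addr() above concrete, here is the same arithmetic on assumed numbers: a 2 MiB PMD span and hypothetical, mutually aligned source/destination addresses, with the can_align_down() overlap checks ignored:

#include <stdio.h>

#define SPAN 0x200000UL                 /* Assumed 2 MiB PMD span. */
#define MASK (~(SPAN - 1))              /* Stand-in for PMD_MASK. */

int main(void)
{
        /* Hypothetical, mutually aligned mremap() source and destination. */
        unsigned long old_addr = 0x2ff000UL;
        unsigned long new_addr = 0x6ff000UL;
        unsigned long len_in   = 0x180000UL;    /* 1.5 MiB requested. */

        unsigned long align_mask     = ~MASK;                   /* 0x1fffff */
        unsigned long old_align      = old_addr & align_mask;   /* 0xff000  */
        unsigned long new_align      = new_addr & align_mask;   /* 0xff000  */
        unsigned long old_align_next = (align_mask + 1) - old_align;

        /* Realign only if the copy reaches or crosses the next PMD boundary. */
        int realign = len_in >= old_align_next &&
                      old_align != 0 && old_align == new_align;

        printf("old_align=%#lx old_align_next=%#lx -> %s\n",
               old_align, old_align_next,
               realign ? "realign: old_addr/new_addr &= PMD_MASK"
                       : "leave addresses as they are");
        return 0;
}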
@@ -675,12 +753,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                 if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
                         if (extent == HPAGE_PUD_SIZE) {
                                 move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
-                                               old_pud, new_pud, need_rmap_locks);
+                                               old_pud, new_pud, pmc->need_rmap_locks);
                                 /* We ignore and continue on error? */
                                 continue;
                         }
                 } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
-
                         if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
                                            old_pud, new_pud, true))
                                 continue;
@@ -698,7 +775,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                            pmd_devmap(*old_pmd)) {
                         if (extent == HPAGE_PMD_SIZE &&
                             move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
-                                           old_pmd, new_pmd, need_rmap_locks))
+                                           old_pmd, new_pmd, pmc->need_rmap_locks))
                                 continue;
                         split_huge_pmd(vma, old_pmd, old_addr);
                 } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
@@ -713,10 +790,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                 }
                 if (pmd_none(*old_pmd))
                         continue;
-                if (pte_alloc(new_vma->vm_mm, new_pmd))
+                if (pte_alloc(pmc->new->vm_mm, new_pmd))
                         break;
                 if (move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-                              new_vma, new_pmd, new_addr, need_rmap_locks) < 0)
+                              pmc->new, new_pmd, new_addr, pmc->need_rmap_locks) < 0)
                         goto again;
         }
 
@@ -726,10 +803,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
          * Prevent negative return values when {old,new}_addr was realigned
          * but we broke out of the above loop for the first PMD itself.
          */
-        if (old_addr < old_end - len)
+        if (old_addr < old_end - pmc->len_in)
                 return 0;
 
-        return len + old_addr - old_end;        /* how much done */
+        return pmc->len_in + old_addr - old_end;        /* how much done */
 }
 
 /* Set vrm->delta to the difference in VMA size specified by user. */
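The guard and the return expression above are easiest to see with numbers: a successful realignment can pull old_addr below the user-supplied start, so completion is measured against old_end, and the guard catches the case where we broke out before even the first, realigned PMD was moved. A small sketch with assumed values:

#include <stdio.h>

int main(void)
{
        /* Assumed: a 1.5 MiB request whose start was realigned down by 0xff000. */
        unsigned long len_in  = 0x180000UL;
        unsigned long old_end = 0x2ff000UL + len_in;    /* end of the user range */
        unsigned long old_addr;

        /* Fully copied: old_addr has advanced all the way to old_end. */
        old_addr = old_end;
        printf("done: %#lx\n", len_in + old_addr - old_end);   /* == len_in */

        /* Broke out on the very first, realigned PMD: old_addr is still below
         * the user-supplied start, so report zero rather than go negative. */
        old_addr = 0x200000UL;
        if (old_addr < old_end - len_in)
                printf("done: 0\n");
        return 0;
}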
@@ -1040,37 +1117,40 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
         unsigned long internal_pgoff = internal_offset >> PAGE_SHIFT;
         unsigned long new_pgoff = vrm->vma->vm_pgoff + internal_pgoff;
         unsigned long moved_len;
-        bool need_rmap_locks;
-        struct vm_area_struct *vma;
+        struct vm_area_struct *vma = vrm->vma;
         struct vm_area_struct *new_vma;
         int err = 0;
+        PAGETABLE_MOVE(pmc, NULL, NULL, vrm->addr, vrm->new_addr, vrm->old_len);
 
-        new_vma = copy_vma(&vrm->vma, vrm->new_addr, vrm->new_len, new_pgoff,
-                           &need_rmap_locks);
+        new_vma = copy_vma(&vma, vrm->new_addr, vrm->new_len, new_pgoff,
+                           &pmc.need_rmap_locks);
         if (!new_vma) {
                 vrm_uncharge(vrm);
                 *new_vma_ptr = NULL;
                 return -ENOMEM;
         }
-        vma = vrm->vma;
+        vrm->vma = vma;
+        pmc.old = vma;
+        pmc.new = new_vma;
 
-        moved_len = move_page_tables(vma, vrm->addr, new_vma,
-                                     vrm->new_addr, vrm->old_len,
-                                     need_rmap_locks, /* for_stack= */ false);
+        moved_len = move_page_tables(&pmc);
         if (moved_len < vrm->old_len)
                 err = -ENOMEM;
         else if (vma->vm_ops && vma->vm_ops->mremap)
                 err = vma->vm_ops->mremap(new_vma);
 
         if (unlikely(err)) {
+                PAGETABLE_MOVE(pmc_revert, new_vma, vma, vrm->new_addr,
+                               vrm->addr, moved_len);
+
                 /*
                  * On error, move entries back from new area to old,
                  * which will succeed since page tables still there,
                  * and then proceed to unmap new area instead of old.
                  */
-                move_page_tables(new_vma, vrm->new_addr, vma, vrm->addr,
-                                 moved_len, /* need_rmap_locks = */ true,
-                                 /* for_stack= */ false);
+                pmc_revert.need_rmap_locks = true;
+                move_page_tables(&pmc_revert);
+
                 vrm->vma = new_vma;
                 vrm->old_len = vrm->new_len;
                 vrm->addr = vrm->new_addr;
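The error path above is the one subtle part of this hunk: only the moved_len bytes that actually made it across are moved back, with the source and destination roles swapped and rmap locks forced on, exactly as the old explicit call did. Stripped of the surrounding vrm bookkeeping, the pattern looks roughly like this (a sketch in kernel context reusing the assumed PAGETABLE_MOVE() initialiser shown earlier; old_vma, new_vma, old_addr, new_addr, len and failed are placeholders, not standalone code):

        PAGETABLE_MOVE(pmc, old_vma, new_vma, old_addr, new_addr, len);

        moved_len = move_page_tables(&pmc);

        if (failed) {   /* moved_len < len, or the driver's ->mremap() hook said no */
                /* Swap roles: move whatever was transferred back where it came from. */
                PAGETABLE_MOVE(pmc_revert, new_vma, old_vma, new_addr,
                               old_addr, moved_len);

                /* The revert always takes rmap locks, as the old call did. */
                pmc_revert.need_rmap_locks = true;
                move_page_tables(&pmc_revert);
        }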