@@ -462,6 +462,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value)
 		hv_ghcb_msr_read(msr, value);
 }
 
+/*
+ * Keep track of the PFN regions which were shared with the host. The access
+ * must be revoked upon kexec/kdump (see hv_vtom_kexec_finish()).
+ */
+struct hv_enc_pfn_region {
+	struct list_head list;
+	u64 pfn;
+	int count;
+};
+
+static LIST_HEAD(hv_list_enc);
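+/*
+ * Protects hv_list_enc. A raw spinlock, presumably so it remains a
+ * spinning lock on PREEMPT_RT as well.
+ */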
+static DEFINE_RAW_SPINLOCK(hv_list_enc_lock);
+
+static int hv_list_enc_add(const u64 *pfn_list, int count)
+{
+	struct hv_enc_pfn_region *ent;
+	unsigned long flags;
+	u64 pfn;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		pfn = pfn_list[i];
+
+		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+		/* Check if the PFN already exists in some region first */
+		list_for_each_entry(ent, &hv_list_enc, list) {
+			if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn))
+				/* Nothing to do - pfn is already in the list */
+				goto unlock_done;
+		}
+
+		/*
+		 * Check if the PFN is adjacent to an existing region. Growing
+		 * a region can make it adjacent to another one but merging is
+		 * not (yet) implemented for simplicity. A PFN is never added
+		 * to two regions, which keeps the logic in
+		 * hv_list_enc_remove() correct.
+		 */
+		list_for_each_entry(ent, &hv_list_enc, list) {
+			if (ent->pfn + ent->count == pfn) {
+				/* Grow existing region up */
+				ent->count++;
+				goto unlock_done;
+			} else if (pfn + 1 == ent->pfn) {
+				/* Grow existing region down */
+				ent->pfn--;
+				ent->count++;
+				goto unlock_done;
+			}
+		}
+		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+
+		/* No adjacent region found -- create a new one */
+		ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
+		if (!ent)
+			return -ENOMEM;
+
+		ent->pfn = pfn;
+		ent->count = 1;
+
+		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+		list_add(&ent->list, &hv_list_enc);
+
+unlock_done:
+		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+	}
+
+	return 0;
+}
+
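+/*
+ * Example (illustrative): adding PFNs 10, 11 and 12 above yields a single
+ * region { .pfn = 10, .count = 3 }; removing PFN 11 below splits it into
+ * { .pfn = 10, .count = 1 } and { .pfn = 12, .count = 1 }.
+ */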
+static int hv_list_enc_remove(const u64 *pfn_list, int count)
+{
+	struct hv_enc_pfn_region *ent, *t;
+	struct hv_enc_pfn_region new_region;
+	unsigned long flags;
+	u64 pfn;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		pfn = pfn_list[i];
+
+		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+		list_for_each_entry_safe(ent, t, &hv_list_enc, list) {
+			if (pfn == ent->pfn + ent->count - 1) {
+				/* Removing tail pfn */
+				ent->count--;
+				if (!ent->count) {
+					list_del(&ent->list);
+					kfree(ent);
+				}
+				goto unlock_done;
+			} else if (pfn == ent->pfn) {
+				/* Removing head pfn */
+				ent->count--;
+				ent->pfn++;
+				if (!ent->count) {
+					list_del(&ent->list);
+					kfree(ent);
+				}
+				goto unlock_done;
+			} else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) {
+				/*
+				 * Removing a pfn in the middle. Cut off the tail
+				 * of the existing region and create a template for
+				 * the new one.
+				 */
+				new_region.pfn = pfn + 1;
+				new_region.count = ent->count - (pfn - ent->pfn + 1);
+				ent->count = pfn - ent->pfn;
+				goto unlock_split;
+			}
+		}
+unlock_done:
+		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+		continue;
+
+unlock_split:
+		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+
+		ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
+		if (!ent)
+			return -ENOMEM;
+
+		ent->pfn = new_region.pfn;
+		ent->count = new_region.count;
+
+		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+		list_add(&ent->list, &hv_list_enc);
+		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+	}
+
+	return 0;
+}
+
+/* Stop new private<->shared conversions */
+static void hv_vtom_kexec_begin(void)
+{
+	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+		return;
+
+	/*
+	 * Crash kernel reaches here with interrupts disabled: can't wait for
+	 * conversions to finish.
+	 *
+	 * If a race happened, just report and proceed.
+	 */
+	if (!set_memory_enc_stop_conversion())
+		pr_warn("Failed to stop shared<->private conversions\n");
+}
+
+static void hv_vtom_kexec_finish(void)
+{
+	struct hv_gpa_range_for_visibility *input;
+	struct hv_enc_pfn_region *ent;
+	unsigned long flags;
+	u64 hv_status;
+	int cur, i;
+
+	local_irq_save(flags);
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
+	if (unlikely(!input))
+		goto out;
+
+	list_for_each_entry(ent, &hv_list_enc, list) {
+		for (i = 0, cur = 0; i < ent->count; i++) {
+			input->gpa_page_list[cur] = ent->pfn + i;
+			cur++;
+
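+			/*
+			 * The input page holds at most
+			 * HV_MAX_MODIFY_GPA_REP_COUNT PFNs per hypercall;
+			 * flush the batch when it fills up or the region ends.
+			 */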
+			if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) {
+				input->partition_id = HV_PARTITION_ID_SELF;
+				input->host_visibility = VMBUS_PAGE_NOT_VISIBLE;
+				input->reserved0 = 0;
+				input->reserved1 = 0;
+				hv_status = hv_do_rep_hypercall(
+					HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY,
+					cur, 0, input, NULL);
+				WARN_ON_ONCE(!hv_result_success(hv_status));
+				cur = 0;
+			}
+		}
+	}
+
+out:
+	local_irq_restore(flags);
+}
+
 /*
  * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
  *
@@ -475,6 +664,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
 	struct hv_gpa_range_for_visibility *input;
 	u64 hv_status;
 	unsigned long flags;
+	int ret;
 
 	/* no-op if partition isolation is not enabled */
 	if (!hv_is_isolation_supported())
@@ -486,6 +676,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
 		return -EINVAL;
 	}
 
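+	/*
+	 * Update the tracking list before the hypercall; on hypercall
+	 * failure the update is rolled back below.
+	 */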
+	if (visibility == VMBUS_PAGE_NOT_VISIBLE)
+		ret = hv_list_enc_remove(pfn, count);
+	else
+		ret = hv_list_enc_add(pfn, count);
+	if (ret)
+		return ret;
+
 	local_irq_save(flags);
 	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
 
@@ -506,8 +703,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
 
 	if (hv_result_success(hv_status))
 		return 0;
+
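+	/* Undo the tracking-list update done before the hypercall */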
+	if (visibility == VMBUS_PAGE_NOT_VISIBLE)
+		ret = hv_list_enc_add(pfn, count);
 	else
-		return -EFAULT;
+		ret = hv_list_enc_remove(pfn, count);
+	/*
+	 * There's no good way to recover from -ENOMEM here; the accounting is
+	 * wrong either way.
+	 */
+	WARN_ON_ONCE(ret);
+
+	return -EFAULT;
 }
 
 /*
@@ -669,6 +876,8 @@ void __init hv_vtom_init(void)
 	x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
 	x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
 	x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
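+	/* Stop conversions and revoke host access around kexec/kdump */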
+	x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin;
+	x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish;
 
 	/* Set WB as the default cache mode. */
 	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);