Skip to content

Commit e32f525

Browse files
committed
Merge: x86/hyperv: Fix kdump on Azure CVMs
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7349 JIRA: https://issues.redhat.com/browse/RHEL-70228 Fix kdump on Azure CVMs. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Approved-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> Approved-by: Maxim Levitsky <mlevitsk@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Patrick Talbert <ptalbert@redhat.com>
2 parents 9a43839 + 708da51 commit e32f525

File tree

1 file changed

+210
-1
lines changed

1 file changed

+210
-1
lines changed

arch/x86/hyperv/ivm.c

Lines changed: 210 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value)
462462
hv_ghcb_msr_read(msr, value);
463463
}
464464

465+
/*
 * Keep track of the PFN regions which were shared with the host. The access
 * must be revoked upon kexec/kdump (see hv_vtom_kexec_finish()).
 */
469+
struct hv_enc_pfn_region {
470+
struct list_head list;
471+
u64 pfn;
472+
int count;
473+
};
474+
475+
/* Taken (with interrupts disabled) around every modification of hv_list_enc. */
static DEFINE_RAW_SPINLOCK(hv_list_enc_lock);
/* List of struct hv_enc_pfn_region entries describing the tracked PFNs. */
static LIST_HEAD(hv_list_enc);
477+
478+
static int hv_list_enc_add(const u64 *pfn_list, int count)
479+
{
480+
struct hv_enc_pfn_region *ent;
481+
unsigned long flags;
482+
u64 pfn;
483+
int i;
484+
485+
for (i = 0; i < count; i++) {
486+
pfn = pfn_list[i];
487+
488+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
489+
/* Check if the PFN already exists in some region first */
490+
list_for_each_entry(ent, &hv_list_enc, list) {
491+
if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn))
492+
/* Nothing to do - pfn is already in the list */
493+
goto unlock_done;
494+
}
495+
496+
/*
497+
* Check if the PFN is adjacent to an existing region. Growing
498+
* a region can make it adjacent to another one but merging is
499+
* not (yet) implemented for simplicity. A PFN cannot be added
500+
* to two regions to keep the logic in hv_list_enc_remove()
501+
* correct.
502+
*/
503+
list_for_each_entry(ent, &hv_list_enc, list) {
504+
if (ent->pfn + ent->count == pfn) {
505+
/* Grow existing region up */
506+
ent->count++;
507+
goto unlock_done;
508+
} else if (pfn + 1 == ent->pfn) {
509+
/* Grow existing region down */
510+
ent->pfn--;
511+
ent->count++;
512+
goto unlock_done;
513+
}
514+
}
515+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
516+
517+
/* No adjacent region found -- create a new one */
518+
ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
519+
if (!ent)
520+
return -ENOMEM;
521+
522+
ent->pfn = pfn;
523+
ent->count = 1;
524+
525+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
526+
list_add(&ent->list, &hv_list_enc);
527+
528+
unlock_done:
529+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
530+
}
531+
532+
return 0;
533+
}
534+
535+
static int hv_list_enc_remove(const u64 *pfn_list, int count)
536+
{
537+
struct hv_enc_pfn_region *ent, *t;
538+
struct hv_enc_pfn_region new_region;
539+
unsigned long flags;
540+
u64 pfn;
541+
int i;
542+
543+
for (i = 0; i < count; i++) {
544+
pfn = pfn_list[i];
545+
546+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
547+
list_for_each_entry_safe(ent, t, &hv_list_enc, list) {
548+
if (pfn == ent->pfn + ent->count - 1) {
549+
/* Removing tail pfn */
550+
ent->count--;
551+
if (!ent->count) {
552+
list_del(&ent->list);
553+
kfree(ent);
554+
}
555+
goto unlock_done;
556+
} else if (pfn == ent->pfn) {
557+
/* Removing head pfn */
558+
ent->count--;
559+
ent->pfn++;
560+
if (!ent->count) {
561+
list_del(&ent->list);
562+
kfree(ent);
563+
}
564+
goto unlock_done;
565+
} else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) {
566+
/*
567+
* Removing a pfn in the middle. Cut off the tail
568+
* of the existing region and create a template for
569+
* the new one.
570+
*/
571+
new_region.pfn = pfn + 1;
572+
new_region.count = ent->count - (pfn - ent->pfn + 1);
573+
ent->count = pfn - ent->pfn;
574+
goto unlock_split;
575+
}
576+
577+
}
578+
unlock_done:
579+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
580+
continue;
581+
582+
unlock_split:
583+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
584+
585+
ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
586+
if (!ent)
587+
return -ENOMEM;
588+
589+
ent->pfn = new_region.pfn;
590+
ent->count = new_region.count;
591+
592+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
593+
list_add(&ent->list, &hv_list_enc);
594+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
595+
}
596+
597+
return 0;
598+
}
599+
600+
/* Stop new private<->shared conversions */
601+
static void hv_vtom_kexec_begin(void)
602+
{
603+
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
604+
return;
605+
606+
/*
607+
* Crash kernel reaches here with interrupts disabled: can't wait for
608+
* conversions to finish.
609+
*
610+
* If race happened, just report and proceed.
611+
*/
612+
if (!set_memory_enc_stop_conversion())
613+
pr_warn("Failed to stop shared<->private conversions\n");
614+
}
615+
616+
static void hv_vtom_kexec_finish(void)
617+
{
618+
struct hv_gpa_range_for_visibility *input;
619+
struct hv_enc_pfn_region *ent;
620+
unsigned long flags;
621+
u64 hv_status;
622+
int cur, i;
623+
624+
local_irq_save(flags);
625+
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
626+
627+
if (unlikely(!input))
628+
goto out;
629+
630+
list_for_each_entry(ent, &hv_list_enc, list) {
631+
for (i = 0, cur = 0; i < ent->count; i++) {
632+
input->gpa_page_list[cur] = ent->pfn + i;
633+
cur++;
634+
635+
if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) {
636+
input->partition_id = HV_PARTITION_ID_SELF;
637+
input->host_visibility = VMBUS_PAGE_NOT_VISIBLE;
638+
input->reserved0 = 0;
639+
input->reserved1 = 0;
640+
hv_status = hv_do_rep_hypercall(
641+
HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY,
642+
cur, 0, input, NULL);
643+
WARN_ON_ONCE(!hv_result_success(hv_status));
644+
cur = 0;
645+
}
646+
}
647+
648+
}
649+
650+
out:
651+
local_irq_restore(flags);
652+
}
653+
465654
/*
466655
* hv_mark_gpa_visibility - Set pages visible to host via hvcall.
467656
*
@@ -475,6 +664,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
475664
struct hv_gpa_range_for_visibility *input;
476665
u64 hv_status;
477666
unsigned long flags;
667+
int ret;
478668

479669
/* no-op if partition isolation is not enabled */
480670
if (!hv_is_isolation_supported())
@@ -486,6 +676,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
486676
return -EINVAL;
487677
}
488678

679+
if (visibility == VMBUS_PAGE_NOT_VISIBLE)
680+
ret = hv_list_enc_remove(pfn, count);
681+
else
682+
ret = hv_list_enc_add(pfn, count);
683+
if (ret)
684+
return ret;
685+
489686
local_irq_save(flags);
490687
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
491688

@@ -506,8 +703,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
506703

507704
if (hv_result_success(hv_status))
508705
return 0;
706+
707+
if (visibility == VMBUS_PAGE_NOT_VISIBLE)
708+
ret = hv_list_enc_add(pfn, count);
509709
else
510-
return -EFAULT;
710+
ret = hv_list_enc_remove(pfn, count);
711+
/*
712+
* There's no good way to recover from -ENOMEM here, the accounting is
713+
* wrong either way.
714+
*/
715+
WARN_ON_ONCE(ret);
716+
717+
return -EFAULT;
511718
}
512719

513720
/*
@@ -669,6 +876,8 @@ void __init hv_vtom_init(void)
669876
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
670877
x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
671878
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
879+
x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin;
880+
x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish;
672881

673882
/* Set WB as the default cache mode. */
674883
mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);

0 commit comments

Comments
 (0)