The package rpms/xen.git has added or updated architecture-specific content in its
spec file (ExclusiveArch/ExcludeArch or %ifarch/%ifnarch) in commit(s):
https://src.fedoraproject.org/cgit/rpms/xen.git/commit/?id=8d8e60110e5f98....
Change:
+%ifnarch %{ix86}
Thanks.
Full change:
============
commit 8d8e60110e5f984678323f5724cdf95bcb5406dc
Author: Michael Young <m.a.young(a)durham.ac.uk>
Date: Sun Jan 21 16:35:50 2018 +0000
add comet and xpti mitigations for XSA-254
add or correct build fixes
diff --git a/4.10.0-shim-comet-3.patch b/4.10.0-shim-comet-3.patch
new file mode 100644
index 0000000..61f2645
--- /dev/null
+++ b/4.10.0-shim-comet-3.patch
@@ -0,0 +1,10861 @@
+From ab7be6ce4ac8cc3f32952d8c9c260412e780e939 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 20 Dec 2017 15:40:58 +0100
+Subject: [PATCH 02/77] xen/pv: Construct d0v0's GDT properly
+
+c/s cf6d39f8199 "x86/PV: properly populate descriptor tables" changed the GDT
+to reference zero_page for intermediate frames between the guest and Xen
+frames.
+
+Because dom0_construct_pv() doesn't call arch_set_info_guest(), some bits of
+initialisation are missed, including the pv_destroy_gdt() which initially
+fills the references to zero_page.
+
+In practice, this means there is a window between starting and the first call
+to HYPERCALL_set_gdt() were lar/lsl/verr/verw suffer non-architectural
+behaviour.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: 08f27f4468eedbeccaac9fdda4ef732247efd74e
+master date: 2017-12-01 19:03:26 +0000
+---
+ xen/arch/x86/pv/dom0_build.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
+index 44601d08d3..a13412efb9 100644
+--- a/xen/arch/x86/pv/dom0_build.c
++++ b/xen/arch/x86/pv/dom0_build.c
+@@ -18,6 +18,7 @@
+ #include <asm/bzimage.h>
+ #include <asm/dom0_build.h>
+ #include <asm/page.h>
++#include <asm/pv/mm.h>
+ #include <asm/setup.h>
+
+ /* Allow ring-3 access in long mode as guest cannot use ring 1 ... */
+@@ -866,6 +867,13 @@ int __init dom0_construct_pv(struct domain *d,
+ regs->rsi = vstartinfo_start;
+ regs->eflags = X86_EFLAGS_IF;
+
++ /*
++ * We don't call arch_set_info_guest(), so some initialisation needs doing
++ * by hand:
++ * - Reset the GDT to reference zero_page
++ */
++ pv_destroy_gdt(v);
++
+ if ( test_bit(XENFEAT_supervisor_mode_kernel, parms.f_required) )
+ panic("Dom0 requires supervisor-mode execution");
+
+--
+2.14.3
+
+
+From 4150501b717e7fde77c9ab4e96dd9916d7345b55 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Date: Wed, 20 Dec 2017 15:41:33 +0100
+Subject: [PATCH 03/77] x86/vvmx: don't enable vmcs shadowing for nested guests
+
+Running "./xtf_runner vvmx" in L1 Xen under L0 Xen produces the
+following result on H/W with VMCS shadowing:
+
+ Test: vmxon
+ Failure in test_vmxon_in_root_cpl0()
+ Expected 0x8200000f: VMfailValid(15) VMXON_IN_ROOT
+ Got 0x82004400: VMfailValid(17408) <unknown>
+ Test result: FAILURE
+
+This happens because SDM allows vmentries with enabled VMCS shadowing
+VM-execution control and VMCS link pointer value of ~0ull. But results
+of a nested VMREAD are undefined in such cases.
+
+Fix this by not copying the value of VMCS shadowing control from vmcs01
+to vmcs02.
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Acked-by: Kevin Tian <kevin.tian(a)intel.com>
+master commit: 19fdb8e258619aea265af9c183e035e545cbc2d2
+master date: 2017-12-01 19:03:27 +0000
+---
+ xen/arch/x86/hvm/vmx/vvmx.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
+index dde02c076b..013d049f8a 100644
+--- a/xen/arch/x86/hvm/vmx/vvmx.c
++++ b/xen/arch/x86/hvm/vmx/vvmx.c
+@@ -633,6 +633,7 @@ void nvmx_update_secondary_exec_control(struct vcpu *v,
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+
+ host_cntrl &= ~apicv_bit;
++ host_cntrl &= ~SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
+ shadow_cntrl = get_vvmcs(v, SECONDARY_VM_EXEC_CONTROL);
+
+ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
+--
+2.14.3
+
+
+From c8f4f45e04dd782ac5dfdf58866339ac97186324 Mon Sep 17 00:00:00 2001
+From: Daniel Kiper <daniel.kiper(a)oracle.com>
+Date: Wed, 20 Dec 2017 15:42:13 +0100
+Subject: [PATCH 04/77] x86/mb2: avoid Xen image when looking for
+ module/crashkernel position
+
+Commit e22e1c4 (x86/EFI: avoid Xen image when looking for module/kexec
+position) added relevant check for EFI case. However, since commit
+f75a304 (x86: add multiboot2 protocol support for relocatable images)
+Multiboot2 compatible bootloaders are able to relocate Xen image too.
+So, we have to avoid also Xen image region in such cases.
+
+Reported-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reported-by: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com>
+Signed-off-by: Daniel Kiper <daniel.kiper(a)oracle.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: 9589927e5bf9e123ec42b6e0b0809f153bd92732
+master date: 2017-12-12 14:30:53 +0100
+---
+ xen/arch/x86/setup.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 32bb02e3a5..2e10c6bdf4 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -653,7 +653,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ module_t *mod = (module_t *)__va(mbi->mods_addr);
+ unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
+ int i, j, e820_warn = 0, bytes = 0;
+- bool acpi_boot_table_init_done = false;
++ bool acpi_boot_table_init_done = false, relocated = false;
+ struct domain *dom0;
+ struct ns16550_defaults ns16550 = {
+ .data_bits = 8,
+@@ -904,8 +904,10 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ mod[i].reserved = 0;
+ }
+
+- if ( efi_enabled(EFI_LOADER) )
++ if ( xen_phys_start )
+ {
++ relocated = true;
++
+ /*
+ * This needs to remain in sync with xen_in_range() and the
+ * respective reserve_e820_ram() invocation below.
+@@ -1098,8 +1100,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+
+ /* Don't overlap with other modules (or Xen itself). */
+ end = consider_modules(s, e, size, mod,
+- mbi->mods_count + efi_enabled(EFI_LOADER),
+- j);
++ mbi->mods_count + relocated, j);
+
+ if ( highmem_start && end > highmem_start )
+ continue;
+@@ -1126,7 +1127,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ {
+ /* Don't overlap with modules (or Xen itself). */
+ e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size), mod,
+- mbi->mods_count + efi_enabled(EFI_LOADER), -1);
++ mbi->mods_count + relocated, -1);
+ if ( s >= e )
+ break;
+ if ( e > kexec_crash_area_limit )
+--
+2.14.3
+
+
+From e2dc7b584f4c7ab6ad7ab543e5cf7ee2e6d1d569 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich(a)suse.com>
+Date: Wed, 20 Dec 2017 15:42:42 +0100
+Subject: [PATCH 05/77] x86/mm: drop bogus paging mode assertion
+
+Olaf has observed this assertion to trigger after an aborted migration
+of a PV guest:
+
+(XEN) Xen call trace:
+(XEN) [<ffff82d0802a85dc>] do_page_fault+0x39f/0x55c
+(XEN) [<ffff82d08036b7d8>] x86_64/entry.S#handle_exception_saved+0x66/0xa4
+(XEN) [<ffff82d0802a9274>] __copy_to_user_ll+0x22/0x30
+(XEN) [<ffff82d0802772d4>] update_runstate_area+0x19c/0x228
+(XEN) [<ffff82d080277371>] domain.c#_update_runstate_area+0x11/0x39
+(XEN) [<ffff82d080277596>] context_switch+0x1fd/0xf25
+(XEN) [<ffff82d0802395c5>] schedule.c#schedule+0x303/0x6a8
+(XEN) [<ffff82d08023d067>] softirq.c#__do_softirq+0x6c/0x95
+(XEN) [<ffff82d08023d0da>] do_softirq+0x13/0x15
+(XEN) [<ffff82d08036b2f1>] x86_64/entry.S#process_softirqs+0x21/0x30
+
+Release builds work fine, which is a first indication that the assertion
+isn't really needed.
+
+What's worse though - there appears to be a timing window where the
+guest runs in shadow mode, but not in log-dirty mode, and that is what
+triggers the assertion (the same could, afaict, be achieved by test-
+enabling shadow mode on a PV guest). This is because turing off log-
+dirty mode is being performed in two steps: First the log-dirty bit gets
+cleared (paging_log_dirty_disable() [having paused the domain] ->
+sh_disable_log_dirty() -> shadow_one_bit_disable()), followed by
+unpausing the domain and only then clearing shadow mode (via
+shadow_test_disable(), which pauses the domain a second time).
+
+Hence besides removing the ASSERT() here (or optionally replacing it by
+explicit translate and refcounts mode checks, but this seems rather
+pointless now that the three are tied together) I wonder whether either
+shadow_one_bit_disable() should turn off shadow mode if no other bit
+besides PG_SH_enable remains set (just like shadow_one_bit_enable()
+enables it if not already set), or the domain pausing scope should be
+extended so that both steps occur without the domain getting a chance to
+run in between.
+
+Reported-by: Olaf Hering <olaf(a)aepfle.de>
+Signed-off-by: Jan Beulich <jbeulich(a)suse.com>
+Reviewed-by: Tim Deegan <tim(a)xen.org>
+Acked-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+master commit: b95f7be32d668fa4b09300892ebe19636ecebe36
+master date: 2017-12-12 16:56:15 +0100
+---
+ xen/arch/x86/traps.c | 6 +-----
+ xen/include/asm-x86/paging.h | 3 ---
+ 2 files changed, 1 insertion(+), 8 deletions(-)
+
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index 86506f3747..642f3cc6d7 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -1338,12 +1338,8 @@ static int fixup_page_fault(unsigned long addr, struct
cpu_user_regs *regs)
+ */
+ if ( paging_mode_enabled(d) && !paging_mode_external(d) )
+ {
+- int ret;
++ int ret = paging_fault(addr, regs);
+
+- /* Logdirty mode is the only expected paging mode for PV guests. */
+- ASSERT(paging_mode_only_log_dirty(d));
+-
+- ret = paging_fault(addr, regs);
+ if ( ret == EXCRET_fault_fixed )
+ trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->rip, addr);
+ return ret;
+diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
+index d99ddedec0..5607ab4b1f 100644
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -69,9 +69,6 @@
+ #define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
+ #define paging_mode_external(_d) (!!((_d)->arch.paging.mode & PG_external))
+
+-#define paging_mode_only_log_dirty(_d) \
+- (((_d)->arch.paging.mode & PG_MASK) == PG_log_dirty)
+-
+ /* flags used for paging debug */
+ #define PAGING_DEBUG_LOGDIRTY 0
+
+--
+2.14.3
+
+
+From e5364c32c650fef60b91b9be9b10f38055ffc2cf Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky(a)amd.com>
+Date: Wed, 20 Dec 2017 15:43:14 +0100
+Subject: [PATCH 06/77] x86/microcode: Add support for fam17h microcode loading
+
+The size for the Microcode Patch Block (MPB) for an AMD family 17h
+processor is 3200 bytes. Add a #define for fam17h so that it does
+not default to 2048 bytes and fail a microcode load/update.
+
+Signed-off-by: Tom Lendacky <thomas.lendacky(a)amd.com>
+Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
+Reviewed-by: Borislav Petkov <bp(a)alien8.de>
+Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
+[Linux commit f4e9b7af0cd58dd039a0fb2cd67d57cea4889abf]
+
+Ported to Xen.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Acked-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: 61d458ba8c171809e8dd9abd19339c87f3f934ca
+master date: 2017-12-13 14:30:10 +0000
+---
+ xen/arch/x86/microcode_amd.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/xen/arch/x86/microcode_amd.c b/xen/arch/x86/microcode_amd.c
+index b54b0b99e4..53f9f548cd 100644
+--- a/xen/arch/x86/microcode_amd.c
++++ b/xen/arch/x86/microcode_amd.c
+@@ -107,6 +107,7 @@ static bool_t verify_patch_size(uint32_t patch_size)
+ #define F14H_MPB_MAX_SIZE 1824
+ #define F15H_MPB_MAX_SIZE 4096
+ #define F16H_MPB_MAX_SIZE 3458
++#define F17H_MPB_MAX_SIZE 3200
+
+ switch (boot_cpu_data.x86)
+ {
+@@ -119,6 +120,9 @@ static bool_t verify_patch_size(uint32_t patch_size)
+ case 0x16:
+ max_size = F16H_MPB_MAX_SIZE;
+ break;
++ case 0x17:
++ max_size = F17H_MPB_MAX_SIZE;
++ break;
+ default:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+--
+2.14.3
+
+
+From 19dcd8e47dfc81b8e9f867ee79c7ff8e15b975fb Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich(a)suse.com>
+Date: Wed, 20 Dec 2017 15:43:53 +0100
+Subject: [PATCH 07/77] gnttab: correct GNTTABOP_cache_flush empty batch
+ handling
+
+Jann validly points out that with a caller bogusly requesting a zero-
+element batch with non-zero high command bits (the ones used for
+continuation encoding), the assertion right before the call to
+hypercall_create_continuation() would trigger. A similar situation would
+arise afaict for non-empty batches with op and/or length zero in every
+element.
+
+While we want the former to succeed (as we do elsewhere for similar
+no-op requests), the latter can clearly be converted to an error, as
+this is a state that can't be the result of a prior operation.
+
+Take the opportunity and also correct the order of argument checks:
+We shouldn't accept zero-length elements with unknown bits set in "op".
+Also constify cache_flush()'s first parameter.
+
+Reported-by: Jann Horn <jannh(a)google.com>
+Signed-off-by: Jan Beulich <jbeulich(a)suse.com>
+Reviewed-by: Andre Przywara <andre.przywara(a)linaro.org>
+Acked-by: Stefano Stabellini <sstabellini(a)kernel.org>
+master commit: 9c22e4d67f5552c7c896ed83bd95d5d4c5837a9d
+master date: 2017-12-04 11:03:32 +0100
+---
+ xen/common/grant_table.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index c5950f2b3f..bce224be6e 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -3208,7 +3208,7 @@
gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop,
+ return 0;
+ }
+
+-static int cache_flush(gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref)
++static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref)
+ {
+ struct domain *d, *owner;
+ struct page_info *page;
+@@ -3218,19 +3218,17 @@ static int cache_flush(gnttab_cache_flush_t *cflush, grant_ref_t
*cur_ref)
+
+ if ( (cflush->offset >= PAGE_SIZE) ||
+ (cflush->length > PAGE_SIZE) ||
+- (cflush->offset + cflush->length > PAGE_SIZE) )
++ (cflush->offset + cflush->length > PAGE_SIZE) ||
++ (cflush->op & ~(GNTTAB_CACHE_INVAL | GNTTAB_CACHE_CLEAN)) )
+ return -EINVAL;
+
+ if ( cflush->length == 0 || cflush->op == 0 )
+- return 0;
++ return !*cur_ref ? 0 : -EILSEQ;
+
+ /* currently unimplemented */
+ if ( cflush->op & GNTTAB_CACHE_SOURCE_GREF )
+ return -EOPNOTSUPP;
+
+- if ( cflush->op & ~(GNTTAB_CACHE_INVAL|GNTTAB_CACHE_CLEAN) )
+- return -EINVAL;
+-
+ d = rcu_lock_current_domain();
+ mfn = cflush->a.dev_bus_addr >> PAGE_SHIFT;
+
+@@ -3310,6 +3308,9 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t)
uop,
+ *cur_ref = 0;
+ guest_handle_add_offset(uop, 1);
+ }
++
++ *cur_ref = 0;
++
+ return 0;
+ }
+
+--
+2.14.3
+
+
+From 682a9d8d37f1141b199bc3aadf8d5d276b22baf9 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich(a)suse.com>
+Date: Wed, 20 Dec 2017 15:44:20 +0100
+Subject: [PATCH 08/77] gnttab: improve GNTTABOP_cache_flush locking
+
+Dropping the lock before returning from grant_map_exists() means handing
+possibly stale information back to the caller. Return back the pointer
+to the active entry instead, for the caller to release the lock once
+done.
+
+Signed-off-by: Jan Beulich <jbeulich(a)suse.com>
+Reviewed-by: Andre Przywara <andre.przywara(a)linaro.org>
+Reviewed-by: Stefano Stabellini <sstabellini(a)kernel.org>
+master commit: 553ac37137c2d1c03bf1b69cfb192ffbfe29daa4
+master date: 2017-12-04 11:04:18 +0100
+---
+ xen/common/grant_table.c | 37 +++++++++++++++++--------------------
+ 1 file changed, 17 insertions(+), 20 deletions(-)
+
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index bce224be6e..250450bdda 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -786,10 +786,10 @@ static int _set_status(unsigned gt_version,
+ return _set_status_v2(domid, readonly, mapflag, shah, act, status);
+ }
+
+-static int grant_map_exists(const struct domain *ld,
+- struct grant_table *rgt,
+- unsigned long mfn,
+- grant_ref_t *cur_ref)
++static struct active_grant_entry *grant_map_exists(const struct domain *ld,
++ struct grant_table *rgt,
++ unsigned long mfn,
++ grant_ref_t *cur_ref)
+ {
+ grant_ref_t ref, max_iter;
+
+@@ -805,28 +805,20 @@ static int grant_map_exists(const struct domain *ld,
+ nr_grant_entries(rgt));
+ for ( ref = *cur_ref; ref < max_iter; ref++ )
+ {
+- struct active_grant_entry *act;
+- bool_t exists;
+-
+- act = active_entry_acquire(rgt, ref);
+-
+- exists = act->pin
+- && act->domid == ld->domain_id
+- && act->frame == mfn;
++ struct active_grant_entry *act = active_entry_acquire(rgt, ref);
+
++ if ( act->pin && act->domid == ld->domain_id &&
act->frame == mfn )
++ return act;
+ active_entry_release(act);
+-
+- if ( exists )
+- return 0;
+ }
+
+ if ( ref < nr_grant_entries(rgt) )
+ {
+ *cur_ref = ref;
+- return 1;
++ return NULL;
+ }
+
+- return -EINVAL;
++ return ERR_PTR(-EINVAL);
+ }
+
+ #define MAPKIND_READ 1
+@@ -3213,6 +3205,7 @@ static int cache_flush(const gnttab_cache_flush_t *cflush,
grant_ref_t *cur_ref)
+ struct domain *d, *owner;
+ struct page_info *page;
+ unsigned long mfn;
++ struct active_grant_entry *act = NULL;
+ void *v;
+ int ret;
+
+@@ -3250,13 +3243,13 @@ static int cache_flush(const gnttab_cache_flush_t *cflush,
grant_ref_t *cur_ref)
+ {
+ grant_read_lock(owner->grant_table);
+
+- ret = grant_map_exists(d, owner->grant_table, mfn, cur_ref);
+- if ( ret != 0 )
++ act = grant_map_exists(d, owner->grant_table, mfn, cur_ref);
++ if ( IS_ERR_OR_NULL(act) )
+ {
+ grant_read_unlock(owner->grant_table);
+ rcu_unlock_domain(d);
+ put_page(page);
+- return ret;
++ return act ? PTR_ERR(act) : 1;
+ }
+ }
+
+@@ -3273,7 +3266,11 @@ static int cache_flush(const gnttab_cache_flush_t *cflush,
grant_ref_t *cur_ref)
+ ret = 0;
+
+ if ( d != owner )
++ {
++ active_entry_release(act);
+ grant_read_unlock(owner->grant_table);
++ }
++
+ unmap_domain_page(v);
+ put_page(page);
+
+--
+2.14.3
+
+
+From 135b67e9bd5281084efe9fb1d3604915dac07ce8 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 20 Dec 2017 15:44:57 +0100
+Subject: [PATCH 09/77] xen/efi: Fix build with clang-5.0
+
+The clang-5.0 build is reliably failing with:
+
+ Error: size of boot.o:.text is 0x01
+
+which is because efi_arch_flush_dcache_area() exists as a single ret
+instruction. Mark it as __init like everything else in the files.
+
+Spotted by Travis.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Stefano Stabellini <sstabellini(a)kernel.org>
+Acked-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: c4f6ad4c5fd25cb0ccc0cdbe711db97e097f0407
+master date: 2017-12-14 10:59:26 +0000
+---
+ xen/arch/arm/efi/efi-boot.h | 2 +-
+ xen/arch/x86/efi/efi-boot.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/arm/efi/efi-boot.h b/xen/arch/arm/efi/efi-boot.h
+index 56de26e918..ca655ff003 100644
+--- a/xen/arch/arm/efi/efi-boot.h
++++ b/xen/arch/arm/efi/efi-boot.h
+@@ -597,7 +597,7 @@ static void __init efi_arch_video_init(EFI_GRAPHICS_OUTPUT_PROTOCOL
*gop,
+ {
+ }
+
+-static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size)
++static void __init efi_arch_flush_dcache_area(const void *vaddr, UINTN size)
+ {
+ __flush_dcache_area(vaddr, size);
+ }
+diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
+index 8d295ff9af..d30f688a5a 100644
+--- a/xen/arch/x86/efi/efi-boot.h
++++ b/xen/arch/x86/efi/efi-boot.h
+@@ -668,7 +668,7 @@ static bool __init efi_arch_use_config_file(EFI_SYSTEM_TABLE
*SystemTable)
+ return true; /* x86 always uses a config file */
+ }
+
+-static void efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
++static void __init efi_arch_flush_dcache_area(const void *vaddr, UINTN size) { }
+
+ void __init efi_multiboot2(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
+ {
+--
+2.14.3
+
+
+From 9dc5eda576bafca47abc7202f075f28d6250bf4d Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 20 Dec 2017 15:45:32 +0100
+Subject: [PATCH 10/77] x86/vmx: Don't use hvm_inject_hw_exception() in
+ long_mode_do_msr_write()
+
+Since c/s 49de10f3c1718 "x86/hvm: Don't raise #GP behind the emulators back
+for MSR accesses", returning X86EMUL_EXCEPTION has pushed the exception
+generation to the top of the call tree.
+
+Using hvm_inject_hw_exception() and returning X86EMUL_EXCEPTION causes a
+double #GP injection, which combines to #DF.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Acked-by: Kevin Tian <kevin.tian(a)intel.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: 896ee3980e72866b602e743396751384de301fb0
+master date: 2017-12-14 18:05:45 +0000
+---
+ xen/arch/x86/hvm/vmx/vmx.c | 11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index b18cceab55..73254bf5d4 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -542,7 +542,7 @@ long_mode_do_msr_write(unsigned int msr, uint64_t msr_content)
+ case MSR_GS_BASE:
+ case MSR_SHADOW_GS_BASE:
+ if ( !is_canonical_address(msr_content) )
+- goto uncanonical_address;
++ return HNDL_exception_raised;
+
+ if ( msr == MSR_FS_BASE )
+ __vmwrite(GUEST_FS_BASE, msr_content);
+@@ -560,14 +560,14 @@ long_mode_do_msr_write(unsigned int msr, uint64_t msr_content)
+
+ case MSR_LSTAR:
+ if ( !is_canonical_address(msr_content) )
+- goto uncanonical_address;
++ return HNDL_exception_raised;
+ v->arch.hvm_vmx.lstar = msr_content;
+ wrmsrl(MSR_LSTAR, msr_content);
+ break;
+
+ case MSR_CSTAR:
+ if ( !is_canonical_address(msr_content) )
+- goto uncanonical_address;
++ return HNDL_exception_raised;
+ v->arch.hvm_vmx.cstar = msr_content;
+ break;
+
+@@ -581,11 +581,6 @@ long_mode_do_msr_write(unsigned int msr, uint64_t msr_content)
+ }
+
+ return HNDL_done;
+-
+- uncanonical_address:
+- HVM_DBG_LOG(DBG_LEVEL_MSR, "Not cano address of msr write %x", msr);
+- hvm_inject_hw_exception(TRAP_gp_fault, 0);
+- return HNDL_exception_raised;
+ }
+
+ /*
+--
+2.14.3
+
+
+From a87ec4833af47cdd166294f3f4db21231930d65d Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 4 Jan 2018 14:32:01 +0100
+Subject: [PATCH 11/77] x86/msr: Free msr_vcpu_policy during vcpu destruction
+
+c/s 4187f79dc7 "x86/msr: introduce struct msr_vcpu_policy" introduced a
+per-vcpu memory allocation, but failed to free it in the clean vcpu
+destruction case.
+
+This is XSA-253.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: e204e60f77702bf5c884dd37c3f1b01f14e396ae
+master date: 2018-01-04 14:27:38 +0100
+---
+ xen/arch/x86/domain.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 735f45c133..b44c95b493 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -382,6 +382,9 @@ void vcpu_destroy(struct vcpu *v)
+
+ vcpu_destroy_fpu(v);
+
++ xfree(v->arch.msr);
++ v->arch.msr = NULL;
++
+ if ( !is_idle_domain(v->domain) )
+ vpmu_destroy(v);
+
+--
+2.14.3
+
+
+From 69e302e59cfd281449eafb6193476a11a1c286df Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:51:14 +0000
+Subject: [PATCH 12/77] x86/upcall: inject a spurious event after setting
+ upcall vector
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In case the vCPU has pending events to inject. This fixes a bug that
+happened if the guest mapped the vcpu info area using
+VCPUOP_register_vcpu_info without having setup the event channel
+upcall, and then setup the upcall vector.
+
+In this scenario the guest would not receive any upcalls, because the
+call to VCPUOP_register_vcpu_info would have marked the vCPU as having
+pending events, but the vector could not be injected because it was
+not yet setup.
+
+This has not caused issues so far because all the consumers first
+setup the vector callback and then map the vcpu info page, but there's
+no limitation that prevents doing it in the inverse order.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+---
+ xen/arch/x86/hvm/hvm.c | 1 +
+ xen/arch/x86/hvm/irq.c | 5 +++++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index 28bc7e4252..9f7b096072 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -4069,6 +4069,7 @@ static int hvmop_set_evtchn_upcall_vector(
+ printk(XENLOG_G_INFO "%pv: upcall vector %02x\n", v, op.vector);
+
+ v->arch.hvm_vcpu.evtchn_upcall_vector = op.vector;
++ hvm_assert_evtchn_irq(v);
+ return 0;
+ }
+
+diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
+index 0077f68a83..f528e2d081 100644
+--- a/xen/arch/x86/hvm/irq.c
++++ b/xen/arch/x86/hvm/irq.c
+@@ -385,6 +385,7 @@ void hvm_set_callback_via(struct domain *d, uint64_t via)
+ struct hvm_irq *hvm_irq = hvm_domain_irq(d);
+ unsigned int gsi=0, pdev=0, pintx=0;
+ uint8_t via_type;
++ struct vcpu *v;
+
+ via_type = (uint8_t)MASK_EXTR(via, HVM_PARAM_CALLBACK_IRQ_TYPE_MASK) + 1;
+ if ( ((via_type == HVMIRQ_callback_gsi) && (via == 0)) ||
+@@ -447,6 +448,10 @@ void hvm_set_callback_via(struct domain *d, uint64_t via)
+
+ spin_unlock(&d->arch.hvm_domain.irq_lock);
+
++ for_each_vcpu ( d, v )
++ if ( is_vcpu_online(v) )
++ hvm_assert_evtchn_irq(v);
++
+ #ifndef NDEBUG
+ printk(XENLOG_G_INFO "Dom%u callback via changed to ", d->domain_id);
+ switch ( via_type )
+--
+2.14.3
+
+
+From caff7f9b59455f1942c96ea7f631e6b0cd9b8e52 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:57 +0000
+Subject: [PATCH 13/77] x86/svm: Offer CPUID Faulting to AMD HVM guests as well
+
+CPUID Faulting can be virtulised for HVM guests without hardware support,
+meaning it can be offered to SVM guests.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ xen/arch/x86/hvm/svm/svm.c | 6 ++++++
+ xen/arch/x86/msr.c | 3 ++-
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index b9cf423fd9..8864d82c11 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -1784,6 +1784,12 @@ static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
+ if ( (inst_len = __get_instruction_length(curr, INSTR_CPUID)) == 0 )
+ return;
+
++ if ( hvm_check_cpuid_faulting(curr) )
++ {
++ hvm_inject_hw_exception(TRAP_gp_fault, 0);
++ return;
++ }
++
+ guest_cpuid(curr, regs->eax, regs->ecx, &res);
+ HVMTRACE_5D(CPUID, regs->eax, res.a, res.b, res.c, res.d);
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 31983edc54..187f8623a5 100644
+--- a/xen/arch/x86/msr.c
++++ b/xen/arch/x86/msr.c
+@@ -39,7 +39,8 @@ static void __init calculate_hvm_max_policy(void)
+ return;
+
+ /* 0x000000ce MSR_INTEL_PLATFORM_INFO */
+- if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
++ boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+ {
+ dp->plaform_info.available = true;
+ dp->plaform_info.cpuid_faulting = true;
+--
+2.14.3
+
+
+From 5840f40e88fbdcdcf748d0e581dad587ffdde0a1 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:58 +0000
+Subject: [PATCH 14/77] xen/x86: report domain id on cpuid
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Use the ECX register of the hypervisor leaf 5. The EAX register on
+this leaf is a flags field that can be used to notice the presence of
+the domain id in ECX. Note that this is only available to HVM guests.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+Changes since v1:
+ - Use leaf 5 instead.
+---
+ xen/arch/x86/traps.c | 5 +++++
+ xen/include/public/arch-x86/cpuid.h | 2 ++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index 642f3cc6d7..348866b8b5 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -928,6 +928,11 @@ void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
+ /* Indicate presence of vcpu id and set it in ebx */
+ res->a |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
+ res->b = v->vcpu_id;
++
++ /* Indicate presence of domain id and set it in ecx */
++ res->a |= XEN_HVM_CPUID_DOMID_PRESENT;
++ res->c = d->domain_id;
++
+ break;
+
+ case 5: /* PV-specific parameters */
+diff --git a/xen/include/public/arch-x86/cpuid.h b/xen/include/public/arch-x86/cpuid.h
+index eb76875d0e..665c4b644d 100644
+--- a/xen/include/public/arch-x86/cpuid.h
++++ b/xen/include/public/arch-x86/cpuid.h
+@@ -94,12 +94,14 @@
+ * HVM-specific features
+ * Sub-leaf 0: EAX: Features
+ * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
++ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
+ */
+ #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
+ #define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses
*/
+ /* Memory mapped from other domains has valid IOMMU entries */
+ #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
+ #define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */
++#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */
+
+ /*
+ * Leaf 6 (0x40000x05)
+--
+2.14.3
+
+
+From 40938b5d5696ccdec67b15fb3a49e8a9f1ab1998 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:58 +0000
+Subject: [PATCH 15/77] tools/libxc: remove extraneous newline in
+ xc_dom_load_acpi
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ tools/libxc/xc_dom_core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
+index b5f316a1dc..303cb971e8 100644
+--- a/tools/libxc/xc_dom_core.c
++++ b/tools/libxc/xc_dom_core.c
+@@ -1078,7 +1078,7 @@ static int xc_dom_load_acpi(struct xc_dom_image *dom)
+
+ while ( (i < MAX_ACPI_MODULES) && dom->acpi_modules[i].length )
+ {
+- DOMPRINTF("%s: %d bytes at address %" PRIx64 "\n",
__FUNCTION__,
++ DOMPRINTF("%s: %d bytes at address %" PRIx64, __FUNCTION__,
+ dom->acpi_modules[i].length,
+ dom->acpi_modules[i].guest_addr_out);
+
+--
+2.14.3
+
+
+From 4621c10f489de827742f95c31ac0f43fc3bcde88 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:58 +0000
+Subject: [PATCH 16/77] tools/libelf: fix elf notes check for PVH guest
+
+PVH only requires PHYS32_ENTRY to be set. Return immediately if that's
+the case.
+
+Also remove the printk in pvh_load_kernel.
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ xen/arch/x86/hvm/dom0_build.c | 4 ----
+ xen/common/libelf/libelf-dominfo.c | 9 ++++++++-
+ 2 files changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
+index a67071c739..303ae4e7b5 100644
+--- a/xen/arch/x86/hvm/dom0_build.c
++++ b/xen/arch/x86/hvm/dom0_build.c
+@@ -484,10 +484,6 @@ static int __init pvh_load_kernel(struct domain *d, const module_t
*image,
+ return -EINVAL;
+ }
+
+- printk("OS: %s version: %s loader: %s bitness: %s\n", parms.guest_os,
+- parms.guest_ver, parms.loader,
+- elf_64bit(&elf) ? "64-bit" : "32-bit");
+-
+ /* Copy the OS image and free temporary buffer. */
+ elf.dest_base = (void *)(parms.virt_kstart - parms.virt_base);
+ elf.dest_size = parms.virt_kend - parms.virt_kstart;
+diff --git a/xen/common/libelf/libelf-dominfo.c b/xen/common/libelf/libelf-dominfo.c
+index a52900c00c..378bc05f39 100644
+--- a/xen/common/libelf/libelf-dominfo.c
++++ b/xen/common/libelf/libelf-dominfo.c
+@@ -373,6 +373,13 @@ static elf_errorstatus elf_xen_note_check(struct elf_binary *elf,
+ return 0;
+ }
+
++ /* PVH only requires one ELF note to be set */
++ if ( parms->phys_entry != UNSET_ADDR32 )
++ {
++ elf_msg(elf, "ELF: Found PVH image\n");
++ return 0;
++ }
++
+ /* Check the contents of the Xen notes or guest string. */
+ if ( ((strlen(parms->loader) == 0) ||
+ strncmp(parms->loader, "generic", 7)) &&
+@@ -381,7 +388,7 @@ static elf_errorstatus elf_xen_note_check(struct elf_binary *elf,
+ {
+ elf_err(elf,
+ "ERROR: Will only load images built for the generic loader or Linux
images"
+- " (Not '%.*s' and '%.*s')\n",
++ " (Not '%.*s' and '%.*s') or with PHYS32_ENTRY
set\n",
+ (int)sizeof(parms->loader), parms->loader,
+ (int)sizeof(parms->guest_os), parms->guest_os);
+ return -1;
+--
+2.14.3
+
+
+From 667275050d83fdca61303b09d9c2448f0badf5a9 Mon Sep 17 00:00:00 2001
+From: Jonathan Ludlam <jonathan.ludlam(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:58 +0000
+Subject: [PATCH 17/77] tools/libxc: Multi modules support
+
+Signed-off-by: Jonathan Ludlam <jonathan.ludlam(a)citrix.com>
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ stubdom/grub/kexec.c | 7 +-
+ tools/helpers/init-xenstore-domain.c | 4 +-
+ tools/libxc/include/xc_dom.h | 48 ++++++-----
+ tools/libxc/xc_dom_compat_linux.c | 2 +-
+ tools/libxc/xc_dom_core.c | 152 +++++++++++++++++++++++------------
+ tools/libxc/xc_dom_x86.c | 65 ++++++++-------
+ tools/libxl/libxl_dom.c | 10 +--
+ 7 files changed, 175 insertions(+), 113 deletions(-)
+
+diff --git a/stubdom/grub/kexec.c b/stubdom/grub/kexec.c
+index 437a0a96e9..61ca082d42 100644
+--- a/stubdom/grub/kexec.c
++++ b/stubdom/grub/kexec.c
+@@ -202,7 +202,7 @@ static void tpm_hash2pcr(struct xc_dom_image *dom, char *cmdline)
+ ASSERT(rv == 0 && resp->status == 0);
+
+ cmd.pcr = bswap_32(5); // PCR #5 for initrd
+- sha1(dom->ramdisk_blob, dom->ramdisk_size, cmd.hash);
++ sha1(dom->modules[0].blob, dom->modules[0].size, cmd.hash);
+ rv = tpmfront_cmd(tpm, (void*)&cmd, sizeof(cmd), (void*)&resp, &resplen);
+ ASSERT(rv == 0 && resp->status == 0);
+
+@@ -231,13 +231,12 @@ void kexec(void *kernel, long kernel_size, void *module, long
module_size, char
+
+ /* We are using guest owned memory, therefore no limits. */
+ xc_dom_kernel_max_size(dom, 0);
+- xc_dom_ramdisk_max_size(dom, 0);
++ xc_dom_module_max_size(dom, 0);
+
+ dom->kernel_blob = kernel;
+ dom->kernel_size = kernel_size;
+
+- dom->ramdisk_blob = module;
+- dom->ramdisk_size = module_size;
++ xc_dom_module_mem(dom, module, module_size, NULL);
+
+ dom->flags = flags;
+ dom->console_evtchn = start_info.console.domU.evtchn;
+diff --git a/tools/helpers/init-xenstore-domain.c b/tools/helpers/init-xenstore-domain.c
+index 047ad0cb1d..8453be283b 100644
+--- a/tools/helpers/init-xenstore-domain.c
++++ b/tools/helpers/init-xenstore-domain.c
+@@ -145,10 +145,10 @@ static int build(xc_interface *xch)
+
+ if ( ramdisk )
+ {
+- rv = xc_dom_ramdisk_file(dom, ramdisk);
++ rv = xc_dom_module_file(dom, ramdisk, NULL);
+ if ( rv )
+ {
+- fprintf(stderr, "xc_dom_ramdisk_file failed\n");
++ fprintf(stderr, "xc_dom_module_file failed\n");
+ goto err;
+ }
+ }
+diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
+index cdcdd07d2b..08be8a8f3f 100644
+--- a/tools/libxc/include/xc_dom.h
++++ b/tools/libxc/include/xc_dom.h
+@@ -22,6 +22,7 @@
+ #define INVALID_PFN ((xen_pfn_t)-1)
+ #define X86_HVM_NR_SPECIAL_PAGES 8
+ #define X86_HVM_END_SPECIAL_REGION 0xff000u
++#define XG_MAX_MODULES 2
+
+ /* --- typedefs and structs ---------------------------------------- */
+
+@@ -56,17 +57,32 @@ struct xc_dom_phys {
+ xen_pfn_t count;
+ };
+
++struct xc_dom_module {
++ void *blob;
++ size_t size;
++ void *cmdline;
++ /* If seg.vstart is non zero then the module will be loaded at that
++ * address, otherwise it will automatically placed.
++ *
++ * If automatic placement is used and the module is gzip
++ * compressed then it will be decompressed as it is loaded. If the
++ * module has been explicitly placed then it is loaded as is
++ * otherwise decompressing risks undoing the manual placement.
++ */
++ struct xc_dom_seg seg;
++};
++
+ struct xc_dom_image {
+ /* files */
+ void *kernel_blob;
+ size_t kernel_size;
+- void *ramdisk_blob;
+- size_t ramdisk_size;
++ unsigned int num_modules;
++ struct xc_dom_module modules[XG_MAX_MODULES];
+ void *devicetree_blob;
+ size_t devicetree_size;
+
+ size_t max_kernel_size;
+- size_t max_ramdisk_size;
++ size_t max_module_size;
+ size_t max_devicetree_size;
+
+ /* arguments and parameters */
+@@ -80,15 +96,6 @@ struct xc_dom_image {
+
+ /* memory layout */
+ struct xc_dom_seg kernel_seg;
+- /* If ramdisk_seg.vstart is non zero then the ramdisk will be
+- * loaded at that address, otherwise it will automatically placed.
+- *
+- * If automatic placement is used and the ramdisk is gzip
+- * compressed then it will be decompressed as it is loaded. If the
+- * ramdisk has been explicitly placed then it is loaded as is
+- * otherwise decompressing risks undoing the manual placement.
+- */
+- struct xc_dom_seg ramdisk_seg;
+ struct xc_dom_seg p2m_seg;
+ struct xc_dom_seg pgtables_seg;
+ struct xc_dom_seg devicetree_seg;
+@@ -277,12 +284,12 @@ void xc_dom_release(struct xc_dom_image *dom);
+ int xc_dom_rambase_init(struct xc_dom_image *dom, uint64_t rambase);
+ int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
+
+-/* Set this larger if you have enormous ramdisks/kernels. Note that
++/* Set this larger if you have enormous modules/kernels. Note that
+ * you should trust all kernels not to be maliciously large (e.g. to
+ * exhaust all dom0 memory) if you do this (see CVE-2012-4544 /
+ * XSA-25). You can also set the default independently for
+- * ramdisks/kernels in xc_dom_allocate() or call
+- * xc_dom_{kernel,ramdisk}_max_size.
++ * modules/kernels in xc_dom_allocate() or call
++ * xc_dom_{kernel,module}_max_size.
+ */
+ #ifndef XC_DOM_DECOMPRESS_MAX
+ #define XC_DOM_DECOMPRESS_MAX (1024*1024*1024) /* 1GB */
+@@ -291,8 +298,8 @@ int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
+ int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz);
+ int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz);
+
+-int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz);
+-int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz);
++int xc_dom_module_check_size(struct xc_dom_image *dom, size_t sz);
++int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz);
+
+ int xc_dom_devicetree_max_size(struct xc_dom_image *dom, size_t sz);
+
+@@ -303,11 +310,12 @@ int xc_dom_do_gunzip(xc_interface *xch,
+ int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size);
+
+ int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename);
+-int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename);
++int xc_dom_module_file(struct xc_dom_image *dom, const char *filename,
++ const char *cmdline);
+ int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
+ size_t memsize);
+-int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem,
+- size_t memsize);
++int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem,
++ size_t memsize, const char *cmdline);
+ int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename);
+ int xc_dom_devicetree_mem(struct xc_dom_image *dom, const void *mem,
+ size_t memsize);
+diff --git a/tools/libxc/xc_dom_compat_linux.c b/tools/libxc/xc_dom_compat_linux.c
+index c922c61e90..b3d43feed9 100644
+--- a/tools/libxc/xc_dom_compat_linux.c
++++ b/tools/libxc/xc_dom_compat_linux.c
+@@ -56,7 +56,7 @@ int xc_linux_build(xc_interface *xch, uint32_t domid,
+ if ( (rc = xc_dom_kernel_file(dom, image_name)) != 0 )
+ goto out;
+ if ( initrd_name && strlen(initrd_name) &&
+- ((rc = xc_dom_ramdisk_file(dom, initrd_name)) != 0) )
++ ((rc = xc_dom_module_file(dom, initrd_name, NULL)) != 0) )
+ goto out;
+
+ dom->flags |= flags;
+diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
+index 303cb971e8..3e65aff22b 100644
+--- a/tools/libxc/xc_dom_core.c
++++ b/tools/libxc/xc_dom_core.c
+@@ -314,16 +314,16 @@ int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz)
+ return 0;
+ }
+
+-int xc_dom_ramdisk_check_size(struct xc_dom_image *dom, size_t sz)
++int xc_dom_module_check_size(struct xc_dom_image *dom, size_t sz)
+ {
+ /* No limit */
+- if ( !dom->max_ramdisk_size )
++ if ( !dom->max_module_size )
+ return 0;
+
+- if ( sz > dom->max_ramdisk_size )
++ if ( sz > dom->max_module_size )
+ {
+ xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+- "ramdisk image too large");
++ "module image too large");
+ return 1;
+ }
+
+@@ -764,7 +764,7 @@ struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
+ dom->xch = xch;
+
+ dom->max_kernel_size = XC_DOM_DECOMPRESS_MAX;
+- dom->max_ramdisk_size = XC_DOM_DECOMPRESS_MAX;
++ dom->max_module_size = XC_DOM_DECOMPRESS_MAX;
+ dom->max_devicetree_size = XC_DOM_DECOMPRESS_MAX;
+
+ if ( cmdline )
+@@ -797,10 +797,10 @@ int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz)
+ return 0;
+ }
+
+-int xc_dom_ramdisk_max_size(struct xc_dom_image *dom, size_t sz)
++int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz)
+ {
+- DOMPRINTF("%s: ramdisk_max_size=%zx", __FUNCTION__, sz);
+- dom->max_ramdisk_size = sz;
++ DOMPRINTF("%s: module_max_size=%zx", __FUNCTION__, sz);
++ dom->max_module_size = sz;
+ return 0;
+ }
+
+@@ -821,16 +821,30 @@ int xc_dom_kernel_file(struct xc_dom_image *dom, const char
*filename)
+ return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+ }
+
+-int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename)
++int xc_dom_module_file(struct xc_dom_image *dom, const char *filename, const char
*cmdline)
+ {
++ unsigned int mod = dom->num_modules++;
++
+ DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
+- dom->ramdisk_blob =
+- xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size,
+- dom->max_ramdisk_size);
++ dom->modules[mod].blob =
++ xc_dom_malloc_filemap(dom, filename, &dom->modules[mod].size,
++ dom->max_module_size);
+
+- if ( dom->ramdisk_blob == NULL )
++ if ( dom->modules[mod].blob == NULL )
+ return -1;
+-// return xc_dom_try_gunzip(dom, &dom->ramdisk_blob,
&dom->ramdisk_size);
++
++ if ( cmdline )
++ {
++ dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline);
++
++ if ( dom->modules[mod].cmdline == NULL )
++ return -1;
++ }
++ else
++ {
++ dom->modules[mod].cmdline = NULL;
++ }
++
+ return 0;
+ }
+
+@@ -859,13 +873,28 @@ int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
size_t memsize)
+ return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+ }
+
+-int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem,
+- size_t memsize)
++int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem,
++ size_t memsize, const char *cmdline)
+ {
++ unsigned int mod = dom->num_modules++;
++
+ DOMPRINTF_CALLED(dom->xch);
+- dom->ramdisk_blob = (void *)mem;
+- dom->ramdisk_size = memsize;
+-// return xc_dom_try_gunzip(dom, &dom->ramdisk_blob,
&dom->ramdisk_size);
++
++ dom->modules[mod].blob = (void *)mem;
++ dom->modules[mod].size = memsize;
++
++ if ( cmdline )
++ {
++ dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline);
++
++ if ( dom->modules[mod].cmdline == NULL )
++ return -1;
++ }
++ else
++ {
++ dom->modules[mod].cmdline = NULL;
++ }
++
+ return 0;
+ }
+
+@@ -990,41 +1019,42 @@ int xc_dom_update_guest_p2m(struct xc_dom_image *dom)
+ return 0;
+ }
+
+-static int xc_dom_build_ramdisk(struct xc_dom_image *dom)
++static int xc_dom_build_module(struct xc_dom_image *dom, unsigned int mod)
+ {
+- size_t unziplen, ramdisklen;
+- void *ramdiskmap;
++ size_t unziplen, modulelen;
++ void *modulemap;
++ char name[10];
+
+- if ( !dom->ramdisk_seg.vstart )
++ if ( !dom->modules[mod].seg.vstart )
+ {
+ unziplen = xc_dom_check_gzip(dom->xch,
+- dom->ramdisk_blob, dom->ramdisk_size);
+- if ( xc_dom_ramdisk_check_size(dom, unziplen) != 0 )
++ dom->modules[mod].blob,
dom->modules[mod].size);
++ if ( xc_dom_module_check_size(dom, unziplen) != 0 )
+ unziplen = 0;
+ }
+ else
+ unziplen = 0;
+
+- ramdisklen = unziplen ? unziplen : dom->ramdisk_size;
+-
+- if ( xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk",
+- dom->ramdisk_seg.vstart, ramdisklen) != 0 )
++ modulelen = unziplen ? unziplen : dom->modules[mod].size;
++ snprintf(name, sizeof(name), "module%u", mod);
++ if ( xc_dom_alloc_segment(dom, &dom->modules[mod].seg, name,
++ dom->modules[mod].seg.vstart, modulelen) != 0 )
+ goto err;
+- ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg);
+- if ( ramdiskmap == NULL )
++ modulemap = xc_dom_seg_to_ptr(dom, &dom->modules[mod].seg);
++ if ( modulemap == NULL )
+ {
+- DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg) =>
NULL",
+- __FUNCTION__);
++ DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->modules[%u].seg) =>
NULL",
++ __FUNCTION__, mod);
+ goto err;
+ }
+ if ( unziplen )
+ {
+- if ( xc_dom_do_gunzip(dom->xch, dom->ramdisk_blob, dom->ramdisk_size,
+- ramdiskmap, ramdisklen) == -1 )
++ if ( xc_dom_do_gunzip(dom->xch, dom->modules[mod].blob,
dom->modules[mod].size,
++ modulemap, modulelen) == -1 )
+ goto err;
+ }
+ else
+- memcpy(ramdiskmap, dom->ramdisk_blob, dom->ramdisk_size);
++ memcpy(modulemap, dom->modules[mod].blob, dom->modules[mod].size);
+
+ return 0;
+
+@@ -1131,6 +1161,7 @@ int xc_dom_build_image(struct xc_dom_image *dom)
+ {
+ unsigned int page_size;
+ bool unmapped_initrd;
++ unsigned int mod;
+
+ DOMPRINTF_CALLED(dom->xch);
+
+@@ -1154,15 +1185,24 @@ int xc_dom_build_image(struct xc_dom_image *dom)
+ if ( dom->kernel_loader->loader(dom) != 0 )
+ goto err;
+
+- /* Don't load ramdisk now if no initial mapping required. */
+- unmapped_initrd = dom->parms.unmapped_initrd &&
!dom->ramdisk_seg.vstart;
+-
+- if ( dom->ramdisk_blob && !unmapped_initrd )
++ /* Don't load ramdisk / other modules now if no initial mapping required. */
++ for ( mod = 0; mod < dom->num_modules; mod++ )
+ {
+- if ( xc_dom_build_ramdisk(dom) != 0 )
+- goto err;
+- dom->initrd_start = dom->ramdisk_seg.vstart;
+- dom->initrd_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
++ unmapped_initrd = (dom->parms.unmapped_initrd &&
++ !dom->modules[mod].seg.vstart);
++
++ if ( dom->modules[mod].blob && !unmapped_initrd )
++ {
++ if ( xc_dom_build_module(dom, mod) != 0 )
++ goto err;
++
++ if ( mod == 0 )
++ {
++ dom->initrd_start = dom->modules[mod].seg.vstart;
++ dom->initrd_len =
++ dom->modules[mod].seg.vend - dom->modules[mod].seg.vstart;
++ }
++ }
+ }
+
+ /* load devicetree */
+@@ -1216,14 +1256,24 @@ int xc_dom_build_image(struct xc_dom_image *dom)
+ if ( dom->virt_pgtab_end && xc_dom_alloc_pad(dom, dom->virt_pgtab_end)
)
+ return -1;
+
+- /* Load ramdisk if no initial mapping required. */
+- if ( dom->ramdisk_blob && unmapped_initrd )
++ for ( mod = 0; mod < dom->num_modules; mod++ )
+ {
+- if ( xc_dom_build_ramdisk(dom) != 0 )
+- goto err;
+- dom->flags |= SIF_MOD_START_PFN;
+- dom->initrd_start = dom->ramdisk_seg.pfn;
+- dom->initrd_len = page_size * dom->ramdisk_seg.pages;
++ unmapped_initrd = (dom->parms.unmapped_initrd &&
++ !dom->modules[mod].seg.vstart);
++
++ /* Load ramdisk / other modules if no initial mapping required. */
++ if ( dom->modules[mod].blob && unmapped_initrd )
++ {
++ if ( xc_dom_build_module(dom, mod) != 0 )
++ goto err;
++
++ if ( mod == 0 )
++ {
++ dom->flags |= SIF_MOD_START_PFN;
++ dom->initrd_start = dom->modules[mod].seg.pfn;
++ dom->initrd_len = page_size * dom->modules[mod].seg.pages;
++ }
++ }
+ }
+
+ /* Allocate p2m list if outside of initial kernel mapping. */
+diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
+index bff68a011f..0b65dab4bc 100644
+--- a/tools/libxc/xc_dom_x86.c
++++ b/tools/libxc/xc_dom_x86.c
+@@ -70,8 +70,8 @@
+ #define round_up(addr, mask) ((addr) | (mask))
+ #define round_pg_up(addr) (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1))
+
+-#define HVMLOADER_MODULE_MAX_COUNT 1
+-#define HVMLOADER_MODULE_NAME_SIZE 10
++#define HVMLOADER_MODULE_MAX_COUNT 2
++#define HVMLOADER_MODULE_CMDLINE_SIZE MAX_GUEST_CMDLINE
+
+ struct xc_dom_params {
+ unsigned levels;
+@@ -627,6 +627,12 @@ static int alloc_magic_pages_hvm(struct xc_dom_image *dom)
+ xc_hvm_param_set(xch, domid, HVM_PARAM_SHARING_RING_PFN,
+ special_pfn(SPECIALPAGE_SHARING));
+
++ start_info_size +=
++ sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT;
++
++ start_info_size +=
++ HVMLOADER_MODULE_CMDLINE_SIZE * HVMLOADER_MODULE_MAX_COUNT;
++
+ if ( !dom->device_model )
+ {
+ if ( dom->cmdline )
+@@ -634,22 +640,9 @@ static int alloc_magic_pages_hvm(struct xc_dom_image *dom)
+ dom->cmdline_size = ROUNDUP(strlen(dom->cmdline) + 1, 8);
+ start_info_size += dom->cmdline_size;
+ }
+-
+- /* Limited to one module. */
+- if ( dom->ramdisk_blob )
+- start_info_size += sizeof(struct hvm_modlist_entry);
+ }
+ else
+ {
+- start_info_size +=
+- sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT;
+- /*
+- * Add extra space to write modules name.
+- * The HVMLOADER_MODULE_NAME_SIZE accounts for NUL byte.
+- */
+- start_info_size +=
+- HVMLOADER_MODULE_NAME_SIZE * HVMLOADER_MODULE_MAX_COUNT;
+-
+ /*
+ * Allocate and clear additional ioreq server pages. The default
+ * server will use the IOREQ and BUFIOREQ special pages above.
+@@ -749,7 +742,7 @@ static int start_info_x86_32(struct xc_dom_image *dom)
+ start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn);
+ start_info->console.domU.evtchn = dom->console_evtchn;
+
+- if ( dom->ramdisk_blob )
++ if ( dom->modules[0].blob )
+ {
+ start_info->mod_start = dom->initrd_start;
+ start_info->mod_len = dom->initrd_len;
+@@ -800,7 +793,7 @@ static int start_info_x86_64(struct xc_dom_image *dom)
+ start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn);
+ start_info->console.domU.evtchn = dom->console_evtchn;
+
+- if ( dom->ramdisk_blob )
++ if ( dom->modules[0].blob )
+ {
+ start_info->mod_start = dom->initrd_start;
+ start_info->mod_len = dom->initrd_len;
+@@ -1237,7 +1230,7 @@ static int meminit_hvm(struct xc_dom_image *dom)
+ unsigned long target_pages = dom->target_pages;
+ unsigned long cur_pages, cur_pfn;
+ int rc;
+- unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
++ unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
+ stat_1gb_pages = 0;
+ unsigned int memflags = 0;
+ int claim_enabled = dom->claim_enabled;
+@@ -1303,6 +1296,8 @@ static int meminit_hvm(struct xc_dom_image *dom)
+ p2m_size = 0;
+ for ( i = 0; i < nr_vmemranges; i++ )
+ {
++ DOMPRINTF("range: start=0x%"PRIx64" end=0x%"PRIx64,
vmemranges[i].start, vmemranges[i].end);
++
+ total_pages += ((vmemranges[i].end - vmemranges[i].start)
+ >> PAGE_SHIFT);
+ p2m_size = p2m_size > (vmemranges[i].end >> PAGE_SHIFT) ?
+@@ -1633,7 +1628,7 @@ static int alloc_pgtables_hvm(struct xc_dom_image *dom)
+ */
+ static void add_module_to_list(struct xc_dom_image *dom,
+ struct xc_hvm_firmware_module *module,
+- const char *name,
++ const char *cmdline,
+ struct hvm_modlist_entry *modlist,
+ struct hvm_start_info *start_info)
+ {
+@@ -1648,16 +1643,20 @@ static void add_module_to_list(struct xc_dom_image *dom,
+ return;
+
+ assert(start_info->nr_modules < HVMLOADER_MODULE_MAX_COUNT);
+- assert(strnlen(name, HVMLOADER_MODULE_NAME_SIZE)
+- < HVMLOADER_MODULE_NAME_SIZE);
+
+ modlist[index].paddr = module->guest_addr_out;
+ modlist[index].size = module->length;
+
+- strncpy(modules_cmdline_start + HVMLOADER_MODULE_NAME_SIZE * index,
+- name, HVMLOADER_MODULE_NAME_SIZE);
++ if ( cmdline )
++ {
++ assert(strnlen(cmdline, HVMLOADER_MODULE_CMDLINE_SIZE)
++ < HVMLOADER_MODULE_CMDLINE_SIZE);
++ strncpy(modules_cmdline_start + HVMLOADER_MODULE_CMDLINE_SIZE * index,
++ cmdline, HVMLOADER_MODULE_CMDLINE_SIZE);
++ }
++
+ modlist[index].cmdline_paddr =
+- modules_cmdline_paddr + HVMLOADER_MODULE_NAME_SIZE * index;
++ modules_cmdline_paddr + HVMLOADER_MODULE_CMDLINE_SIZE * index;
+
+ start_info->nr_modules++;
+ }
+@@ -1669,10 +1668,10 @@ static int bootlate_hvm(struct xc_dom_image *dom)
+ struct hvm_start_info *start_info;
+ size_t start_info_size;
+ struct hvm_modlist_entry *modlist;
++ unsigned int i;
+
+ start_info_size = sizeof(*start_info) + dom->cmdline_size;
+- if ( dom->ramdisk_blob )
+- start_info_size += sizeof(struct hvm_modlist_entry);
++ start_info_size += sizeof(struct hvm_modlist_entry) * dom->num_modules;
+
+ if ( start_info_size >
+ dom->start_info_seg.pages << XC_DOM_PAGE_SHIFT(dom) )
+@@ -1703,12 +1702,18 @@ static int bootlate_hvm(struct xc_dom_image *dom)
+ ((uintptr_t)cmdline - (uintptr_t)start_info);
+ }
+
+- if ( dom->ramdisk_blob )
++ for ( i = 0; i < dom->num_modules; i++ )
+ {
++ struct xc_hvm_firmware_module mod;
++
++ DOMPRINTF("Adding module %u", i);
++ mod.guest_addr_out =
++ dom->modules[i].seg.vstart - dom->parms.virt_base;
++ mod.length =
++ dom->modules[i].seg.vend - dom->modules[i].seg.vstart;
+
+- modlist[0].paddr = dom->ramdisk_seg.vstart - dom->parms.virt_base;
+- modlist[0].size = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+- start_info->nr_modules = 1;
++ add_module_to_list(dom, &mod, dom->modules[i].cmdline,
++ modlist, start_info);
+ }
+
+ /* ACPI module 0 is the RSDP */
+diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
+index ef834e652d..fbbdb9ec2f 100644
+--- a/tools/libxl/libxl_dom.c
++++ b/tools/libxl/libxl_dom.c
+@@ -796,12 +796,12 @@ int libxl__build_pv(libxl__gc *gc, uint32_t domid,
+
+ if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
+ if (state->pv_ramdisk.mapped) {
+- if ( (ret = xc_dom_ramdisk_mem(dom, state->pv_ramdisk.data,
state->pv_ramdisk.size)) != 0 ) {
++ if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data,
state->pv_ramdisk.size, NULL)) != 0 ) {
+ LOGE(ERROR, "xc_dom_ramdisk_mem failed");
+ goto out;
+ }
+ } else {
+- if ( (ret = xc_dom_ramdisk_file(dom, state->pv_ramdisk.path)) != 0 ) {
++ if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 )
{
+ LOGE(ERROR, "xc_dom_ramdisk_file failed");
+ goto out;
+ }
+@@ -1043,14 +1043,14 @@ static int libxl__domain_firmware(libxl__gc *gc,
+
+ if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
+ if (state->pv_ramdisk.mapped) {
+- rc = xc_dom_ramdisk_mem(dom, state->pv_ramdisk.data,
+- state->pv_ramdisk.size);
++ rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
++ state->pv_ramdisk.size, NULL);
+ if (rc) {
+ LOGE(ERROR, "xc_dom_ramdisk_mem failed");
+ goto out;
+ }
+ } else {
+- rc = xc_dom_ramdisk_file(dom, state->pv_ramdisk.path);
++ rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
+ if (rc) {
+ LOGE(ERROR, "xc_dom_ramdisk_file failed");
+ goto out;
+--
+2.14.3
+
+
+From 78e9cc3488ffd55131b129a3ab90169d4e903efe Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:58 +0000
+Subject: [PATCH 18/77] xen/common: Widen the guest logging buffer slightly
+
+This reduces the amount of line wrapping from guests; Xen in particular likes
+to print lines longer than 80 characters.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/include/xen/sched.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index 002ba29d6d..64abc1df6c 100644
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -427,7 +427,7 @@ struct domain
+ xen_domain_handle_t handle;
+
+ /* hvm_print_line() and guest_console_write() logging. */
+-#define DOMAIN_PBUF_SIZE 80
++#define DOMAIN_PBUF_SIZE 200
+ char *pbuf;
+ unsigned pbuf_idx;
+ spinlock_t pbuf_lock;
+--
+2.14.3
+
+
+From 92a6295c30a9f323de9d741e2e43f49df4412308 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:59 +0000
+Subject: [PATCH 19/77] x86/time: Print a more helpful error when a platform
+ timer can't be found
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/arch/x86/time.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
+index eba7aed72d..6c20b1036d 100644
+--- a/xen/arch/x86/time.c
++++ b/xen/arch/x86/time.c
+@@ -708,7 +708,8 @@ static u64 __init init_platform_timer(void)
+ }
+ }
+
+- BUG_ON(rc <= 0);
++ if ( rc <= 0 )
++ panic("Unable to find usable platform timer");
+
+ printk("Platform timer is %s %s\n",
+ freq_string(pts->frequency), pts->name);
+--
+2.14.3
+
+
+From ff1fb8fe53bb91823a1a37b6dd0e816d519c19d8 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:59 +0000
+Subject: [PATCH 20/77] x86/link: Introduce and use SECTION_ALIGN
+
+... to reduce the quantity of #ifdef EFI.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+CC: Jan Beulich <JBeulich(a)suse.com>
+---
+ xen/arch/x86/xen.lds.S | 50 +++++++++++++-------------------------------------
+ 1 file changed, 13 insertions(+), 37 deletions(-)
+
+diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
+index d5e8821d41..6164ad094f 100644
+--- a/xen/arch/x86/xen.lds.S
++++ b/xen/arch/x86/xen.lds.S
+@@ -12,12 +12,14 @@
+ #define FORMAT "pei-x86-64"
+ #undef __XEN_VIRT_START
+ #define __XEN_VIRT_START __image_base__
++#define SECTION_ALIGN MB(2)
+
+ ENTRY(efi_start)
+
+ #else /* !EFI */
+
+ #define FORMAT "elf64-x86-64"
++#define SECTION_ALIGN PAGE_SIZE
+
+ ENTRY(start)
+
+@@ -67,11 +69,7 @@ SECTIONS
+ _etext = .; /* End of text section */
+ } :text = 0x9090
+
+-#ifdef EFI
+- . = ALIGN(MB(2));
+-#else
+- . = ALIGN(PAGE_SIZE);
+-#endif
++ . = ALIGN(SECTION_ALIGN);
+ __2M_text_end = .;
+
+ __2M_rodata_start = .; /* Start of 2M superpages, mapped RO. */
+@@ -149,11 +147,7 @@ SECTIONS
+ #endif
+ _erodata = .;
+
+-#ifdef EFI
+- . = ALIGN(MB(2));
+-#else
+- . = ALIGN(PAGE_SIZE);
+-#endif
++ . = ALIGN(SECTION_ALIGN);
+ __2M_rodata_end = .;
+
+ __2M_init_start = .; /* Start of 2M superpages, mapped RWX (boot only). */
+@@ -215,11 +209,7 @@ SECTIONS
+ __ctors_end = .;
+ } :text
+
+-#ifdef EFI
+- . = ALIGN(MB(2));
+-#else
+- . = ALIGN(PAGE_SIZE);
+-#endif
++ . = ALIGN(SECTION_ALIGN);
+ __init_end = .;
+ __2M_init_end = .;
+
+@@ -257,11 +247,7 @@ SECTIONS
+ } :text
+ _end = . ;
+
+-#ifdef EFI
+- . = ALIGN(MB(2));
+-#else
+- . = ALIGN(PAGE_SIZE);
+-#endif
++ . = ALIGN(SECTION_ALIGN);
+ __2M_rwdata_end = .;
+
+ #ifdef EFI
+@@ -310,23 +296,13 @@ ASSERT(__image_base__ > XEN_VIRT_START ||
+ ASSERT(kexec_reloc_size - kexec_reloc <= PAGE_SIZE, "kexec_reloc is too
large")
+ #endif
+
+-#ifdef EFI
+-ASSERT(IS_ALIGNED(__2M_text_end, MB(2)), "__2M_text_end misaligned")
+-ASSERT(IS_ALIGNED(__2M_rodata_start, MB(2)), "__2M_rodata_start misaligned")
+-ASSERT(IS_ALIGNED(__2M_rodata_end, MB(2)), "__2M_rodata_end misaligned")
+-ASSERT(IS_ALIGNED(__2M_init_start, MB(2)), "__2M_init_start misaligned")
+-ASSERT(IS_ALIGNED(__2M_init_end, MB(2)), "__2M_init_end misaligned")
+-ASSERT(IS_ALIGNED(__2M_rwdata_start, MB(2)), "__2M_rwdata_start misaligned")
+-ASSERT(IS_ALIGNED(__2M_rwdata_end, MB(2)), "__2M_rwdata_end misaligned")
+-#else
+-ASSERT(IS_ALIGNED(__2M_text_end, PAGE_SIZE), "__2M_text_end misaligned")
+-ASSERT(IS_ALIGNED(__2M_rodata_start, PAGE_SIZE), "__2M_rodata_start
misaligned")
+-ASSERT(IS_ALIGNED(__2M_rodata_end, PAGE_SIZE), "__2M_rodata_end
misaligned")
+-ASSERT(IS_ALIGNED(__2M_init_start, PAGE_SIZE), "__2M_init_start
misaligned")
+-ASSERT(IS_ALIGNED(__2M_init_end, PAGE_SIZE), "__2M_init_end misaligned")
+-ASSERT(IS_ALIGNED(__2M_rwdata_start, PAGE_SIZE), "__2M_rwdata_start
misaligned")
+-ASSERT(IS_ALIGNED(__2M_rwdata_end, PAGE_SIZE), "__2M_rwdata_end
misaligned")
+-#endif
++ASSERT(IS_ALIGNED(__2M_text_end, SECTION_ALIGN), "__2M_text_end
misaligned")
++ASSERT(IS_ALIGNED(__2M_rodata_start, SECTION_ALIGN), "__2M_rodata_start
misaligned")
++ASSERT(IS_ALIGNED(__2M_rodata_end, SECTION_ALIGN), "__2M_rodata_end
misaligned")
++ASSERT(IS_ALIGNED(__2M_init_start, SECTION_ALIGN), "__2M_init_start
misaligned")
++ASSERT(IS_ALIGNED(__2M_init_end, SECTION_ALIGN), "__2M_init_end
misaligned")
++ASSERT(IS_ALIGNED(__2M_rwdata_start, SECTION_ALIGN), "__2M_rwdata_start
misaligned")
++ASSERT(IS_ALIGNED(__2M_rwdata_end, SECTION_ALIGN), "__2M_rwdata_end
misaligned")
+
+ ASSERT(IS_ALIGNED(cpu0_stack, STACK_SIZE), "cpu0_stack misaligned")
+
+--
+2.14.3
+
+
+From 9e46ae12edc8be1dd846ce545600db28dabfabc8 Mon Sep 17 00:00:00 2001
+From: Bob Moore <robert.moore(a)intel.com>
+Date: Thu, 11 Jan 2018 17:47:59 +0000
+Subject: [PATCH 21/77] ACPICA: Make ACPI Power Management Timer (PM Timer)
+ optional.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+PM Timer is now optional.
+This support is already in Windows8 and "SHOULD" come out in ACPI 5.0A
+(if all goes well).
+
+The change doesn't affect Xen directly, because it does not rely
+on the presence of the PM timer.
+
+Signed-off-by: Bob Moore <robert.moore(a)intel.com>
+Signed-off-by: Lv Zheng <lv.zheng(a)intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
+[ported to Xen]
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+ xen/drivers/acpi/tables/tbfadt.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/xen/drivers/acpi/tables/tbfadt.c b/xen/drivers/acpi/tables/tbfadt.c
+index d62d8d5cb9..f11fd5a900 100644
+--- a/xen/drivers/acpi/tables/tbfadt.c
++++ b/xen/drivers/acpi/tables/tbfadt.c
+@@ -95,7 +95,8 @@ static struct acpi_fadt_info __initdata fadt_info_table[] = {
+
+ {"PmTimerBlock", ACPI_FADT_OFFSET(xpm_timer_block),
+ ACPI_FADT_OFFSET(pm_timer_block),
+- ACPI_FADT_OFFSET(pm_timer_length), ACPI_FADT_REQUIRED},
++ ACPI_FADT_OFFSET(pm_timer_length),
++ ACPI_FADT_SEPARATE_LENGTH}, /* ACPI 5.0A: Timer is optional */
+
+ {"Gpe0Block", ACPI_FADT_OFFSET(xgpe0_block),
+ ACPI_FADT_OFFSET(gpe0_block),
+@@ -437,7 +438,7 @@ static void __init acpi_tb_validate_fadt(void)
+
+ if (fadt_info_table[i].type & ACPI_FADT_REQUIRED) {
+ /*
+- * Field is required (Pm1a_event, Pm1a_control, pm_timer).
++ * Field is required (Pm1a_event, Pm1a_control).
+ * Both the address and length must be non-zero.
+ */
+ if (!address64->address || !length) {
+--
+2.14.3
+
+
+From e7c8187b91fbff4c15e2cba06e33a1dce4b0b55e Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:59 +0000
+Subject: [PATCH 22/77] xen/domctl: Return arch_config via getdomaininfo
+
+This allows toolstack software to distinguish HVM from PVH guests.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+---
+v2: bump domctl version number
+---
+ tools/libxc/include/xenctrl.h | 1 +
+ tools/libxc/xc_domain.c | 1 +
+ xen/arch/x86/domctl.c | 2 ++
+ xen/include/public/domctl.h | 3 ++-
+ 4 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
+index 666db0b919..a92a8d7a53 100644
+--- a/tools/libxc/include/xenctrl.h
++++ b/tools/libxc/include/xenctrl.h
+@@ -456,6 +456,7 @@ typedef struct xc_dominfo {
+ unsigned int max_vcpu_id;
+ xen_domain_handle_t handle;
+ unsigned int cpupool;
++ struct xen_arch_domainconfig arch_config;
+ } xc_dominfo_t;
+
+ typedef xen_domctl_getdomaininfo_t xc_domaininfo_t;
+diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
+index 3ccd27f101..8169284dc1 100644
+--- a/tools/libxc/xc_domain.c
++++ b/tools/libxc/xc_domain.c
+@@ -421,6 +421,7 @@ int xc_domain_getinfo(xc_interface *xch,
+ info->nr_online_vcpus = domctl.u.getdomaininfo.nr_online_vcpus;
+ info->max_vcpu_id = domctl.u.getdomaininfo.max_vcpu_id;
+ info->cpupool = domctl.u.getdomaininfo.cpupool;
++ info->arch_config = domctl.u.getdomaininfo.arch_config;
+
+ memcpy(info->handle, domctl.u.getdomaininfo.handle,
+ sizeof(xen_domain_handle_t));
+diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
+index 075ee92cd7..b52d6d9552 100644
+--- a/xen/arch/x86/domctl.c
++++ b/xen/arch/x86/domctl.c
+@@ -345,6 +345,8 @@ void arch_get_domain_info(const struct domain *d,
+ {
+ if ( paging_mode_hap(d) )
+ info->flags |= XEN_DOMINF_hap;
++
++ info->arch_config.emulation_flags = d->arch.emulation_flags;
+ }
+
+ #define MAX_IOPORTS 0x10000
+diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
+index 70027abc00..463f8cc420 100644
+--- a/xen/include/public/domctl.h
++++ b/xen/include/public/domctl.h
+@@ -38,7 +38,7 @@
+ #include "hvm/save.h"
+ #include "memory.h"
+
+-#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000e
++#define XEN_DOMCTL_INTERFACE_VERSION 0x0000000f
+
+ /*
+ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
+@@ -116,6 +116,7 @@ struct xen_domctl_getdomaininfo {
+ uint32_t ssidref;
+ xen_domain_handle_t handle;
+ uint32_t cpupool;
++ struct xen_arch_domainconfig arch_config;
+ };
+ typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
+ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
+--
+2.14.3
+
+
+From 78898c9d1b5bffe141da923bf4b5b19cc388e260 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:59 +0000
+Subject: [PATCH 23/77] tools/ocaml: Expose arch_config in domaininfo
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ tools/ocaml/libs/xc/xenctrl.ml | 29 +++++++++++++++++++++++++++++
+ tools/ocaml/libs/xc/xenctrl.mli | 28 ++++++++++++++++++++++++++++
+ tools/ocaml/libs/xc/xenctrl_stubs.c | 26 ++++++++++++++++++++++++--
+ 3 files changed, 81 insertions(+), 2 deletions(-)
+
+diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
+index 70a325b0e9..d549068d60 100644
+--- a/tools/ocaml/libs/xc/xenctrl.ml
++++ b/tools/ocaml/libs/xc/xenctrl.ml
+@@ -28,6 +28,34 @@ type vcpuinfo =
+ cpumap: int32;
+ }
+
++type xen_arm_arch_domainconfig =
++{
++ gic_version: int;
++ nr_spis: int;
++ clock_frequency: int32;
++}
++
++type x86_arch_emulation_flags =
++ | X86_EMU_LAPIC
++ | X86_EMU_HPET
++ | X86_EMU_PM
++ | X86_EMU_RTC
++ | X86_EMU_IOAPIC
++ | X86_EMU_PIC
++ | X86_EMU_VGA
++ | X86_EMU_IOMMU
++ | X86_EMU_PIT
++ | X86_EMU_USE_PIRQ
++
++type xen_x86_arch_domainconfig =
++{
++ emulation_flags: x86_arch_emulation_flags list;
++}
++
++type arch_domainconfig =
++ | ARM of xen_arm_arch_domainconfig
++ | X86 of xen_x86_arch_domainconfig
++
+ type domaininfo =
+ {
+ domid : domid;
+@@ -46,6 +74,7 @@ type domaininfo =
+ max_vcpu_id : int;
+ ssidref : int32;
+ handle : int array;
++ arch_config : arch_domainconfig;
+ }
+
+ type sched_control =
+diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
+index 702d8a7ab8..08f1fd26ae 100644
+--- a/tools/ocaml/libs/xc/xenctrl.mli
++++ b/tools/ocaml/libs/xc/xenctrl.mli
+@@ -22,6 +22,33 @@ type vcpuinfo = {
+ cputime : int64;
+ cpumap : int32;
+ }
++
++type xen_arm_arch_domainconfig = {
++ gic_version: int;
++ nr_spis: int;
++ clock_frequency: int32;
++}
++
++type x86_arch_emulation_flags =
++ | X86_EMU_LAPIC
++ | X86_EMU_HPET
++ | X86_EMU_PM
++ | X86_EMU_RTC
++ | X86_EMU_IOAPIC
++ | X86_EMU_PIC
++ | X86_EMU_VGA
++ | X86_EMU_IOMMU
++ | X86_EMU_PIT
++ | X86_EMU_USE_PIRQ
++
++type xen_x86_arch_domainconfig = {
++ emulation_flags: x86_arch_emulation_flags list;
++}
++
++type arch_domainconfig =
++ | ARM of xen_arm_arch_domainconfig
++ | X86 of xen_x86_arch_domainconfig
++
+ type domaininfo = {
+ domid : domid;
+ dying : bool;
+@@ -39,6 +66,7 @@ type domaininfo = {
+ max_vcpu_id : int;
+ ssidref : int32;
+ handle : int array;
++ arch_config : arch_domainconfig;
+ }
+ type sched_control = { weight : int; cap : int; }
+ type physinfo_cap_flag = CAP_HVM | CAP_DirectIO
+diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
+index c66732f67c..124aa34fe8 100644
+--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
++++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
+@@ -273,10 +273,10 @@ CAMLprim value stub_xc_domain_shutdown(value xch, value domid,
value reason)
+ static value alloc_domaininfo(xc_domaininfo_t * info)
+ {
+ CAMLparam0();
+- CAMLlocal2(result, tmp);
++ CAMLlocal5(result, tmp, arch_config, x86_arch_config, emul_list);
+ int i;
+
+- result = caml_alloc_tuple(16);
++ result = caml_alloc_tuple(17);
+
+ Store_field(result, 0, Val_int(info->domain));
+ Store_field(result, 1, Val_bool(info->flags & XEN_DOMINF_dying));
+@@ -302,6 +302,28 @@ static value alloc_domaininfo(xc_domaininfo_t * info)
+
+ Store_field(result, 15, tmp);
+
++ /* emulation_flags: x86_arch_emulation_flags list; */
++ tmp = emul_list = Val_emptylist;
++ for (i = 0; i < 10; i++) {
++ if ((info->arch_config.emulation_flags >> i) & 1) {
++ tmp = caml_alloc_small(2, Tag_cons);
++ Field(tmp, 0) = Val_int(i);
++ Field(tmp, 1) = emul_list;
++ emul_list = tmp;
++ }
++ }
++
++ /* xen_x86_arch_domainconfig */
++ x86_arch_config = caml_alloc_tuple(1);
++ Store_field(x86_arch_config, 0, emul_list);
++
++ /* arch_config: arch_domainconfig */
++ arch_config = caml_alloc_small(1, 1);
++
++ Store_field(arch_config, 0, x86_arch_config);
++
++ Store_field(result, 16, arch_config);
++
+ CAMLreturn(result);
+ }
+
+--
+2.14.3
+
+
+From 48811d481cedd5838a2d0ba8dfa149133888c84b Mon Sep 17 00:00:00 2001
+From: Jon Ludlam <jonathan.ludlam(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:47:59 +0000
+Subject: [PATCH 24/77] tools/ocaml: Extend domain_create() to take
+ arch_domainconfig
+
+No longer passing NULL into xc_domain_create() allows for the creation
+of PVH guests.
+
+Signed-off-by: Jon Ludlam <jonathan.ludlam(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ tools/ocaml/libs/xc/xenctrl.ml | 2 +-
+ tools/ocaml/libs/xc/xenctrl.mli | 2 +-
+ tools/ocaml/libs/xc/xenctrl_stubs.c | 22 ++++++++++++++++++++--
+ 3 files changed, 22 insertions(+), 4 deletions(-)
+
+diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
+index d549068d60..9116aa222c 100644
+--- a/tools/ocaml/libs/xc/xenctrl.ml
++++ b/tools/ocaml/libs/xc/xenctrl.ml
+@@ -143,7 +143,7 @@ let with_intf f =
+ interface_close xc;
+ r
+
+-external _domain_create: handle -> int32 -> domain_create_flag list -> int
array -> domid
++external _domain_create: handle -> int32 -> domain_create_flag list -> int
array -> arch_domainconfig -> domid
+ = "stub_xc_domain_create"
+
+ let int_array_of_uuid_string s =
+diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
+index 08f1fd26ae..54c099c88f 100644
+--- a/tools/ocaml/libs/xc/xenctrl.mli
++++ b/tools/ocaml/libs/xc/xenctrl.mli
+@@ -102,7 +102,7 @@ external sizeof_xen_pfn : unit -> int =
"stub_sizeof_xen_pfn"
+ external interface_open : unit -> handle = "stub_xc_interface_open"
+ external interface_close : handle -> unit = "stub_xc_interface_close"
+ val with_intf : (handle -> 'a) -> 'a
+-val domain_create : handle -> int32 -> domain_create_flag list -> string ->
domid
++val domain_create : handle -> int32 -> domain_create_flag list -> string ->
arch_domainconfig -> domid
+ val domain_sethandle : handle -> domid -> string -> unit
+ external domain_max_vcpus : handle -> domid -> int -> unit
+ = "stub_xc_domain_max_vcpus"
+diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
+index 124aa34fe8..0b5a2361c0 100644
+--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
++++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
+@@ -144,7 +144,8 @@ static int domain_create_flag_table[] = {
+ };
+
+ CAMLprim value stub_xc_domain_create(value xch, value ssidref,
+- value flags, value handle)
++ value flags, value handle,
++ value domconfig)
+ {
+ CAMLparam4(xch, ssidref, flags, handle);
+
+@@ -155,6 +156,7 @@ CAMLprim value stub_xc_domain_create(value xch, value ssidref,
+ uint32_t c_ssidref = Int32_val(ssidref);
+ unsigned int c_flags = 0;
+ value l;
++ xc_domain_configuration_t config = {};
+
+ if (Wosize_val(handle) != 16)
+ caml_invalid_argument("Handle not a 16-integer array");
+@@ -168,8 +170,24 @@ CAMLprim value stub_xc_domain_create(value xch, value ssidref,
+ c_flags |= domain_create_flag_table[v];
+ }
+
++ switch(Tag_val(domconfig)) {
++ case 0: /* ARM - nothing to do */
++ caml_failwith("Unhandled: ARM");
++ break;
++
++ case 1: /* X86 - emulation flags in the block */
++ for (l = Field(Field(domconfig, 0), 0);
++ l != Val_none;
++ l = Field(l, 1))
++ config.emulation_flags |= 1u << Int_val(Field(l, 0));
++ break;
++
++ default:
++ caml_failwith("Unhandled domconfig type");
++ }
++
+ caml_enter_blocking_section();
+- result = xc_domain_create(_H(xch), c_ssidref, h, c_flags, &domid, NULL);
++ result = xc_domain_create(_H(xch), c_ssidref, h, c_flags, &domid, &config);
+ caml_leave_blocking_section();
+
+ if (result < 0)
+--
+2.14.3
+
+
+From 57dc22b80d3ba6db7eea87d84a009015e65eefb0 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:48:00 +0000
+Subject: [PATCH 25/77] x86/fixmap: Modify fix_to_virt() to return a void
+ pointer
+
+Almost all users of fix_to_virt() actually want a pointer. Include the cast
+within the definition, so the callers don't need to.
+
+Two users which need the integer value are switched to using __fix_to_virt()
+directly. A few users stay fully unchanged, due to GCC's void pointer
+arithmetic extension causing the same behaviour. Most users however have
+their explicit casting dropped.
+
+Since __iomem is not used consistently in Xen, we drop it too.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Wei Liu <wei.liu2(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+v2: update commit message and remove unnecessary parentheses.
+---
+ xen/arch/x86/acpi/lib.c | 2 +-
+ xen/arch/x86/mm.c | 4 ++--
+ xen/arch/x86/mpparse.c | 2 +-
+ xen/arch/x86/msi.c | 3 +--
+ xen/arch/x86/tboot.c | 4 ++--
+ xen/drivers/acpi/apei/apei-io.c | 2 +-
+ xen/drivers/char/ehci-dbgp.c | 2 +-
+ xen/drivers/char/ns16550.c | 2 +-
+ xen/include/asm-x86/apicdef.h | 2 +-
+ xen/include/asm-x86/fixmap.h | 2 +-
+ 10 files changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/acpi/lib.c b/xen/arch/x86/acpi/lib.c
+index 7d7c71848b..265b9ad819 100644
+--- a/xen/arch/x86/acpi/lib.c
++++ b/xen/arch/x86/acpi/lib.c
+@@ -49,7 +49,7 @@ char *__acpi_map_table(paddr_t phys, unsigned long size)
+ offset = phys & (PAGE_SIZE - 1);
+ mapped_size = PAGE_SIZE - offset;
+ set_fixmap(FIX_ACPI_END, phys);
+- base = fix_to_virt(FIX_ACPI_END);
++ base = __fix_to_virt(FIX_ACPI_END);
+
+ /*
+ * Most cases can be covered by the below.
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index a7a76a71db..0569342200 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5205,12 +5205,12 @@ void __set_fixmap(
+ enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
+ {
+ BUG_ON(idx >= __end_of_fixed_addresses);
+- map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
++ map_pages_to_xen(__fix_to_virt(idx), mfn, 1, flags);
+ }
+
+ void *__init arch_vmap_virt_end(void)
+ {
+- return (void *)fix_to_virt(__end_of_fixed_addresses);
++ return fix_to_virt(__end_of_fixed_addresses);
+ }
+
+ void __iomem *ioremap(paddr_t pa, size_t len)
+diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
+index a1a0738a19..49140e46f0 100644
+--- a/xen/arch/x86/mpparse.c
++++ b/xen/arch/x86/mpparse.c
+@@ -703,7 +703,7 @@ static void __init efi_check_config(void)
+ return;
+
+ __set_fixmap(FIX_EFI_MPF, PFN_DOWN(efi.mps), __PAGE_HYPERVISOR);
+- mpf = (void *)fix_to_virt(FIX_EFI_MPF) + ((long)efi.mps & (PAGE_SIZE-1));
++ mpf = fix_to_virt(FIX_EFI_MPF) + ((long)efi.mps & (PAGE_SIZE-1));
+
+ if (memcmp(mpf->mpf_signature, "_MP_", 4) == 0 &&
+ mpf->mpf_length == 1 &&
+diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
+index 4652b98c2d..475881ed89 100644
+--- a/xen/arch/x86/msi.c
++++ b/xen/arch/x86/msi.c
+@@ -961,8 +961,7 @@ static int msix_capability_init(struct pci_dev *dev,
+ xfree(entry);
+ return idx;
+ }
+- base = (void *)(fix_to_virt(idx) +
+- ((unsigned long)entry_paddr & (PAGE_SIZE - 1)));
++ base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));
+
+ /* Mask interrupt here */
+ writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c
+index 59d7c477f4..d36bf33407 100644
+--- a/xen/arch/x86/tboot.c
++++ b/xen/arch/x86/tboot.c
+@@ -82,7 +82,7 @@ static void __init tboot_copy_memory(unsigned char *va, uint32_t size,
+ {
+ map_base = PFN_DOWN(pa + i);
+ set_fixmap(FIX_TBOOT_MAP_ADDRESS, map_base << PAGE_SHIFT);
+- map_addr = (unsigned char *)fix_to_virt(FIX_TBOOT_MAP_ADDRESS);
++ map_addr = fix_to_virt(FIX_TBOOT_MAP_ADDRESS);
+ }
+ va[i] = map_addr[pa + i - (map_base << PAGE_SHIFT)];
+ }
+@@ -98,7 +98,7 @@ void __init tboot_probe(void)
+
+ /* Map and check for tboot UUID. */
+ set_fixmap(FIX_TBOOT_SHARED_BASE, opt_tboot_pa);
+- tboot_shared = (tboot_shared_t *)fix_to_virt(FIX_TBOOT_SHARED_BASE);
++ tboot_shared = fix_to_virt(FIX_TBOOT_SHARED_BASE);
+ if ( tboot_shared == NULL )
+ return;
+ if ( memcmp(&tboot_shared_uuid, (uuid_t *)tboot_shared, sizeof(uuid_t)) )
+diff --git a/xen/drivers/acpi/apei/apei-io.c b/xen/drivers/acpi/apei/apei-io.c
+index 8955de935e..89b70f45ef 100644
+--- a/xen/drivers/acpi/apei/apei-io.c
++++ b/xen/drivers/acpi/apei/apei-io.c
+@@ -92,7 +92,7 @@ static void __iomem *__init apei_range_map(paddr_t paddr, unsigned long
size)
+ apei_range_nr++;
+ }
+
+- return (void __iomem *)fix_to_virt(FIX_APEI_RANGE_BASE + start_nr);
++ return fix_to_virt(FIX_APEI_RANGE_BASE + start_nr);
+ }
+
+ /*
+diff --git a/xen/drivers/char/ehci-dbgp.c b/xen/drivers/char/ehci-dbgp.c
+index d48e777c34..d0071d3114 100644
+--- a/xen/drivers/char/ehci-dbgp.c
++++ b/xen/drivers/char/ehci-dbgp.c
+@@ -1327,7 +1327,7 @@ static void __init ehci_dbgp_init_preirq(struct serial_port *port)
+ * than enough. 1k is the biggest that was seen.
+ */
+ set_fixmap_nocache(FIX_EHCI_DBGP, dbgp->bar_val);
+- ehci_bar = (void __iomem *)fix_to_virt(FIX_EHCI_DBGP);
++ ehci_bar = fix_to_virt(FIX_EHCI_DBGP);
+ ehci_bar += dbgp->bar_val & ~PAGE_MASK;
+ dbgp_printk("ehci_bar: %p\n", ehci_bar);
+
+diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
+index e0f8199f98..f32dbd3247 100644
+--- a/xen/drivers/char/ns16550.c
++++ b/xen/drivers/char/ns16550.c
+@@ -697,7 +697,7 @@ static void __init ns16550_init_preirq(struct serial_port *port)
+ enum fixed_addresses idx = FIX_COM_BEGIN + (uart - ns16550_com);
+
+ set_fixmap_nocache(idx, uart->io_base);
+- uart->remapped_io_base = (void __iomem *)fix_to_virt(idx);
++ uart->remapped_io_base = fix_to_virt(idx);
+ uart->remapped_io_base += uart->io_base & ~PAGE_MASK;
+ #else
+ uart->remapped_io_base = (char *)ioremap(uart->io_base,
uart->io_size);
+diff --git a/xen/include/asm-x86/apicdef.h b/xen/include/asm-x86/apicdef.h
+index eed504a31a..2fa0b77a8a 100644
+--- a/xen/include/asm-x86/apicdef.h
++++ b/xen/include/asm-x86/apicdef.h
+@@ -119,7 +119,7 @@
+ /* Only available in x2APIC mode */
+ #define APIC_SELF_IPI 0x3F0
+
+-#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
++#define APIC_BASE __fix_to_virt(FIX_APIC_BASE)
+
+ /* It's only used in x2APIC mode of an x2APIC unit. */
+ #define APIC_MSR_BASE 0x800
+diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h
+index 89bf6cb611..51b0e7e945 100644
+--- a/xen/include/asm-x86/fixmap.h
++++ b/xen/include/asm-x86/fixmap.h
+@@ -79,7 +79,7 @@ extern void __set_fixmap(
+ #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+ #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+-#define fix_to_virt(x) (__fix_to_virt(x))
++#define fix_to_virt(x) ((void *)__fix_to_virt(x))
+
+ static inline unsigned long virt_to_fix(const unsigned long vaddr)
+ {
+--
+2.14.3
+
+
+From b538a13a68b42dbe47832d76299011765bf59e60 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Thu, 11 Jan 2018 17:48:00 +0000
+Subject: [PATCH 26/77] x86: Common cpuid faulting support
+
+With CPUID Faulting offered to SVM guests, move Xen's faulting code to being
+common rather than Intel specific.
+
+This is necessary for nested Xen (inc. pv-shim mode) to prevent PV guests from
+finding the outer HVM Xen leaves via native cpuid.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ xen/arch/x86/cpu/amd.c | 16 +++++---
+ xen/arch/x86/cpu/common.c | 76 ++++++++++++++++++++++++++++++++++++--
+ xen/arch/x86/cpu/intel.c | 82 +++++++----------------------------------
+ xen/include/asm-x86/cpuid.h | 3 --
+ xen/include/asm-x86/processor.h | 4 +-
+ 5 files changed, 98 insertions(+), 83 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 5f36ac75a7..2bff3ee377 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -198,11 +198,12 @@ static void __init noinline probe_masking_msrs(void)
+ }
+
+ /*
+- * Context switch levelling state to the next domain. A parameter of NULL is
+- * used to context switch to the default host state (by the cpu bringup-code,
+- * crash path, etc).
++ * Context switch CPUID masking state to the next domain. Only called if
++ * CPUID Faulting isn't available, but masking MSRs have been detected. A
++ * parameter of NULL is used to context switch to the default host state (by
++ * the cpu bringup-code, crash path, etc).
+ */
+-static void amd_ctxt_switch_levelling(const struct vcpu *next)
++static void amd_ctxt_switch_masking(const struct vcpu *next)
+ {
+ struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
+ const struct domain *nextd = next ? next->domain : NULL;
+@@ -263,6 +264,9 @@ static void __init noinline amd_init_levelling(void)
+ {
+ const struct cpuidmask *m = NULL;
+
++ if (probe_cpuid_faulting())
++ return;
++
+ probe_masking_msrs();
+
+ if (*opt_famrev != '\0') {
+@@ -352,7 +356,7 @@ static void __init noinline amd_init_levelling(void)
+ }
+
+ if (levelling_caps)
+- ctxt_switch_levelling = amd_ctxt_switch_levelling;
++ ctxt_switch_masking = amd_ctxt_switch_masking;
+ }
+
+ /*
+@@ -518,7 +522,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
+ if (c == &boot_cpu_data)
+ amd_init_levelling();
+
+- amd_ctxt_switch_levelling(NULL);
++ ctxt_switch_levelling(NULL);
+ }
+
+ static void init_amd(struct cpuinfo_x86 *c)
+diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
+index 6cf362849e..157bae2026 100644
+--- a/xen/arch/x86/cpu/common.c
++++ b/xen/arch/x86/cpu/common.c
+@@ -113,12 +113,80 @@ static const struct cpu_dev default_cpu = {
+ };
+ static const struct cpu_dev *this_cpu = &default_cpu;
+
+-static void default_ctxt_switch_levelling(const struct vcpu *next)
++static DEFINE_PER_CPU(uint64_t, msr_misc_features);
++void (* __read_mostly ctxt_switch_masking)(const struct vcpu *next);
++
++bool __init probe_cpuid_faulting(void)
++{
++ uint64_t val;
++
++ if (rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val) ||
++ !(val & MSR_PLATFORM_INFO_CPUID_FAULTING) ||
++ rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES,
++ this_cpu(msr_misc_features)))
++ {
++ setup_clear_cpu_cap(X86_FEATURE_CPUID_FAULTING);
++ return false;
++ }
++
++ expected_levelling_cap |= LCAP_faulting;
++ levelling_caps |= LCAP_faulting;
++ setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING);
++
++ return true;
++}
++
++static void set_cpuid_faulting(bool enable)
++{
++ uint64_t *this_misc_features = &this_cpu(msr_misc_features);
++ uint64_t val = *this_misc_features;
++
++ if (!!(val & MSR_MISC_FEATURES_CPUID_FAULTING) == enable)
++ return;
++
++ val ^= MSR_MISC_FEATURES_CPUID_FAULTING;
++
++ wrmsrl(MSR_INTEL_MISC_FEATURES_ENABLES, val);
++ *this_misc_features = val;
++}
++
++void ctxt_switch_levelling(const struct vcpu *next)
+ {
+- /* Nop */
++ const struct domain *nextd = next ? next->domain : NULL;
++
++ if (cpu_has_cpuid_faulting) {
++ /*
++ * No need to alter the faulting setting if we are switching
++ * to idle; it won't affect any code running in idle context.
++ */
++ if (nextd && is_idle_domain(nextd))
++ return;
++ /*
++ * We *should* be enabling faulting for the control domain.
++ *
++ * Unfortunately, the domain builder (having only ever been a
++ * PV guest) expects to be able to see host cpuid state in a
++ * native CPUID instruction, to correctly build a CPUID policy
++ * for HVM guests (notably the xstate leaves).
++ *
++ * This logic is fundimentally broken for HVM toolstack
++ * domains, and faulting causes PV guests to behave like HVM
++ * guests from their point of view.
++ *
++ * Future development plans will move responsibility for
++ * generating the maximum full cpuid policy into Xen, at which
++ * this problem will disappear.
++ */
++ set_cpuid_faulting(nextd && !is_control_domain(nextd) &&
++ (is_pv_domain(nextd) ||
++ next->arch.msr->
++ misc_features_enables.cpuid_faulting));
++ return;
++ }
++
++ if (ctxt_switch_masking)
++ ctxt_switch_masking(next);
+ }
+-void (* __read_mostly ctxt_switch_levelling)(const struct vcpu *next) =
+- default_ctxt_switch_levelling;
+
+ bool_t opt_cpu_info;
+ boolean_param("cpuinfo", opt_cpu_info);
+diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c
+index ac932e5b38..508e56f5c1 100644
+--- a/xen/arch/x86/cpu/intel.c
++++ b/xen/arch/x86/cpu/intel.c
+@@ -17,41 +17,6 @@
+
+ #define select_idle_routine(x) ((void)0)
+
+-static bool __init probe_intel_cpuid_faulting(void)
+-{
+- uint64_t x;
+-
+- if (rdmsr_safe(MSR_INTEL_PLATFORM_INFO, x) ||
+- !(x & MSR_PLATFORM_INFO_CPUID_FAULTING))
+- return 0;
+-
+- expected_levelling_cap |= LCAP_faulting;
+- levelling_caps |= LCAP_faulting;
+- setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING);
+- return 1;
+-}
+-
+-DEFINE_PER_CPU(bool, cpuid_faulting_enabled);
+-
+-static void set_cpuid_faulting(bool enable)
+-{
+- bool *this_enabled = &this_cpu(cpuid_faulting_enabled);
+- uint32_t hi, lo;
+-
+- ASSERT(cpu_has_cpuid_faulting);
+-
+- if (*this_enabled == enable)
+- return;
+-
+- rdmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi);
+- lo &= ~MSR_MISC_FEATURES_CPUID_FAULTING;
+- if (enable)
+- lo |= MSR_MISC_FEATURES_CPUID_FAULTING;
+- wrmsr(MSR_INTEL_MISC_FEATURES_ENABLES, lo, hi);
+-
+- *this_enabled = enable;
+-}
+-
+ /*
+ * Set caps in expected_levelling_cap, probe a specific masking MSR, and set
+ * caps in levelling_caps if it is found, or clobber the MSR index if missing.
+@@ -147,40 +112,17 @@ static void __init probe_masking_msrs(void)
+ }
+
+ /*
+- * Context switch levelling state to the next domain. A parameter of NULL is
+- * used to context switch to the default host state (by the cpu bringup-code,
+- * crash path, etc).
++ * Context switch CPUID masking state to the next domain. Only called if
++ * CPUID Faulting isn't available, but masking MSRs have been detected. A
++ * parameter of NULL is used to context switch to the default host state (by
++ * the cpu bringup-code, crash path, etc).
+ */
+-static void intel_ctxt_switch_levelling(const struct vcpu *next)
++static void intel_ctxt_switch_masking(const struct vcpu *next)
+ {
+ struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
+ const struct domain *nextd = next ? next->domain : NULL;
+- const struct cpuidmasks *masks;
+-
+- if (cpu_has_cpuid_faulting) {
+- /*
+- * We *should* be enabling faulting for the control domain.
+- *
+- * Unfortunately, the domain builder (having only ever been a
+- * PV guest) expects to be able to see host cpuid state in a
+- * native CPUID instruction, to correctly build a CPUID policy
+- * for HVM guests (notably the xstate leaves).
+- *
+- * This logic is fundimentally broken for HVM toolstack
+- * domains, and faulting causes PV guests to behave like HVM
+- * guests from their point of view.
+- *
+- * Future development plans will move responsibility for
+- * generating the maximum full cpuid policy into Xen, at which
+- * this problem will disappear.
+- */
+- set_cpuid_faulting(nextd && !is_control_domain(nextd) &&
+- (is_pv_domain(nextd) ||
+- next->arch.msr->misc_features_enables.cpuid_faulting));
+- return;
+- }
+-
+- masks = (nextd && is_pv_domain(nextd) &&
nextd->arch.pv_domain.cpuidmasks)
++ const struct cpuidmasks *masks =
++ (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
+ ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
+
+ if (msr_basic) {
+@@ -225,8 +167,10 @@ static void intel_ctxt_switch_levelling(const struct vcpu *next)
+ */
+ static void __init noinline intel_init_levelling(void)
+ {
+- if (!probe_intel_cpuid_faulting())
+- probe_masking_msrs();
++ if (probe_cpuid_faulting())
++ return;
++
++ probe_masking_msrs();
+
+ if (msr_basic) {
+ uint32_t ecx, edx, tmp;
+@@ -280,7 +224,7 @@ static void __init noinline intel_init_levelling(void)
+ }
+
+ if (levelling_caps)
+- ctxt_switch_levelling = intel_ctxt_switch_levelling;
++ ctxt_switch_masking = intel_ctxt_switch_masking;
+ }
+
+ static void early_init_intel(struct cpuinfo_x86 *c)
+@@ -320,7 +264,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
+ if (c == &boot_cpu_data)
+ intel_init_levelling();
+
+- intel_ctxt_switch_levelling(NULL);
++ ctxt_switch_levelling(NULL);
+ }
+
+ /*
+diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h
+index d2dd841e15..74d6f123e5 100644
+--- a/xen/include/asm-x86/cpuid.h
++++ b/xen/include/asm-x86/cpuid.h
+@@ -58,9 +58,6 @@ DECLARE_PER_CPU(struct cpuidmasks, cpuidmasks);
+ /* Default masking MSR values, calculated at boot. */
+ extern struct cpuidmasks cpuidmask_defaults;
+
+-/* Whether or not cpuid faulting is available for the current domain. */
+-DECLARE_PER_CPU(bool, cpuid_faulting_enabled);
+-
+ #define CPUID_GUEST_NR_BASIC (0xdu + 1)
+ #define CPUID_GUEST_NR_FEAT (0u + 1)
+ #define CPUID_GUEST_NR_CACHE (5u + 1)
+diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
+index 41a8d8c32f..c9601b2fb2 100644
+--- a/xen/include/asm-x86/processor.h
++++ b/xen/include/asm-x86/processor.h
+@@ -151,7 +151,9 @@ extern struct cpuinfo_x86 boot_cpu_data;
+ extern struct cpuinfo_x86 cpu_data[];
+ #define current_cpu_data cpu_data[smp_processor_id()]
+
+-extern void (*ctxt_switch_levelling)(const struct vcpu *next);
++extern bool probe_cpuid_faulting(void);
++extern void ctxt_switch_levelling(const struct vcpu *next);
++extern void (*ctxt_switch_masking)(const struct vcpu *next);
+
+ extern u64 host_pat;
+ extern bool_t opt_cpu_info;
+--
+2.14.3
+
+
+From af2f50b2b6f284a5498bcfe8e4203b25e120338e Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Fri, 10 Nov 2017 16:35:26 +0000
+Subject: [PATCH 27/77] x86/Kconfig: Options for Xen and PVH support
+
+Introduce two options. One to detect whether the binary is running on
+Xen, the other enables PVH ABI support.
+
+The former will be useful to PV in HVM approach. Both will be used by
+PV in PVH approach.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+v2:
+Write commit message. Didn't change the config option value as it
+requires a lot of changes in later patches.
+---
+ xen/arch/x86/Kconfig | 17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
+index 7c4582922f..c0b0bcdcb3 100644
+--- a/xen/arch/x86/Kconfig
++++ b/xen/arch/x86/Kconfig
+@@ -117,6 +117,23 @@ config TBOOT
+ Technology (TXT)
+
+ If unsure, say Y.
++
++config XEN_GUEST
++ def_bool n
++ prompt "Xen Guest"
++ ---help---
++ Support for Xen detecting when it is running under Xen.
++
++ If unsure, say N.
++
++config PVH_GUEST
++ def_bool n
++ prompt "PVH Guest"
++ depends on XEN_GUEST
++ ---help---
++ Support booting using the PVH ABI.
++
++ If unsure, say N.
+ endmenu
+
+ source "common/Kconfig"
+--
+2.14.3
+
+
+From f575701f3c7a6c6afde7c289058d9d3110a617d1 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 22 Nov 2017 11:09:41 +0000
+Subject: [PATCH 28/77] x86/link: Relocate program headers
+
+When the xen binary is loaded by libelf (in the future) we rely on the
+elf loader to load the binary accordingly. Specify the load address so
+that the resulting binary can make p_vaddr and p_paddr have different
+values.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+v2:
+Clarify commit message. Haven't tested grub1 boot.
+---
+ xen/arch/x86/xen.lds.S | 22 +++++++++++++---------
+ 1 file changed, 13 insertions(+), 9 deletions(-)
+
+diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
+index 6164ad094f..400d8a56c4 100644
+--- a/xen/arch/x86/xen.lds.S
++++ b/xen/arch/x86/xen.lds.S
+@@ -13,6 +13,7 @@
+ #undef __XEN_VIRT_START
+ #define __XEN_VIRT_START __image_base__
+ #define SECTION_ALIGN MB(2)
++#define DECL_SECTION(x) x :
+
+ ENTRY(efi_start)
+
+@@ -20,8 +21,9 @@ ENTRY(efi_start)
+
+ #define FORMAT "elf64-x86-64"
+ #define SECTION_ALIGN PAGE_SIZE
++#define DECL_SECTION(x) x : AT(ADDR(x) - __XEN_VIRT_START)
+
+-ENTRY(start)
++ENTRY(start_pa)
+
+ #endif /* EFI */
+
+@@ -56,9 +58,11 @@ SECTIONS
+ __2M_text_start = .; /* Start of 2M superpages, mapped RX. */
+ #endif
+
++ start_pa = ABSOLUTE(start - __XEN_VIRT_START);
++
+ . = __XEN_VIRT_START + XEN_IMG_OFFSET;
+ _start = .;
+- .text : {
++ DECL_SECTION(.text) {
+ _stext = .; /* Text and read-only data */
+ *(.text)
+ *(.text.cold)
+@@ -73,7 +77,7 @@ SECTIONS
+ __2M_text_end = .;
+
+ __2M_rodata_start = .; /* Start of 2M superpages, mapped RO. */
+- .rodata : {
++ DECL_SECTION(.rodata) {
+ _srodata = .;
+ /* Bug frames table */
+ __start_bug_frames = .;
+@@ -132,13 +136,13 @@ SECTIONS
+ * compiler may want to inject other things in the .note which we don't care
+ * about - hence this unique name.
+ */
+- .note.gnu.build-id : {
++ DECL_SECTION(.note.gnu.build-id) {
+ __note_gnu_build_id_start = .;
+ *(.note.gnu.build-id)
+ __note_gnu_build_id_end = .;
+ } :note :text
+ #elif defined(BUILD_ID_EFI)
+- .buildid : {
++ DECL_SECTION(.buildid) {
+ __note_gnu_build_id_start = .;
+ *(.buildid)
+ __note_gnu_build_id_end = .;
+@@ -153,7 +157,7 @@ SECTIONS
+ __2M_init_start = .; /* Start of 2M superpages, mapped RWX (boot only). */
+ . = ALIGN(PAGE_SIZE); /* Init code and data */
+ __init_begin = .;
+- .init : {
++ DECL_SECTION(.init) {
+ _sinittext = .;
+ *(.init.text)
+ /*
+@@ -215,7 +219,7 @@ SECTIONS
+
+ __2M_rwdata_start = .; /* Start of 2M superpages, mapped RW. */
+ . = ALIGN(SMP_CACHE_BYTES);
+- .data.read_mostly : {
++ DECL_SECTION(.data.read_mostly) {
+ *(.data.read_mostly)
+ . = ALIGN(8);
+ __start_schedulers_array = .;
+@@ -223,7 +227,7 @@ SECTIONS
+ __end_schedulers_array = .;
+ } :text
+
+- .data : { /* Data */
++ DECL_SECTION(.data) {
+ *(.data.page_aligned)
+ *(.data)
+ *(.data.rel)
+@@ -231,7 +235,7 @@ SECTIONS
+ CONSTRUCTORS
+ } :text
+
+- .bss : { /* BSS */
++ DECL_SECTION(.bss) {
+ __bss_start = .;
+ *(.bss.stack_aligned)
+ *(.bss.page_aligned*)
+--
+2.14.3
+
+
+From 887c705600114c502cd3b529659af085680f526a Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Fri, 10 Nov 2017 12:36:49 +0000
+Subject: [PATCH 29/77] x86: introduce ELFNOTE macro
+
+It is needed later for introducing PVH entry point.
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+v2:
+1. Specify section attribute and type.
+2. Use p2align.
+3. Align instructions.
+4. Haven't used .L or turned it into assembly macro.
+---
+ xen/include/asm-x86/asm_defns.h | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
+index 388fc93b9d..35a5d9ee03 100644
+--- a/xen/include/asm-x86/asm_defns.h
++++ b/xen/include/asm-x86/asm_defns.h
+@@ -409,4 +409,16 @@ static always_inline void stac(void)
+ #define REX64_PREFIX "rex64/"
+ #endif
+
++#define ELFNOTE(name, type, desc) \
++ .pushsection .note.name, "a", @note ; \
++ .p2align 2 ; \
++ .long 2f - 1f /* namesz */ ; \
++ .long 4f - 3f /* descsz */ ; \
++ .long type /* type */ ; \
++1: .asciz #name /* name */ ; \
++2: .p2align 2 ; \
++3: desc /* desc */ ; \
++4: .p2align 2 ; \
++ .popsection
++
+ #endif /* __X86_ASM_DEFNS_H__ */
+--
+2.14.3
+
+
+From 51f937a39bb6acadec1f4ab55f01048c2c1caee0 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Fri, 10 Nov 2017 16:19:40 +0000
+Subject: [PATCH 30/77] x86: produce a binary that can be booted as PVH
+
+Produce a binary that can be booted as PVH. It doesn't do much yet.
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+v2:
+1. Remove shim-y dependency.
+2. Remove extraneous blank line.
+3. Fix bugs in xen.lds.S.
+4. Haven't split code into pvh.S because that will break later
+ patches.
+---
+ .gitignore | 1 +
+ xen/arch/x86/Makefile | 8 ++++++++
+ xen/arch/x86/boot/head.S | 9 +++++++++
+ xen/arch/x86/xen.lds.S | 9 ++++++++-
+ 4 files changed, 26 insertions(+), 1 deletion(-)
+
+diff --git a/.gitignore b/.gitignore
+index d64b03d06c..8da67daf31 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -323,6 +323,7 @@ xen/xsm/flask/xenpolicy-*
+ tools/flask/policy/policy.conf
+ tools/flask/policy/xenpolicy-*
+ xen/xen
++xen/xen-shim
+ xen/xen-syms
+ xen/xen-syms.map
+ xen/xen.*
+diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
+index d5d58a205e..01d1178530 100644
+--- a/xen/arch/x86/Makefile
++++ b/xen/arch/x86/Makefile
+@@ -75,6 +75,8 @@ efi-y := $(shell if [ ! -r $(BASEDIR)/include/xen/compile.h -o \
+ -O $(BASEDIR)/include/xen/compile.h ]; then \
+ echo '$(TARGET).efi'; fi)
+
++shim-$(CONFIG_PVH_GUEST) := $(TARGET)-shim
++
+ ifneq ($(build_id_linker),)
+ notes_phdrs = --notes
+ else
+@@ -144,6 +146,11 @@ $(TARGET)-syms: prelink.o xen.lds $(BASEDIR)/common/symbols-dummy.o
+ >$(@D)/$((a)F).map
+ rm -f $(@D)/.$((a)F).[0-9]*
+
++# Use elf32-x86-64 if toolchain support exists, elf32-i386 otherwise.
++$(TARGET)-shim: FORMAT = $(firstword $(filter elf32-x86-64,$(shell $(OBJCOPY) --help))
elf32-i386)
++$(TARGET)-shim: $(TARGET)-syms
++ $(OBJCOPY) -O $(FORMAT) $< $@
++
+ note.o: $(TARGET)-syms
+ $(OBJCOPY) -O binary --only-section=.note.gnu.build-id $(BASEDIR)/xen-syms $@.bin
+ $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 \
+@@ -224,5 +231,6 @@ clean::
+ rm -f asm-offsets.s *.lds boot/*.o boot/*~ boot/core boot/mkelf32
+ rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
+ rm -f $(BASEDIR)/.xen.efi.[0-9]* efi/*.efi efi/disabled efi/mkreloc
++ rm -f $(BASEDIR)/xen-shim
+ rm -f boot/cmdline.S boot/reloc.S boot/*.lnk boot/*.bin
+ rm -f note.o
+diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
+index 9cc35da558..af25d23736 100644
+--- a/xen/arch/x86/boot/head.S
++++ b/xen/arch/x86/boot/head.S
+@@ -7,6 +7,7 @@
+ #include <asm/page.h>
+ #include <asm/msr.h>
+ #include <asm/cpufeature.h>
++#include <public/elfnote.h>
+
+ .text
+ .code32
+@@ -374,6 +375,14 @@ cs32_switch:
+ /* Jump to earlier loaded address. */
+ jmp *%edi
+
++#ifdef CONFIG_PVH_GUEST
++ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long sym_offs(__pvh_start))
++
++__pvh_start:
++ ud2a
++
++#endif /* CONFIG_PVH_GUEST */
++
+ __start:
+ cld
+ cli
+diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
+index 400d8a56c4..2023f971e4 100644
+--- a/xen/arch/x86/xen.lds.S
++++ b/xen/arch/x86/xen.lds.S
+@@ -34,7 +34,7 @@ OUTPUT_ARCH(i386:x86-64)
+ PHDRS
+ {
+ text PT_LOAD ;
+-#if defined(BUILD_ID) && !defined(EFI)
++#if (defined(BUILD_ID) || defined (CONFIG_PVH_GUEST)) && !defined(EFI)
+ note PT_NOTE ;
+ #endif
+ }
+@@ -128,6 +128,12 @@ SECTIONS
+ __param_end = .;
+ } :text
+
++#if defined(CONFIG_PVH_GUEST) && !defined(EFI)
++ DECL_SECTION(.note.Xen) {
++ *(.note.Xen)
++ } :note :text
++#endif
++
+ #if defined(BUILD_ID)
+ #if !defined(EFI)
+ /*
+@@ -279,6 +285,7 @@ SECTIONS
+ #ifdef EFI
+ *(.comment)
+ *(.comment.*)
++ *(.note.Xen)
+ #endif
+ }
+
+--
+2.14.3
+
+
+From db65173fe73568d0c718ce2a1c3ef8dc69c66b99 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Mon, 13 Nov 2017 17:32:19 +0000
+Subject: [PATCH 31/77] x86/entry: Early PVH boot code
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+v2:
+1. Fix comment.
+2. Use cmpb $0.
+3. Address comments on pvh-boot.c.
+4. Haven't changed the pritnk modifiers to accommodate future changes.
+5. Missing a prerequisite patch to relocate pvh_info to make __va work reliably.
+ [BLOCKER].
+---
+ xen/arch/x86/Makefile | 1 +
+ xen/arch/x86/boot/head.S | 40 +++++++++++-
+ xen/arch/x86/boot/x86_64.S | 2 +-
+ xen/arch/x86/guest/Makefile | 1 +
+ xen/arch/x86/guest/pvh-boot.c | 119 +++++++++++++++++++++++++++++++++++
+ xen/arch/x86/setup.c | 18 +++++-
+ xen/include/asm-x86/guest.h | 34 ++++++++++
+ xen/include/asm-x86/guest/pvh-boot.h | 57 +++++++++++++++++
+ 8 files changed, 268 insertions(+), 4 deletions(-)
+ create mode 100644 xen/arch/x86/guest/Makefile
+ create mode 100644 xen/arch/x86/guest/pvh-boot.c
+ create mode 100644 xen/include/asm-x86/guest.h
+ create mode 100644 xen/include/asm-x86/guest/pvh-boot.h
+
+diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
+index 01d1178530..ac91e13606 100644
+--- a/xen/arch/x86/Makefile
++++ b/xen/arch/x86/Makefile
+@@ -1,6 +1,7 @@
+ subdir-y += acpi
+ subdir-y += cpu
+ subdir-y += genapic
++subdir-$(CONFIG_XEN_GUEST) += guest
+ subdir-$(CONFIG_HVM) += hvm
+ subdir-y += mm
+ subdir-$(CONFIG_XENOPROF) += oprofile
+diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
+index af25d23736..14caca6798 100644
+--- a/xen/arch/x86/boot/head.S
++++ b/xen/arch/x86/boot/head.S
+@@ -379,7 +379,39 @@ cs32_switch:
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long sym_offs(__pvh_start))
+
+ __pvh_start:
+- ud2a
++ cld
++ cli
++
++ /*
++ * We need one push/pop to determine load address. Use the same
++ * absolute stack address as the native path, for lack of a better
++ * alternative.
++ */
++ mov $0x1000, %esp
++
++ /* Calculate the load base address. */
++ call 1f
++1: pop %esi
++ sub $sym_offs(1b), %esi
++
++ /* Set up stack. */
++ lea STACK_SIZE + sym_esi(cpu0_stack), %esp
++
++ mov %ebx, sym_esi(pvh_start_info_pa)
++
++ /* Prepare gdt and segments */
++ add %esi, sym_esi(gdt_boot_base)
++ lgdt sym_esi(gdt_boot_descr)
++
++ mov $BOOT_DS, %ecx
++ mov %ecx, %ds
++ mov %ecx, %es
++ mov %ecx, %ss
++
++ /* Skip bootloader setup and bios setup, go straight to trampoline */
++ movb $1, sym_esi(pvh_boot)
++ movb $1, sym_esi(skip_realmode)
++ jmp trampoline_setup
+
+ #endif /* CONFIG_PVH_GUEST */
+
+@@ -543,12 +575,18 @@ trampoline_setup:
+ /* Get bottom-most low-memory stack address. */
+ add $TRAMPOLINE_SPACE,%ecx
+
++#ifdef CONFIG_PVH_GUEST
++ cmpb $0, sym_fs(pvh_boot)
++ jne 1f
++#endif
++
+ /* Save the Multiboot info struct (after relocation) for later use. */
+ push %ecx /* Bottom-most low-memory stack address. */
+ push %ebx /* Multiboot information address. */
+ push %eax /* Multiboot magic. */
+ call reloc
+ mov %eax,sym_fs(multiboot_ptr)
++1:
+
+ /*
+ * Now trampoline_phys points to the following structure (lowest address
+diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
+index 925fd4bb0a..cf47e019f5 100644
+--- a/xen/arch/x86/boot/x86_64.S
++++ b/xen/arch/x86/boot/x86_64.S
+@@ -31,7 +31,7 @@ ENTRY(__high_start)
+ test %ebx,%ebx
+ jnz start_secondary
+
+- /* Pass off the Multiboot info structure to C land. */
++ /* Pass off the Multiboot info structure to C land (if applicable). */
+ mov multiboot_ptr(%rip),%edi
+ call __start_xen
+ BUG /* __start_xen() shouldn't return. */
+diff --git a/xen/arch/x86/guest/Makefile b/xen/arch/x86/guest/Makefile
+new file mode 100644
+index 0000000000..a5f1625ab1
+--- /dev/null
++++ b/xen/arch/x86/guest/Makefile
+@@ -0,0 +1 @@
++obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o
+diff --git a/xen/arch/x86/guest/pvh-boot.c b/xen/arch/x86/guest/pvh-boot.c
+new file mode 100644
+index 0000000000..186e332657
+--- /dev/null
++++ b/xen/arch/x86/guest/pvh-boot.c
+@@ -0,0 +1,119 @@
++/******************************************************************************
++ * arch/x86/guest/pvh-boot.c
++ *
++ * PVH boot time support
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++#include <xen/init.h>
++#include <xen/lib.h>
++#include <xen/mm.h>
++
++#include <asm/guest.h>
++
++#include <public/arch-x86/hvm/start_info.h>
++
++/* Initialised in head.S, before .bss is zeroed. */
++bool __initdata pvh_boot;
++uint32_t __initdata pvh_start_info_pa;
++
++static multiboot_info_t __initdata pvh_mbi;
++static module_t __initdata pvh_mbi_mods[8];
++static const char *__initdata pvh_loader = "PVH Directboot";
++
++static void __init convert_pvh_info(void)
++{
++ const struct hvm_start_info *pvh_info = __va(pvh_start_info_pa);
++ const struct hvm_modlist_entry *entry;
++ module_t *mod;
++ unsigned int i;
++
++ ASSERT(pvh_info->magic == XEN_HVM_START_MAGIC_VALUE);
++
++ /*
++ * Turn hvm_start_info into mbi. Luckily all modules are placed under 4GB
++ * boundary on x86.
++ */
++ pvh_mbi.flags = MBI_CMDLINE | MBI_MODULES | MBI_LOADERNAME;
++
++ BUG_ON(pvh_info->cmdline_paddr >> 32);
++ pvh_mbi.cmdline = pvh_info->cmdline_paddr;
++ pvh_mbi.boot_loader_name = __pa(pvh_loader);
++
++ BUG_ON(pvh_info->nr_modules >= ARRAY_SIZE(pvh_mbi_mods));
++ pvh_mbi.mods_count = pvh_info->nr_modules;
++ pvh_mbi.mods_addr = __pa(pvh_mbi_mods);
++
++ mod = pvh_mbi_mods;
++ entry = __va(pvh_info->modlist_paddr);
++ for ( i = 0; i < pvh_info->nr_modules; i++ )
++ {
++ BUG_ON(entry[i].paddr >> 32);
++ BUG_ON(entry[i].cmdline_paddr >> 32);
++
++ mod[i].mod_start = entry[i].paddr;
++ mod[i].mod_end = entry[i].paddr + entry[i].size;
++ mod[i].string = entry[i].cmdline_paddr;
++ }
++}
++
++multiboot_info_t *__init pvh_init(void)
++{
++ convert_pvh_info();
++
++ return &pvh_mbi;
++}
++
++void __init pvh_print_info(void)
++{
++ const struct hvm_start_info *pvh_info = __va(pvh_start_info_pa);
++ const struct hvm_modlist_entry *entry;
++ unsigned int i;
++
++ ASSERT(pvh_info->magic == XEN_HVM_START_MAGIC_VALUE);
++
++ printk("PVH start info: (pa %08x)\n", pvh_start_info_pa);
++ printk(" version: %u\n", pvh_info->version);
++ printk(" flags: %#"PRIx32"\n", pvh_info->flags);
++ printk(" nr_modules: %u\n", pvh_info->nr_modules);
++ printk(" modlist_pa: %016"PRIx64"\n",
pvh_info->modlist_paddr);
++ printk(" cmdline_pa: %016"PRIx64"\n",
pvh_info->cmdline_paddr);
++ if ( pvh_info->cmdline_paddr )
++ printk(" cmdline: '%s'\n", (char
*)__va(pvh_info->cmdline_paddr));
++ printk(" rsdp_pa: %016"PRIx64"\n",
pvh_info->rsdp_paddr);
++
++ entry = __va(pvh_info->modlist_paddr);
++ for ( i = 0; i < pvh_info->nr_modules; i++ )
++ {
++ printk(" mod[%u].pa: %016"PRIx64"\n", i,
entry[i].paddr);
++ printk(" mod[%u].size: %016"PRIu64"\n", i,
entry[i].size);
++ printk(" mod[%u].cmdline_pa: %016"PRIx64"\n",
++ i, entry[i].cmdline_paddr);
++ if ( entry[i].cmdline_paddr )
++ printk(" mod[%1u].cmdline: '%s'\n", i,
++ (char *)__va(entry[i].cmdline_paddr));
++ }
++}
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 2e10c6bdf4..4b8d09b751 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -51,6 +51,7 @@
+ #include <asm/alternative.h>
+ #include <asm/mc146818rtc.h>
+ #include <asm/cpuid.h>
++#include <asm/guest.h>
+
+ /* opt_nosmp: If true, secondary processors are ignored. */
+ static bool __initdata opt_nosmp;
+@@ -649,8 +650,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ char *memmap_type = NULL;
+ char *cmdline, *kextra, *loader;
+ unsigned int initrdidx, domcr_flags = DOMCRF_s3_integrity;
+- multiboot_info_t *mbi = __va(mbi_p);
+- module_t *mod = (module_t *)__va(mbi->mods_addr);
++ multiboot_info_t *mbi;
++ module_t *mod;
+ unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
+ int i, j, e820_warn = 0, bytes = 0;
+ bool acpi_boot_table_init_done = false, relocated = false;
+@@ -680,6 +681,16 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+
+ /* Full exception support from here on in. */
+
++ if ( pvh_boot )
++ {
++ ASSERT(mbi_p == 0);
++ mbi = pvh_init();
++ }
++ else
++ mbi = __va(mbi_p);
++
++ mod = __va(mbi->mods_addr);
++
+ loader = (mbi->flags & MBI_LOADERNAME)
+ ? (char *)__va(mbi->boot_loader_name) : "unknown";
+
+@@ -719,6 +730,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ ehci_dbgp_init();
+ console_init_preirq();
+
++ if ( pvh_boot )
++ pvh_print_info();
++
+ printk("Bootloader: %s\n", loader);
+
+ printk("Command line: %s\n", cmdline);
+diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h
+new file mode 100644
+index 0000000000..630c092c25
+--- /dev/null
++++ b/xen/include/asm-x86/guest.h
+@@ -0,0 +1,34 @@
++/******************************************************************************
++ * asm-x86/guest.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms and conditions of the GNU General Public
++ * License, version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++
++#ifndef __X86_GUEST_H__
++#define __X86_GUEST_H__
++
++#include <asm/guest/pvh-boot.h>
++
++#endif /* __X86_GUEST_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/include/asm-x86/guest/pvh-boot.h b/xen/include/asm-x86/guest/pvh-boot.h
+new file mode 100644
+index 0000000000..1b429f9401
+--- /dev/null
++++ b/xen/include/asm-x86/guest/pvh-boot.h
+@@ -0,0 +1,57 @@
++/******************************************************************************
++ * asm-x86/guest/pvh-boot.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms and conditions of the GNU General Public
++ * License, version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++
++#ifndef __X86_PVH_BOOT_H__
++#define __X86_PVH_BOOT_H__
++
++#include <xen/multiboot.h>
++
++#ifdef CONFIG_PVH_GUEST
++
++extern bool pvh_boot;
++
++multiboot_info_t *pvh_init(void);
++void pvh_print_info(void);
++
++#else
++
++#define pvh_boot 0
++
++static inline multiboot_info_t *pvh_init(void)
++{
++ ASSERT_UNREACHABLE();
++ return NULL;
++}
++
++static inline void pvh_print_info(void)
++{
++ ASSERT_UNREACHABLE();
++}
++
++#endif /* CONFIG_PVH_GUEST */
++#endif /* __X86_PVH_BOOT_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--
+2.14.3
+
+
+From 31b664a93f5efd8f40889d04028881c18b76a5a3 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 22 Nov 2017 11:39:04 +0000
+Subject: [PATCH 32/77] x86/boot: Map more than the first 16MB
+
+TODO: Replace somehow (bootstrap_map() ?)
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ xen/arch/x86/boot/x86_64.S | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
+index cf47e019f5..42636cf334 100644
+--- a/xen/arch/x86/boot/x86_64.S
++++ b/xen/arch/x86/boot/x86_64.S
+@@ -114,11 +114,10 @@ GLOBAL(__page_tables_start)
+ GLOBAL(l2_identmap)
+ .quad sym_offs(l1_identmap) + __PAGE_HYPERVISOR
+ idx = 1
+- .rept 7
++ .rept 4 * L2_PAGETABLE_ENTRIES - 1
+ .quad (idx << L2_PAGETABLE_SHIFT) | PAGE_HYPERVISOR | _PAGE_PSE
+ idx = idx + 1
+ .endr
+- .fill 4 * L2_PAGETABLE_ENTRIES - 8, 8, 0
+ .size l2_identmap, . - l2_identmap
+
+ /*
+--
+2.14.3
+
+
+From 3d1afab1f6a092006b5bbd36a84186203989d846 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Tue, 28 Nov 2017 14:53:51 +0000
+Subject: [PATCH 33/77] x86/entry: Probe for Xen early during boot
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+v2: Add __read_mostly.
+---
+ xen/arch/x86/guest/Makefile | 2 ++
+ xen/arch/x86/guest/xen.c | 75 +++++++++++++++++++++++++++++++++++++++++
+ xen/arch/x86/setup.c | 2 ++
+ xen/include/asm-x86/guest.h | 1 +
+ xen/include/asm-x86/guest/xen.h | 47 ++++++++++++++++++++++++++
+ 5 files changed, 127 insertions(+)
+ create mode 100644 xen/arch/x86/guest/xen.c
+ create mode 100644 xen/include/asm-x86/guest/xen.h
+
+diff --git a/xen/arch/x86/guest/Makefile b/xen/arch/x86/guest/Makefile
+index a5f1625ab1..1345a60c81 100644
+--- a/xen/arch/x86/guest/Makefile
++++ b/xen/arch/x86/guest/Makefile
+@@ -1 +1,3 @@
++obj-y += xen.o
++
+ obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+new file mode 100644
+index 0000000000..8507757841
+--- /dev/null
++++ b/xen/arch/x86/guest/xen.c
+@@ -0,0 +1,75 @@
++/******************************************************************************
++ * arch/x86/guest/xen.c
++ *
++ * Support for detecting and running under Xen.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++#include <xen/init.h>
++#include <xen/types.h>
++
++#include <asm/guest.h>
++#include <asm/processor.h>
++
++#include <public/arch-x86/cpuid.h>
++
++bool __read_mostly xen_guest;
++
++static __read_mostly uint32_t xen_cpuid_base;
++
++static void __init find_xen_leaves(void)
++{
++ uint32_t eax, ebx, ecx, edx, base;
++
++ for ( base = XEN_CPUID_FIRST_LEAF;
++ base < XEN_CPUID_FIRST_LEAF + 0x10000; base += 0x100 )
++ {
++ cpuid(base, &eax, &ebx, &ecx, &edx);
++
++ if ( (ebx == XEN_CPUID_SIGNATURE_EBX) &&
++ (ecx == XEN_CPUID_SIGNATURE_ECX) &&
++ (edx == XEN_CPUID_SIGNATURE_EDX) &&
++ ((eax - base) >= 2) )
++ {
++ xen_cpuid_base = base;
++ break;
++ }
++ }
++}
++
++void __init probe_hypervisor(void)
++{
++ /* Too early to use cpu_has_hypervisor */
++ if ( !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) )
++ return;
++
++ find_xen_leaves();
++
++ if ( !xen_cpuid_base )
++ return;
++
++ xen_guest = true;
++}
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 4b8d09b751..d8059f23b5 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -715,6 +715,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ * allocing any xenheap structures wanted in lower memory. */
+ kexec_early_calculations();
+
++ probe_hypervisor();
++
+ parse_video_info();
+
+ rdmsrl(MSR_EFER, this_cpu(efer));
+diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h
+index 630c092c25..8d91f81451 100644
+--- a/xen/include/asm-x86/guest.h
++++ b/xen/include/asm-x86/guest.h
+@@ -20,6 +20,7 @@
+ #define __X86_GUEST_H__
+
+ #include <asm/guest/pvh-boot.h>
++#include <asm/guest/xen.h>
+
+ #endif /* __X86_GUEST_H__ */
+
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+new file mode 100644
+index 0000000000..97a7c8d531
+--- /dev/null
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -0,0 +1,47 @@
++/******************************************************************************
++ * asm-x86/guest/xen.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms and conditions of the GNU General Public
++ * License, version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++
++#ifndef __X86_GUEST_XEN_H__
++#define __X86_GUEST_XEN_H__
++
++#include <xen/types.h>
++
++#ifdef CONFIG_XEN_GUEST
++
++extern bool xen_guest;
++
++void probe_hypervisor(void);
++
++#else
++
++#define xen_guest 0
++
++static inline void probe_hypervisor(void) {};
++
++#endif /* CONFIG_XEN_GUEST */
++#endif /* __X86_GUEST_XEN_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--
+2.14.3
+
+
+From b38cc15b2f6170e0a8864aa9f151cc0e4b388c3f Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Tue, 21 Nov 2017 13:54:47 +0000
+Subject: [PATCH 34/77] x86/guest: Hypercall support
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+v2: append underscores to tmp.
+---
+ xen/arch/x86/guest/Makefile | 1 +
+ xen/arch/x86/guest/hypercall_page.S | 79 ++++++++++++++++++++++++++++++
+ xen/arch/x86/guest/xen.c | 5 ++
+ xen/arch/x86/xen.lds.S | 1 +
+ xen/include/asm-x86/guest.h | 1 +
+ xen/include/asm-x86/guest/hypercall.h | 92 +++++++++++++++++++++++++++++++++++
+ 6 files changed, 179 insertions(+)
+ create mode 100644 xen/arch/x86/guest/hypercall_page.S
+ create mode 100644 xen/include/asm-x86/guest/hypercall.h
+
+diff --git a/xen/arch/x86/guest/Makefile b/xen/arch/x86/guest/Makefile
+index 1345a60c81..26fb4b1007 100644
+--- a/xen/arch/x86/guest/Makefile
++++ b/xen/arch/x86/guest/Makefile
+@@ -1,3 +1,4 @@
++obj-y += hypercall_page.o
+ obj-y += xen.o
+
+ obj-bin-$(CONFIG_PVH_GUEST) += pvh-boot.init.o
+diff --git a/xen/arch/x86/guest/hypercall_page.S b/xen/arch/x86/guest/hypercall_page.S
+new file mode 100644
+index 0000000000..fdd2e72272
+--- /dev/null
++++ b/xen/arch/x86/guest/hypercall_page.S
+@@ -0,0 +1,79 @@
++#include <asm/page.h>
++#include <asm/asm_defns.h>
++#include <public/xen.h>
++
++ .section ".text.page_aligned", "ax", @progbits
++ .p2align PAGE_SHIFT
++
++GLOBAL(hypercall_page)
++ /* Poisoned with `ret` for safety before hypercalls are set up. */
++ .fill PAGE_SIZE, 1, 0xc3
++ .type hypercall_page, STT_OBJECT
++ .size hypercall_page, PAGE_SIZE
++
++/*
++ * Identify a specific hypercall in the hypercall page
++ * @param name Hypercall name.
++ */
++#define DECLARE_HYPERCALL(name) \
++ .globl HYPERCALL_ ## name; \
++ .set HYPERCALL_ ## name, hypercall_page + __HYPERVISOR_ ## name * 32; \
++ .type HYPERCALL_ ## name, STT_FUNC; \
++ .size HYPERCALL_ ## name, 32
++
++DECLARE_HYPERCALL(set_trap_table)
++DECLARE_HYPERCALL(mmu_update)
++DECLARE_HYPERCALL(set_gdt)
++DECLARE_HYPERCALL(stack_switch)
++DECLARE_HYPERCALL(set_callbacks)
++DECLARE_HYPERCALL(fpu_taskswitch)
++DECLARE_HYPERCALL(sched_op_compat)
++DECLARE_HYPERCALL(platform_op)
++DECLARE_HYPERCALL(set_debugreg)
++DECLARE_HYPERCALL(get_debugreg)
++DECLARE_HYPERCALL(update_descriptor)
++DECLARE_HYPERCALL(memory_op)
++DECLARE_HYPERCALL(multicall)
++DECLARE_HYPERCALL(update_va_mapping)
++DECLARE_HYPERCALL(set_timer_op)
++DECLARE_HYPERCALL(event_channel_op_compat)
++DECLARE_HYPERCALL(xen_version)
++DECLARE_HYPERCALL(console_io)
++DECLARE_HYPERCALL(physdev_op_compat)
++DECLARE_HYPERCALL(grant_table_op)
++DECLARE_HYPERCALL(vm_assist)
++DECLARE_HYPERCALL(update_va_mapping_otherdomain)
++DECLARE_HYPERCALL(iret)
++DECLARE_HYPERCALL(vcpu_op)
++DECLARE_HYPERCALL(set_segment_base)
++DECLARE_HYPERCALL(mmuext_op)
++DECLARE_HYPERCALL(xsm_op)
++DECLARE_HYPERCALL(nmi_op)
++DECLARE_HYPERCALL(sched_op)
++DECLARE_HYPERCALL(callback_op)
++DECLARE_HYPERCALL(xenoprof_op)
++DECLARE_HYPERCALL(event_channel_op)
++DECLARE_HYPERCALL(physdev_op)
++DECLARE_HYPERCALL(hvm_op)
++DECLARE_HYPERCALL(sysctl)
++DECLARE_HYPERCALL(domctl)
++DECLARE_HYPERCALL(kexec_op)
++DECLARE_HYPERCALL(tmem_op)
++DECLARE_HYPERCALL(xc_reserved_op)
++DECLARE_HYPERCALL(xenpmu_op)
++
++DECLARE_HYPERCALL(arch_0)
++DECLARE_HYPERCALL(arch_1)
++DECLARE_HYPERCALL(arch_2)
++DECLARE_HYPERCALL(arch_3)
++DECLARE_HYPERCALL(arch_4)
++DECLARE_HYPERCALL(arch_5)
++DECLARE_HYPERCALL(arch_6)
++DECLARE_HYPERCALL(arch_7)
++
++/*
++ * Local variables:
++ * tab-width: 8
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index 8507757841..10b90d0f61 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -22,6 +22,7 @@
+ #include <xen/types.h>
+
+ #include <asm/guest.h>
++#include <asm/msr.h>
+ #include <asm/processor.h>
+
+ #include <public/arch-x86/cpuid.h>
+@@ -29,6 +30,7 @@
+ bool __read_mostly xen_guest;
+
+ static __read_mostly uint32_t xen_cpuid_base;
++extern char hypercall_page[];
+
+ static void __init find_xen_leaves(void)
+ {
+@@ -61,6 +63,9 @@ void __init probe_hypervisor(void)
+ if ( !xen_cpuid_base )
+ return;
+
++ /* Fill the hypercall page. */
++ wrmsrl(cpuid_ebx(xen_cpuid_base + 2), __pa(hypercall_page));
++
+ xen_guest = true;
+ }
+
+diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
+index 2023f971e4..509f176913 100644
+--- a/xen/arch/x86/xen.lds.S
++++ b/xen/arch/x86/xen.lds.S
+@@ -65,6 +65,7 @@ SECTIONS
+ DECL_SECTION(.text) {
+ _stext = .; /* Text and read-only data */
+ *(.text)
++ *(.text.page_aligned)
+ *(.text.cold)
+ *(.text.unlikely)
+ *(.fixup)
+diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h
+index 8d91f81451..5abdb8c433 100644
+--- a/xen/include/asm-x86/guest.h
++++ b/xen/include/asm-x86/guest.h
+@@ -19,6 +19,7 @@
+ #ifndef __X86_GUEST_H__
+ #define __X86_GUEST_H__
+
++#include <asm/guest/hypercall.h>
+ #include <asm/guest/pvh-boot.h>
+ #include <asm/guest/xen.h>
+
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+new file mode 100644
+index 0000000000..d959c3dd8a
+--- /dev/null
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -0,0 +1,92 @@
++/******************************************************************************
++ * asm-x86/guest/hypercall.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms and conditions of the GNU General Public
++ * License, version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++
++#ifndef __X86_XEN_HYPERCALL_H__
++#define __X86_XEN_HYPERCALL_H__
++
++#ifdef CONFIG_XEN_GUEST
++
++/*
++ * Hypercall primatives for 64bit
++ *
++ * Inputs: %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6)
++ */
++
++#define _hypercall64_1(type, hcall, a1) \
++ ({ \
++ long res, tmp__; \
++ asm volatile ( \
++ "call hypercall_page + %c[offset]" \
++ : "=a" (res), "=D" (tmp__)
\
++ : [offset] "i" (hcall * 32), \
++ "1" ((long)(a1)) \
++ : "memory" ); \
++ (type)res; \
++ })
++
++#define _hypercall64_2(type, hcall, a1, a2) \
++ ({ \
++ long res, tmp__; \
++ asm volatile ( \
++ "call hypercall_page + %c[offset]" \
++ : "=a" (res), "=D" (tmp__), "=S" (tmp__)
\
++ : [offset] "i" (hcall * 32), \
++ "1" ((long)(a1)), "2" ((long)(a2))
\
++ : "memory" ); \
++ (type)res; \
++ })
++
++#define _hypercall64_3(type, hcall, a1, a2, a3) \
++ ({ \
++ long res, tmp__; \
++ asm volatile ( \
++ "call hypercall_page + %c[offset]" \
++ : "=a" (res), "=D" (tmp__), "=S" (tmp__),
"=d" (tmp__) \
++ : [offset] "i" (hcall * 32), \
++ "1" ((long)(a1)), "2" ((long)(a2)), "3"
((long)(a3)) \
++ : "memory" ); \
++ (type)res; \
++ })
++
++#define _hypercall64_4(type, hcall, a1, a2, a3, a4) \
++ ({ \
++ long res, tmp__; \
++ register long _a4 asm ("r10") = ((long)(a4)); \
++ asm volatile ( \
++ "call hypercall_page + %c[offset]" \
++ : "=a" (res), "=D" (tmp__), "=S" (tmp__),
"=d" (tmp__), \
++ "=&r" (tmp__) \
++ : [offset] "i" (hcall * 32), \
++ "1" ((long)(a1)), "2" ((long)(a2)), "3"
((long)(a3)), \
++ "4" (_a4) \
++ : "memory" ); \
++ (type)res; \
++ })
++
++#endif /* CONFIG_XEN_GUEST */
++#endif /* __X86_XEN_HYPERCALL_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--
+2.14.3
+
+
+From 9752c7422b9193e18523d9c443bc0dad7ae0c7c7 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Tue, 21 Nov 2017 14:43:32 +0000
+Subject: [PATCH 35/77] x86/shutdown: Support for using
+ SCHEDOP_{shutdown,reboot}
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+---
+v2:
+1. Use sched_shutdown
+2. Move header inclusion
+---
+ docs/misc/xen-command-line.markdown | 3 +++
+ xen/arch/x86/shutdown.c | 34 ++++++++++++++++++++++++++++++----
+ xen/include/asm-x86/guest/hypercall.h | 32 ++++++++++++++++++++++++++++++++
+ 3 files changed, 65 insertions(+), 4 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index 781110d4b2..e5979bceee 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -1478,6 +1478,9 @@ Specify the host reboot method.
+ 'efi' instructs Xen to reboot using the EFI reboot call (in EFI mode by
+ default it will use that method first).
+
++`xen` instructs Xen to reboot using Xen's SCHEDOP hypercall (this is the default
++when running nested Xen)
++
+ ### rmrr
+ > '=
start<-end>=[s1]bdf1[,[s1]bdf2[,...]];start<-end>=[s2]bdf1[,[s2]bdf2[,...]]
+
+diff --git a/xen/arch/x86/shutdown.c b/xen/arch/x86/shutdown.c
+index a87aa60add..689f6f137d 100644
+--- a/xen/arch/x86/shutdown.c
++++ b/xen/arch/x86/shutdown.c
+@@ -25,6 +25,7 @@
+ #include <asm/mpspec.h>
+ #include <asm/tboot.h>
+ #include <asm/apic.h>
++#include <asm/guest.h>
+
+ enum reboot_type {
+ BOOT_INVALID,
+@@ -34,6 +35,7 @@ enum reboot_type {
+ BOOT_CF9 = 'p',
+ BOOT_CF9_PWR = 'P',
+ BOOT_EFI = 'e',
++ BOOT_XEN = 'x',
+ };
+
+ static int reboot_mode;
+@@ -49,6 +51,7 @@ static int reboot_mode;
+ * pci Use the so-called "PCI reset register", CF9
+ * Power Like 'pci' but for a full power-cyle reset
+ * efi Use the EFI reboot (if running under EFI)
++ * xen Use Xen SCHEDOP hypercall (if running under Xen as a guest)
+ */
+ static enum reboot_type reboot_type = BOOT_INVALID;
+
+@@ -75,6 +78,7 @@ static int __init set_reboot_type(const char *str)
+ case 'P':
+ case 'p':
+ case 't':
++ case 'x':
+ reboot_type = *str;
+ break;
+ default:
+@@ -93,6 +97,13 @@ static int __init set_reboot_type(const char *str)
+ reboot_type = BOOT_INVALID;
+ }
+
++ if ( reboot_type == BOOT_XEN && !xen_guest )
++ {
++ printk("Xen reboot selected, but Xen hypervisor not detected\n"
++ "Falling back to default\n");
++ reboot_type = BOOT_INVALID;
++ }
++
+ return rc;
+ }
+ custom_param("reboot", set_reboot_type);
+@@ -109,6 +120,10 @@ static inline void kb_wait(void)
+ static void noreturn __machine_halt(void *unused)
+ {
+ local_irq_disable();
++
++ if ( reboot_type == BOOT_XEN )
++ xen_hypercall_shutdown(SHUTDOWN_poweroff);
++
+ for ( ; ; )
+ halt();
+ }
+@@ -129,10 +144,17 @@ void machine_halt(void)
+
+ static void default_reboot_type(void)
+ {
+- if ( reboot_type == BOOT_INVALID )
+- reboot_type = efi_enabled(EFI_RS) ? BOOT_EFI
+- : acpi_disabled ? BOOT_KBD
+- : BOOT_ACPI;
++ if ( reboot_type != BOOT_INVALID )
++ return;
++
++ if ( xen_guest )
++ reboot_type = BOOT_XEN;
++ else if ( efi_enabled(EFI_RS) )
++ reboot_type = BOOT_EFI;
++ else if ( acpi_disabled )
++ reboot_type = BOOT_KBD;
++ else
++ reboot_type = BOOT_ACPI;
+ }
+
+ static int __init override_reboot(struct dmi_system_id *d)
+@@ -618,6 +640,10 @@ void machine_restart(unsigned int delay_millisecs)
+ }
+ reboot_type = BOOT_ACPI;
+ break;
++
++ case BOOT_XEN:
++ xen_hypercall_shutdown(SHUTDOWN_reboot);
++ break;
+ }
+ }
+ }
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+index d959c3dd8a..a05041d30b 100644
+--- a/xen/include/asm-x86/guest/hypercall.h
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -21,6 +21,11 @@
+
+ #ifdef CONFIG_XEN_GUEST
+
++#include <xen/types.h>
++
++#include <public/xen.h>
++#include <public/sched.h>
++
+ /*
+ * Hypercall primatives for 64bit
+ *
+@@ -78,6 +83,33 @@
+ (type)res; \
+ })
+
++/*
++ * Primitive Hypercall wrappers
++ */
++static inline long xen_hypercall_sched_op(unsigned int cmd, void *arg)
++{
++ return _hypercall64_2(long, __HYPERVISOR_sched_op, cmd, arg);
++}
++
++/*
++ * Higher level hypercall helpers
++ */
++static inline long xen_hypercall_shutdown(unsigned int reason)
++{
++ struct sched_shutdown s = { .reason = reason };
++ return xen_hypercall_sched_op(SCHEDOP_shutdown, &s);
++}
++
++#else /* CONFIG_XEN_GUEST */
++
++#include <public/sched.h>
++
++static inline long xen_hypercall_shutdown(unsigned int reason)
++{
++ ASSERT_UNREACHABLE();
++ return 0;
++}
++
+ #endif /* CONFIG_XEN_GUEST */
+ #endif /* __X86_XEN_HYPERCALL_H__ */
+
+--
+2.14.3
+
+
+From 2f5a0121434559b2f8e5b17dc0119699684e3b17 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Tue, 14 Nov 2017 18:19:09 +0000
+Subject: [PATCH 36/77] x86/pvh: Retrieve memory map from Xen
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+v2: fixed clang build, dropped rb tag
+---
+ xen/arch/x86/e820.c | 3 +--
+ xen/arch/x86/guest/pvh-boot.c | 20 ++++++++++++++++++++
+ xen/arch/x86/guest/xen.c | 3 +++
+ xen/arch/x86/setup.c | 7 ++++++-
+ xen/include/asm-x86/e820.h | 1 +
+ xen/include/asm-x86/guest/hypercall.h | 5 +++++
+ 6 files changed, 36 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
+index 7c572bade2..b422a684ee 100644
+--- a/xen/arch/x86/e820.c
++++ b/xen/arch/x86/e820.c
+@@ -134,8 +134,7 @@ static struct change_member *change_point[2*E820MAX] __initdata;
+ static struct e820entry *overlap_list[E820MAX] __initdata;
+ static struct e820entry new_bios[E820MAX] __initdata;
+
+-static int __init sanitize_e820_map(struct e820entry *biosmap,
+- unsigned int *pnr_map)
++int __init sanitize_e820_map(struct e820entry *biosmap, unsigned int *pnr_map)
+ {
+ struct change_member *change_tmp;
+ unsigned long current_type, last_type;
+diff --git a/xen/arch/x86/guest/pvh-boot.c b/xen/arch/x86/guest/pvh-boot.c
+index 186e332657..be3122b16c 100644
+--- a/xen/arch/x86/guest/pvh-boot.c
++++ b/xen/arch/x86/guest/pvh-boot.c
+@@ -22,6 +22,7 @@
+ #include <xen/lib.h>
+ #include <xen/mm.h>
+
++#include <asm/e820.h>
+ #include <asm/guest.h>
+
+ #include <public/arch-x86/hvm/start_info.h>
+@@ -70,10 +71,29 @@ static void __init convert_pvh_info(void)
+ }
+ }
+
++static void __init get_memory_map(void)
++{
++ struct xen_memory_map memmap = {
++ .nr_entries = E820MAX,
++ };
++
++ set_xen_guest_handle(memmap.buffer, e820_raw.map);
++ BUG_ON(xen_hypercall_memory_op(XENMEM_memory_map, &memmap));
++ e820_raw.nr_map = memmap.nr_entries;
++
++ /* :( Various toolstacks don't sort the memory map. */
++ sanitize_e820_map(e820_raw.map, &e820_raw.nr_map);
++}
++
+ multiboot_info_t *__init pvh_init(void)
+ {
+ convert_pvh_info();
+
++ probe_hypervisor();
++ ASSERT(xen_guest);
++
++ get_memory_map();
++
+ return &pvh_mbi;
+ }
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index 10b90d0f61..c253ebd983 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -54,6 +54,9 @@ static void __init find_xen_leaves(void)
+
+ void __init probe_hypervisor(void)
+ {
++ if ( xen_guest )
++ return;
++
+ /* Too early to use cpu_has_hypervisor */
+ if ( !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) )
+ return;
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index d8059f23b5..edb43bf2cb 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -795,7 +795,12 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
+ panic("dom0 kernel not specified. Check bootloader configuration.");
+
+- if ( efi_enabled(EFI_LOADER) )
++ if ( pvh_boot )
++ {
++ /* pvh_init() already filled in e820_raw */
++ memmap_type = "PVH-e820";
++ }
++ else if ( efi_enabled(EFI_LOADER) )
+ {
+ set_pdx_range(xen_phys_start >> PAGE_SHIFT,
+ (xen_phys_start + BOOTSTRAP_MAP_BASE) >> PAGE_SHIFT);
+diff --git a/xen/include/asm-x86/e820.h b/xen/include/asm-x86/e820.h
+index 28defa8545..ee317b17aa 100644
+--- a/xen/include/asm-x86/e820.h
++++ b/xen/include/asm-x86/e820.h
+@@ -23,6 +23,7 @@ struct e820map {
+ struct e820entry map[E820MAX];
+ };
+
++extern int sanitize_e820_map(struct e820entry *biosmap, unsigned int *pnr_map);
+ extern int e820_all_mapped(u64 start, u64 end, unsigned type);
+ extern int reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e);
+ extern int e820_change_range_type(
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+index a05041d30b..e0b00f97fb 100644
+--- a/xen/include/asm-x86/guest/hypercall.h
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -91,6 +91,11 @@ static inline long xen_hypercall_sched_op(unsigned int cmd, void
*arg)
+ return _hypercall64_2(long, __HYPERVISOR_sched_op, cmd, arg);
+ }
+
++static inline long xen_hypercall_memory_op(unsigned int cmd, void *arg)
++{
++ return _hypercall64_2(long, __HYPERVISOR_memory_op, cmd, arg);
++}
++
+ /*
+ * Higher level hypercall helpers
+ */
+--
+2.14.3
+
+
+From 10128f33aa344f1f57584fd9ea528e1518b0d5fd Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Thu, 11 Jan 2018 10:18:09 +0000
+Subject: [PATCH 37/77] xen/console: Introduce console=xen
+
+This specifies whether to use Xen specific console output. There are
+two variants: one is the hypervisor console, the other is the magic
+debug port 0xe9.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/drivers/char/console.c | 46 +++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/guest/hypercall.h | 13 ++++++++++
+ 2 files changed, 59 insertions(+)
+
+diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
+index 19d0e74f17..d05ebf9f70 100644
+--- a/xen/drivers/char/console.c
++++ b/xen/drivers/char/console.c
+@@ -31,6 +31,10 @@
+ #include <xen/early_printk.h>
+ #include <xen/warning.h>
+
++#ifdef CONFIG_X86
++#include <asm/guest.h>
++#endif
++
+ /* console: comma-separated list of console outputs. */
+ static char __initdata opt_console[30] = OPT_CONSOLE_STR;
+ string_param("console", opt_console);
+@@ -83,6 +87,10 @@ static uint32_t conringc, conringp;
+
+ static int __read_mostly sercon_handle = -1;
+
++#ifdef CONFIG_X86
++static bool __read_mostly opt_console_xen; /* console=xen */
++#endif
++
+ static DEFINE_SPINLOCK(console_lock);
+
+ /*
+@@ -432,6 +440,16 @@ static void notify_dom0_con_ring(unsigned long unused)
+ static DECLARE_SOFTIRQ_TASKLET(notify_dom0_con_ring_tasklet,
+ notify_dom0_con_ring, 0);
+
++#ifdef CONFIG_X86
++static inline void xen_console_write_debug_port(const char *buf, size_t len)
++{
++ unsigned long tmp;
++ asm volatile ( "rep outsb;"
++ : "=&S" (tmp), "=&c" (tmp)
++ : "0" (buf), "1" (len), "d" (0xe9) );
++}
++#endif
++
+ static long guest_console_write(XEN_GUEST_HANDLE_PARAM(char) buffer, int count)
+ {
+ char kbuf[128];
+@@ -458,6 +476,18 @@ static long guest_console_write(XEN_GUEST_HANDLE_PARAM(char) buffer,
int count)
+ sercon_puts(kbuf);
+ video_puts(kbuf);
+
++#ifdef CONFIG_X86
++ if ( opt_console_xen )
++ {
++ size_t len = strlen(kbuf);
++
++ if ( xen_guest )
++ xen_hypercall_console_write(kbuf, len);
++ else
++ xen_console_write_debug_port(kbuf, len);
++ }
++#endif
++
+ if ( opt_console_to_ring )
+ {
+ conring_puts(kbuf);
+@@ -567,6 +597,18 @@ static void __putstr(const char *str)
+ sercon_puts(str);
+ video_puts(str);
+
++#ifdef CONFIG_X86
++ if ( opt_console_xen )
++ {
++ size_t len = strlen(str);
++
++ if ( xen_guest )
++ xen_hypercall_console_write(str, len);
++ else
++ xen_console_write_debug_port(str, len);
++ }
++#endif
++
+ conring_puts(str);
+
+ if ( !console_locks_busted )
+@@ -762,6 +804,10 @@ void __init console_init_preirq(void)
+ p++;
+ if ( !strncmp(p, "vga", 3) )
+ video_init();
++#ifdef CONFIG_X86
++ else if ( !strncmp(p, "xen", 3) )
++ opt_console_xen = true;
++#endif
+ else if ( !strncmp(p, "none", 4) )
+ continue;
+ else if ( (sh = serial_parse_handle(p)) >= 0 )
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+index e0b00f97fb..9cd95d2b92 100644
+--- a/xen/include/asm-x86/guest/hypercall.h
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -99,6 +99,13 @@ static inline long xen_hypercall_memory_op(unsigned int cmd, void
*arg)
+ /*
+ * Higher level hypercall helpers
+ */
++static inline void xen_hypercall_console_write(
++ const char *buf, unsigned int count)
++{
++ (void)_hypercall64_3(long, __HYPERVISOR_console_io,
++ CONSOLEIO_write, count, buf);
++}
++
+ static inline long xen_hypercall_shutdown(unsigned int reason)
+ {
+ struct sched_shutdown s = { .reason = reason };
+@@ -109,6 +116,12 @@ static inline long xen_hypercall_shutdown(unsigned int reason)
+
+ #include <public/sched.h>
+
++static inline void xen_hypercall_console_write(
++ const char *buf, unsigned int count)
++{
++ ASSERT_UNREACHABLE();
++}
++
+ static inline long xen_hypercall_shutdown(unsigned int reason)
+ {
+ ASSERT_UNREACHABLE();
+--
+2.14.3
+
+
+From 1fa54448348d6cc36b89bb9e1729ea601013b00f Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Wed, 3 Jan 2018 16:38:54 +0000
+Subject: [PATCH 38/77] xen: introduce rangeset_claim_range
+
+Reserve a hole in a rangeset.
+
+Signed-off-by: Roger Pau Monne <roger.pau(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+Changes since v1:
+ - Change function name.
+ - Use a local variable instead of *s.
+ - Add unlikely to the !prev case.
+ - Move the function prototype position in the header file.
+---
+ xen/common/rangeset.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++
+ xen/include/xen/rangeset.h | 4 +++-
+ 2 files changed, 55 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/rangeset.c b/xen/common/rangeset.c
+index 6c6293c15c..ade34f6a50 100644
+--- a/xen/common/rangeset.c
++++ b/xen/common/rangeset.c
+@@ -298,6 +298,58 @@ int rangeset_report_ranges(
+ return rc;
+ }
+
++int rangeset_claim_range(struct rangeset *r, unsigned long size,
++ unsigned long *s)
++{
++ struct range *prev, *next;
++ unsigned long start = 0;
++
++ write_lock(&r->lock);
++
++ for ( prev = NULL, next = first_range(r);
++ next;
++ prev = next, next = next_range(r, next) )
++ {
++ if ( (next->s - start) >= size )
++ goto insert;
++
++ if ( next->e == ~0UL )
++ goto out;
++
++ start = next->e + 1;
++ }
++
++ if ( (~0UL - start) + 1 >= size )
++ goto insert;
++
++ out:
++ write_unlock(&r->lock);
++ return -ENOSPC;
++
++ insert:
++ if ( unlikely(!prev) )
++ {
++ next = alloc_range(r);
++ if ( !next )
++ {
++ write_unlock(&r->lock);
++ return -ENOMEM;
++ }
++
++ next->s = start;
++ next->e = start + size - 1;
++ insert_range(r, prev, next);
++ }
++ else
++ prev->e += size;
++
++ write_unlock(&r->lock);
++
++ *s = start;
++
++ return 0;
++}
++
+ int rangeset_add_singleton(
+ struct rangeset *r, unsigned long s)
+ {
+diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
+index aa6408248b..1f83b1f44b 100644
+--- a/xen/include/xen/rangeset.h
++++ b/xen/include/xen/rangeset.h
+@@ -55,9 +55,11 @@ void rangeset_limit(
+ bool_t __must_check rangeset_is_empty(
+ const struct rangeset *r);
+
+-/* Add/remove/query a numeric range. */
++/* Add/claim/remove/query a numeric range. */
+ int __must_check rangeset_add_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
++int __must_check rangeset_claim_range(struct rangeset *r, unsigned long size,
++ unsigned long *s);
+ int __must_check rangeset_remove_range(
+ struct rangeset *r, unsigned long s, unsigned long e);
+ bool_t __must_check rangeset_contains_range(
+--
+2.14.3
+
+
+From 83186a8e6988b8f218fce57db3a62e35d39b529a Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Wed, 3 Jan 2018 16:50:24 +0000
+Subject: [PATCH 39/77] xen/pvshim: keep track of used PFN ranges
+
+Simple infrastructure to keep track of PFN space usage, so that we can
+use unpopulated PFNs to map special pages like shared info and grant
+table.
+
+As rangeset depends on malloc being ready so hypervisor_setup is
+introduced for things that can be initialised late in the process.
+
+Note that the PFN is marked as reserved at least up to 4GiB (or more
+if the guest has more memory). This is not a perfect solution but
+avoids using the MMIO hole below 4GiB. Ideally the shim (L1) should
+have a way to ask the underlying Xen (L0) which memory regions are
+populated, unpopulated, or MMIO space.
+
+Signed-off-by: Roger Pau Monne <roger.pau(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/arch/x86/guest/xen.c | 56 +++++++++++++++++++++++++++++++++++++++++
+ xen/arch/x86/setup.c | 3 +++
+ xen/include/asm-x86/guest/xen.h | 7 ++++++
+ 3 files changed, 66 insertions(+)
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index c253ebd983..abf53ebbc6 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -19,8 +19,12 @@
+ * Copyright (c) 2017 Citrix Systems Ltd.
+ */
+ #include <xen/init.h>
++#include <xen/mm.h>
++#include <xen/pfn.h>
++#include <xen/rangeset.h>
+ #include <xen/types.h>
+
++#include <asm/e820.h>
+ #include <asm/guest.h>
+ #include <asm/msr.h>
+ #include <asm/processor.h>
+@@ -31,6 +35,7 @@ bool __read_mostly xen_guest;
+
+ static __read_mostly uint32_t xen_cpuid_base;
+ extern char hypercall_page[];
++static struct rangeset *mem;
+
+ static void __init find_xen_leaves(void)
+ {
+@@ -72,6 +77,57 @@ void __init probe_hypervisor(void)
+ xen_guest = true;
+ }
+
++static void __init init_memmap(void)
++{
++ unsigned int i;
++
++ mem = rangeset_new(NULL, "host memory map", 0);
++ if ( !mem )
++ panic("failed to allocate PFN usage rangeset");
++
++ /*
++ * Mark up to the last memory page (or 4GiB) as RAM. This is done because
++ * Xen doesn't know the position of possible MMIO holes, so at least try to
++ * avoid the know MMIO hole below 4GiB. Note that this is subject to future
++ * discussion and improvements.
++ */
++ if ( rangeset_add_range(mem, 0, max_t(unsigned long, max_page - 1,
++ PFN_DOWN(GB(4) - 1))) )
++ panic("unable to add RAM to in-use PFN rangeset");
++
++ for ( i = 0; i < e820.nr_map; i++ )
++ {
++ struct e820entry *e = &e820.map[i];
++
++ if ( rangeset_add_range(mem, PFN_DOWN(e->addr),
++ PFN_UP(e->addr + e->size - 1)) )
++ panic("unable to add range [%#lx, %#lx] to in-use PFN rangeset",
++ PFN_DOWN(e->addr), PFN_UP(e->addr + e->size - 1));
++ }
++}
++
++void __init hypervisor_setup(void)
++{
++ init_memmap();
++}
++
++int hypervisor_alloc_unused_page(mfn_t *mfn)
++{
++ unsigned long m;
++ int rc;
++
++ rc = rangeset_claim_range(mem, 1, &m);
++ if ( !rc )
++ *mfn = _mfn(m);
++
++ return rc;
++}
++
++int hypervisor_free_unused_page(mfn_t mfn)
++{
++ return rangeset_remove_range(mem, mfn_x(mfn), mfn_x(mfn));
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index edb43bf2cb..b9b97d68f5 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -1472,6 +1472,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ max_cpus = nr_cpu_ids;
+ }
+
++ if ( xen_guest )
++ hypervisor_setup();
++
+ /* Low mappings were only needed for some BIOS table parsing. */
+ zap_low_mappings();
+
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+index 97a7c8d531..427837797b 100644
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -26,12 +26,19 @@
+ extern bool xen_guest;
+
+ void probe_hypervisor(void);
++void hypervisor_setup(void);
++int hypervisor_alloc_unused_page(mfn_t *mfn);
++int hypervisor_free_unused_page(mfn_t mfn);
+
+ #else
+
+ #define xen_guest 0
+
+ static inline void probe_hypervisor(void) {};
++static inline void hypervisor_setup(void)
++{
++ ASSERT_UNREACHABLE();
++}
+
+ #endif /* CONFIG_XEN_GUEST */
+ #endif /* __X86_GUEST_XEN_H__ */
+--
+2.14.3
+
+
+From efa15c993b600e9636cd091c626ee0c989afc62f Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Tue, 9 Jan 2018 11:19:44 +0000
+Subject: [PATCH 40/77] x86/guest: map shared_info page
+
+Use an unpopulated PFN in order to map it.
+
+Signed-off-by: Roger Pau Monne <roger.pau(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+Changes since v1:
+ - Use an unpopulated PFN to map the shared_info page.
+ - Mask all event channels.
+ - Report XENMEM_add_to_physmap error code in case of failure.
+---
+ xen/arch/x86/guest/xen.c | 27 +++++++++++++++++++++++++++
+ xen/include/asm-x86/fixmap.h | 3 +++
+ xen/include/asm-x86/guest/xen.h | 5 +++++
+ 3 files changed, 35 insertions(+)
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index abf53ebbc6..f62f93af16 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -77,6 +77,31 @@ void __init probe_hypervisor(void)
+ xen_guest = true;
+ }
+
++static void map_shared_info(void)
++{
++ mfn_t mfn;
++ struct xen_add_to_physmap xatp = {
++ .domid = DOMID_SELF,
++ .space = XENMAPSPACE_shared_info,
++ };
++ unsigned int i;
++ unsigned long rc;
++
++ if ( hypervisor_alloc_unused_page(&mfn) )
++ panic("unable to reserve shared info memory page");
++
++ xatp.gpfn = mfn_x(mfn);
++ rc = xen_hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
++ if ( rc )
++ panic("failed to map shared_info page: %ld", rc);
++
++ set_fixmap(FIX_XEN_SHARED_INFO, mfn_x(mfn) << PAGE_SHIFT);
++
++ /* Mask all upcalls */
++ for ( i = 0; i < ARRAY_SIZE(XEN_shared_info->evtchn_mask); i++ )
++ write_atomic(&XEN_shared_info->evtchn_mask[i], ~0ul);
++}
++
+ static void __init init_memmap(void)
+ {
+ unsigned int i;
+@@ -109,6 +134,8 @@ static void __init init_memmap(void)
+ void __init hypervisor_setup(void)
+ {
+ init_memmap();
++
++ map_shared_info();
+ }
+
+ int hypervisor_alloc_unused_page(mfn_t *mfn)
+diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h
+index 51b0e7e945..ded4ddf21b 100644
+--- a/xen/include/asm-x86/fixmap.h
++++ b/xen/include/asm-x86/fixmap.h
+@@ -45,6 +45,9 @@ enum fixed_addresses {
+ FIX_COM_BEGIN,
+ FIX_COM_END,
+ FIX_EHCI_DBGP,
++#ifdef CONFIG_XEN_GUEST
++ FIX_XEN_SHARED_INFO,
++#endif /* CONFIG_XEN_GUEST */
+ /* Everything else should go further down. */
+ FIX_APIC_BASE,
+ FIX_IO_APIC_BASE_0,
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+index 427837797b..f25ad4241b 100644
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -21,6 +21,11 @@
+
+ #include <xen/types.h>
+
++#include <asm/e820.h>
++#include <asm/fixmap.h>
++
++#define XEN_shared_info ((struct shared_info *)fix_to_virt(FIX_XEN_SHARED_INFO))
++
+ #ifdef CONFIG_XEN_GUEST
+
+ extern bool xen_guest;
+--
+2.14.3
+
+
+From d2df09c92bf988af804b65a1db92d8ea82a60350 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Wed, 27 Dec 2017 09:23:01 +0000
+Subject: [PATCH 41/77] xen/guest: fetch vCPU ID from Xen
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+If available.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+[ wei: fix non-shim build ]
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/arch/x86/guest/xen.c | 23 +++++++++++++++++++++++
+ xen/arch/x86/smpboot.c | 4 ++++
+ xen/include/asm-x86/guest/xen.h | 7 +++++++
+ 3 files changed, 34 insertions(+)
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index f62f93af16..de8cfc6e36 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -37,6 +37,8 @@ static __read_mostly uint32_t xen_cpuid_base;
+ extern char hypercall_page[];
+ static struct rangeset *mem;
+
++DEFINE_PER_CPU(unsigned int, vcpu_id);
++
+ static void __init find_xen_leaves(void)
+ {
+ uint32_t eax, ebx, ecx, edx, base;
+@@ -102,6 +104,20 @@ static void map_shared_info(void)
+ write_atomic(&XEN_shared_info->evtchn_mask[i], ~0ul);
+ }
+
++static void set_vcpu_id(void)
++{
++ uint32_t eax, ebx, ecx, edx;
++
++ ASSERT(xen_cpuid_base);
++
++ /* Fetch vcpu id from cpuid. */
++ cpuid(xen_cpuid_base + 4, &eax, &ebx, &ecx, &edx);
++ if ( eax & XEN_HVM_CPUID_VCPU_ID_PRESENT )
++ this_cpu(vcpu_id) = ebx;
++ else
++ this_cpu(vcpu_id) = smp_processor_id();
++}
++
+ static void __init init_memmap(void)
+ {
+ unsigned int i;
+@@ -136,6 +152,13 @@ void __init hypervisor_setup(void)
+ init_memmap();
+
+ map_shared_info();
++
++ set_vcpu_id();
++}
++
++void hypervisor_ap_setup(void)
++{
++ set_vcpu_id();
+ }
+
+ int hypervisor_alloc_unused_page(mfn_t *mfn)
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 1609b627ae..5c7863035e 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -38,6 +38,7 @@
+ #include <asm/desc.h>
+ #include <asm/div64.h>
+ #include <asm/flushtlb.h>
++#include <asm/guest.h>
+ #include <asm/msr.h>
+ #include <asm/mtrr.h>
+ #include <asm/time.h>
+@@ -373,6 +374,9 @@ void start_secondary(void *unused)
+ cpumask_set_cpu(cpu, &cpu_online_map);
+ unlock_vector_lock();
+
++ if ( xen_guest )
++ hypervisor_ap_setup();
++
+ /* We can take interrupts now: we're officially "up". */
+ local_irq_enable();
+ mtrr_ap_init();
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+index f25ad4241b..db35a9e628 100644
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -32,9 +32,12 @@ extern bool xen_guest;
+
+ void probe_hypervisor(void);
+ void hypervisor_setup(void);
++void hypervisor_ap_setup(void);
+ int hypervisor_alloc_unused_page(mfn_t *mfn);
+ int hypervisor_free_unused_page(mfn_t mfn);
+
++DECLARE_PER_CPU(unsigned int, vcpu_id);
++
+ #else
+
+ #define xen_guest 0
+@@ -44,6 +47,10 @@ static inline void hypervisor_setup(void)
+ {
+ ASSERT_UNREACHABLE();
+ }
++static inline void hypervisor_ap_setup(void)
++{
++ ASSERT_UNREACHABLE();
++}
+
+ #endif /* CONFIG_XEN_GUEST */
+ #endif /* __X86_GUEST_XEN_H__ */
+--
+2.14.3
+
+
+From 68e7a08436ed50f9ba51f9c9e88819ba0fedcc24 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 28 Dec 2017 15:22:34 +0000
+Subject: [PATCH 42/77] x86/guest: map per-cpu vcpu_info area.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Mapping the per-vcpu vcpu_info area is required in order to use more
+than XEN_LEGACY_MAX_VCPUS.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+Changes since v1:
+ - Make vcpu_info_mapped static.
+ - Add a BUG_ON in case VCPUOP_register_vcpu_info fails.
+ - Remove one indentation level in hypervisor_setup.
+ - Make xen_hypercall_vcpu_op return int.
+---
+ xen/arch/x86/guest/xen.c | 57 +++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/guest/hypercall.h | 8 +++++
+ xen/include/asm-x86/guest/xen.h | 1 +
+ 3 files changed, 66 insertions(+)
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index de8cfc6e36..60626ec21c 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -39,6 +39,10 @@ static struct rangeset *mem;
+
+ DEFINE_PER_CPU(unsigned int, vcpu_id);
+
++static struct vcpu_info *vcpu_info;
++static unsigned long vcpu_info_mapped[BITS_TO_LONGS(NR_CPUS)];
++DEFINE_PER_CPU(struct vcpu_info *, vcpu_info);
++
+ static void __init find_xen_leaves(void)
+ {
+ uint32_t eax, ebx, ecx, edx, base;
+@@ -104,6 +108,41 @@ static void map_shared_info(void)
+ write_atomic(&XEN_shared_info->evtchn_mask[i], ~0ul);
+ }
+
++static int map_vcpuinfo(void)
++{
++ unsigned int vcpu = this_cpu(vcpu_id);
++ struct vcpu_register_vcpu_info info;
++ int rc;
++
++ if ( !vcpu_info )
++ {
++ this_cpu(vcpu_info) = &XEN_shared_info->vcpu_info[vcpu];
++ return 0;
++ }
++
++ if ( test_bit(vcpu, vcpu_info_mapped) )
++ {
++ this_cpu(vcpu_info) = &vcpu_info[vcpu];
++ return 0;
++ }
++
++ info.mfn = virt_to_mfn(&vcpu_info[vcpu]);
++ info.offset = (unsigned long)&vcpu_info[vcpu] & ~PAGE_MASK;
++ rc = xen_hypercall_vcpu_op(VCPUOP_register_vcpu_info, vcpu, &info);
++ if ( rc )
++ {
++ BUG_ON(vcpu >= XEN_LEGACY_MAX_VCPUS);
++ this_cpu(vcpu_info) = &XEN_shared_info->vcpu_info[vcpu];
++ }
++ else
++ {
++ this_cpu(vcpu_info) = &vcpu_info[vcpu];
++ set_bit(vcpu, vcpu_info_mapped);
++ }
++
++ return rc;
++}
++
+ static void set_vcpu_id(void)
+ {
+ uint32_t eax, ebx, ecx, edx;
+@@ -154,11 +193,29 @@ void __init hypervisor_setup(void)
+ map_shared_info();
+
+ set_vcpu_id();
++ vcpu_info = xzalloc_array(struct vcpu_info, nr_cpu_ids);
++ if ( map_vcpuinfo() )
++ {
++ xfree(vcpu_info);
++ vcpu_info = NULL;
++ }
++ if ( !vcpu_info && nr_cpu_ids > XEN_LEGACY_MAX_VCPUS )
++ {
++ unsigned int i;
++
++ for ( i = XEN_LEGACY_MAX_VCPUS; i < nr_cpu_ids; i++ )
++ __cpumask_clear_cpu(i, &cpu_present_map);
++ nr_cpu_ids = XEN_LEGACY_MAX_VCPUS;
++ printk(XENLOG_WARNING
++ "unable to map vCPU info, limiting vCPUs to: %u\n",
++ XEN_LEGACY_MAX_VCPUS);
++ }
+ }
+
+ void hypervisor_ap_setup(void)
+ {
+ set_vcpu_id();
++ map_vcpuinfo();
+ }
+
+ int hypervisor_alloc_unused_page(mfn_t *mfn)
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+index 9cd95d2b92..dbc57a566e 100644
+--- a/xen/include/asm-x86/guest/hypercall.h
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -26,6 +26,8 @@
+ #include <public/xen.h>
+ #include <public/sched.h>
+
++#include <public/vcpu.h>
++
+ /*
+ * Hypercall primatives for 64bit
+ *
+@@ -96,6 +98,12 @@ static inline long xen_hypercall_memory_op(unsigned int cmd, void
*arg)
+ return _hypercall64_2(long, __HYPERVISOR_memory_op, cmd, arg);
+ }
+
++static inline int xen_hypercall_vcpu_op(unsigned int cmd, unsigned int vcpu,
++ void *arg)
++{
++ return _hypercall64_3(long, __HYPERVISOR_vcpu_op, cmd, vcpu, arg);
++}
++
+ /*
+ * Higher level hypercall helpers
+ */
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+index db35a9e628..b3e684f756 100644
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -37,6 +37,7 @@ int hypervisor_alloc_unused_page(mfn_t *mfn);
+ int hypervisor_free_unused_page(mfn_t mfn);
+
+ DECLARE_PER_CPU(unsigned int, vcpu_id);
++DECLARE_PER_CPU(struct vcpu_info *, vcpu_info);
+
+ #else
+
+--
+2.14.3
+
+
+From f5ca36927e87fd4fee647ca567aca01b7ab78004 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Thu, 16 Nov 2017 17:56:18 +0000
+Subject: [PATCH 43/77] x86: xen pv clock time source
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+It is a variant of TSC clock source.
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+Changes since v1:
+ - Use the mapped vcpu_info.
+---
+ xen/arch/x86/time.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 89 insertions(+)
+
+diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
+index 6c20b1036d..ab866ad68d 100644
+--- a/xen/arch/x86/time.c
++++ b/xen/arch/x86/time.c
+@@ -29,6 +29,7 @@
+ #include <asm/mpspec.h>
+ #include <asm/processor.h>
+ #include <asm/fixmap.h>
++#include <asm/guest.h>
+ #include <asm/mc146818rtc.h>
+ #include <asm/div64.h>
+ #include <asm/acpi.h>
+@@ -525,6 +526,91 @@ static struct platform_timesource __initdata plt_tsc =
+ .init = init_tsc,
+ };
+
++#ifdef CONFIG_XEN_GUEST
++/************************************************************
++ * PLATFORM TIMER 5: XEN PV CLOCK SOURCE
++ *
++ * Xen clock source is a variant of TSC source.
++ */
++
++static uint64_t xen_timer_cpu_frequency(void)
++{
++ struct vcpu_time_info *info = &this_cpu(vcpu_info)->time;
++ uint64_t freq;
++
++ freq = 1000000000ULL << 32;
++ do_div(freq, info->tsc_to_system_mul);
++ if ( info->tsc_shift < 0 )
++ freq <<= -info->tsc_shift;
++ else
++ freq >>= info->tsc_shift;
++
++ return freq;
++}
++
++static int64_t __init init_xen_timer(struct platform_timesource *pts)
++{
++ if ( !xen_guest )
++ return 0;
++
++ pts->frequency = xen_timer_cpu_frequency();
++
++ return pts->frequency;
++}
++
++static always_inline uint64_t read_cycle(const struct vcpu_time_info *info,
++ uint64_t tsc)
++{
++ uint64_t delta = tsc - info->tsc_timestamp;
++ struct time_scale ts = {
++ .shift = info->tsc_shift,
++ .mul_frac = info->tsc_to_system_mul,
++ };
++ uint64_t offset = scale_delta(delta, &ts);
++
++ return info->system_time + offset;
++}
++
++static uint64_t read_xen_timer(void)
++{
++ struct vcpu_time_info *info = &this_cpu(vcpu_info)->time;
++ uint32_t version;
++ uint64_t ret;
++ uint64_t last;
++ static uint64_t last_value;
++
++ do {
++ version = info->version & ~1;
++ /* Make sure version is read before the data */
++ smp_rmb();
++
++ ret = read_cycle(info, rdtsc_ordered());
++ /* Ignore fancy flags for now */
++
++ /* Make sure version is reread after the data */
++ smp_rmb();
++ } while ( unlikely(version != info->version) );
++
++ /* Maintain a monotonic global value */
++ do {
++ last = read_atomic(&last_value);
++ if ( ret < last )
++ return last;
++ } while ( unlikely(cmpxchg(&last_value, last, ret) != last) );
++
++ return ret;
++}
++
++static struct platform_timesource __initdata plt_xen_timer =
++{
++ .id = "xen",
++ .name = "XEN PV CLOCK",
++ .read_counter = read_xen_timer,
++ .init = init_xen_timer,
++ .counter_bits = 63,
++};
++#endif
++
+ /************************************************************
+ * GENERIC PLATFORM TIMER INFRASTRUCTURE
+ */
+@@ -672,6 +758,9 @@ static s64 __init try_platform_timer(struct platform_timesource
*pts)
+ static u64 __init init_platform_timer(void)
+ {
+ static struct platform_timesource * __initdata plt_timers[] = {
++#ifdef CONFIG_XEN_GUEST
++ &plt_xen_timer,
++#endif
+ &plt_hpet, &plt_pmtimer, &plt_pit
+ };
+
+--
+2.14.3
+
+
+From 949eb11d5813466f1456a6229ff01e294fb1cdeb Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Fri, 17 Nov 2017 12:46:41 +0000
+Subject: [PATCH 44/77] x86: APIC timer calibration when running as a guest
+
+The timer calibration currently depends on PIT. Introduce a variant
+to wait for a tick's worth of time to elapse when running as a PVH
+guest.
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+---
+ xen/arch/x86/apic.c | 38 ++++++++++++++++++++++++++++++--------
+ 1 file changed, 30 insertions(+), 8 deletions(-)
+
+diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
+index ed59440c45..5039173827 100644
+--- a/xen/arch/x86/apic.c
++++ b/xen/arch/x86/apic.c
+@@ -36,6 +36,8 @@
+ #include <mach_apic.h>
+ #include <io_ports.h>
+ #include <xen/kexec.h>
++#include <asm/guest.h>
++#include <asm/time.h>
+
+ static bool __read_mostly tdt_enabled;
+ static bool __initdata tdt_enable = true;
+@@ -1091,6 +1093,20 @@ static void setup_APIC_timer(void)
+ local_irq_restore(flags);
+ }
+
++static void wait_tick_pvh(void)
++{
++ u64 lapse_ns = 1000000000ULL / HZ;
++ s_time_t start, curr_time;
++
++ start = NOW();
++
++ /* Won't wrap around */
++ do {
++ cpu_relax();
++ curr_time = NOW();
++ } while ( curr_time - start < lapse_ns );
++}
++
+ /*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+@@ -1123,12 +1139,15 @@ static int __init calibrate_APIC_clock(void)
+ */
+ __setup_APIC_LVTT(1000000000);
+
+- /*
+- * The timer chip counts down to zero. Let's wait
+- * for a wraparound to start exact measurement:
+- * (the current tick might have been already half done)
+- */
+- wait_8254_wraparound();
++ if ( !xen_guest )
++ /*
++ * The timer chip counts down to zero. Let's wait
++ * for a wraparound to start exact measurement:
++ * (the current tick might have been already half done)
++ */
++ wait_8254_wraparound();
++ else
++ wait_tick_pvh();
+
+ /*
+ * We wrapped around just now. Let's start:
+@@ -1137,10 +1156,13 @@ static int __init calibrate_APIC_clock(void)
+ tt1 = apic_read(APIC_TMCCT);
+
+ /*
+- * Let's wait LOOPS wraprounds:
++ * Let's wait LOOPS ticks:
+ */
+ for (i = 0; i < LOOPS; i++)
+- wait_8254_wraparound();
++ if ( !xen_guest )
++ wait_8254_wraparound();
++ else
++ wait_tick_pvh();
+
+ tt2 = apic_read(APIC_TMCCT);
+ t2 = rdtsc_ordered();
+--
+2.14.3
+
+
+From 5a543c6f397c9e4f8068e83246967ca7bd92605c Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Fri, 17 Nov 2017 15:19:09 +0000
+Subject: [PATCH 45/77] x86: read wallclock from Xen when running in pvh mode
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+---
+ xen/arch/x86/time.c | 32 ++++++++++++++++++++++++++++----
+ 1 file changed, 28 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
+index ab866ad68d..2dbf1c7d75 100644
+--- a/xen/arch/x86/time.c
++++ b/xen/arch/x86/time.c
+@@ -964,6 +964,30 @@ static unsigned long get_cmos_time(void)
+ return mktime(rtc.year, rtc.mon, rtc.day, rtc.hour, rtc.min, rtc.sec);
+ }
+
++static unsigned long get_wallclock_time(void)
++{
++#ifdef CONFIG_XEN_GUEST
++ if ( xen_guest )
++ {
++ struct shared_info *sh_info = XEN_shared_info;
++ uint32_t wc_version;
++ uint64_t wc_sec;
++
++ do {
++ wc_version = sh_info->wc_version & ~1;
++ smp_rmb();
++
++ wc_sec = sh_info->wc_sec;
++ smp_rmb();
++ } while ( wc_version != sh_info->wc_version );
++
++ return wc_sec + read_xen_timer() / 1000000000;
++ }
++#endif
++
++ return get_cmos_time();
++}
++
+ /***************************************************************************
+ * System Time
+ ***************************************************************************/
+@@ -1759,8 +1783,8 @@ int __init init_xen_time(void)
+
+ open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
+
+- /* NB. get_cmos_time() can take over one second to execute. */
+- do_settime(get_cmos_time(), 0, NOW());
++ /* NB. get_wallclock_time() can take over one second to execute. */
++ do_settime(get_wallclock_time(), 0, NOW());
+
+ /* Finish platform timer initialization. */
+ try_platform_timer_tail(false);
+@@ -1870,7 +1894,7 @@ int time_suspend(void)
+ {
+ if ( smp_processor_id() == 0 )
+ {
+- cmos_utc_offset = -get_cmos_time();
++ cmos_utc_offset = -get_wallclock_time();
+ cmos_utc_offset += get_sec();
+ kill_timer(&calibration_timer);
+
+@@ -1897,7 +1921,7 @@ int time_resume(void)
+
+ set_timer(&calibration_timer, NOW() + EPOCH);
+
+- do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
++ do_settime(get_wallclock_time() + cmos_utc_offset, 0, NOW());
+
+ update_vcpu_system_time(current);
+
+--
+2.14.3
+
+
+From 3b058a3eabf24b4b31521a49a600438b6a511739 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Thu, 11 Jan 2018 13:45:48 +0000
+Subject: [PATCH 46/77] x86: don't swallow the first command line item in guest
+ mode
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/arch/x86/setup.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index b9b97d68f5..c1f4184e06 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -632,8 +632,8 @@ static char * __init cmdline_cook(char *p, const char *loader_name)
+ while ( *p == ' ' )
+ p++;
+
+- /* GRUB2 does not include image name as first item on command line. */
+- if ( loader_is_grub2(loader_name) )
++ /* GRUB2 and PVH don't not include image name as first item on command line. */
++ if ( xen_guest || loader_is_grub2(loader_name) )
+ return p;
+
+ /* Strip image name plus whitespace. */
+--
+2.14.3
+
+
+From cb5dc94ba74f06c574390b58695dd2b4d4971571 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Tue, 9 Jan 2018 12:51:37 +0000
+Subject: [PATCH 47/77] x86/guest: setup event channel upcall vector
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+And a dummy event channel upcall handler.
+
+Note that with the current code the underlying Xen (L0) must support
+HVMOP_set_evtchn_upcall_vector or else event channel setup is going to
+fail. This limitation can be lifted by implementing more event channel
+interrupt injection methods as a backup.
+
+Register callback_irq to trick toolstack to think the domain is
+enlightened.
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/arch/x86/guest/xen.c | 41 +++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/guest/hypercall.h | 17 +++++++++++++++
+ 2 files changed, 58 insertions(+)
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index 60626ec21c..59871170c8 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -24,6 +24,7 @@
+ #include <xen/rangeset.h>
+ #include <xen/types.h>
+
++#include <asm/apic.h>
+ #include <asm/e820.h>
+ #include <asm/guest.h>
+ #include <asm/msr.h>
+@@ -186,6 +187,43 @@ static void __init init_memmap(void)
+ }
+ }
+
++static void xen_evtchn_upcall(struct cpu_user_regs *regs)
++{
++ struct vcpu_info *vcpu_info = this_cpu(vcpu_info);
++
++ vcpu_info->evtchn_upcall_pending = 0;
++ write_atomic(&vcpu_info->evtchn_pending_sel, 0);
++
++ ack_APIC_irq();
++}
++
++static void init_evtchn(void)
++{
++ static uint8_t evtchn_upcall_vector;
++ int rc;
++
++ if ( !evtchn_upcall_vector )
++ alloc_direct_apic_vector(&evtchn_upcall_vector, xen_evtchn_upcall);
++
++ ASSERT(evtchn_upcall_vector);
++
++ rc = xen_hypercall_set_evtchn_upcall_vector(this_cpu(vcpu_id),
++ evtchn_upcall_vector);
++ if ( rc )
++ panic("Unable to set evtchn upcall vector: %d", rc);
++
++ /* Trick toolstack to think we are enlightened */
++ {
++ struct xen_hvm_param a = {
++ .domid = DOMID_SELF,
++ .index = HVM_PARAM_CALLBACK_IRQ,
++ .value = 1,
++ };
++
++ BUG_ON(xen_hypercall_hvm_op(HVMOP_set_param, &a));
++ }
++}
++
+ void __init hypervisor_setup(void)
+ {
+ init_memmap();
+@@ -210,12 +248,15 @@ void __init hypervisor_setup(void)
+ "unable to map vCPU info, limiting vCPUs to: %u\n",
+ XEN_LEGACY_MAX_VCPUS);
+ }
++
++ init_evtchn();
+ }
+
+ void hypervisor_ap_setup(void)
+ {
+ set_vcpu_id();
+ map_vcpuinfo();
++ init_evtchn();
+ }
+
+ int hypervisor_alloc_unused_page(mfn_t *mfn)
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+index dbc57a566e..b36a1cc189 100644
+--- a/xen/include/asm-x86/guest/hypercall.h
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -25,6 +25,7 @@
+
+ #include <public/xen.h>
+ #include <public/sched.h>
++#include <public/hvm/hvm_op.h>
+
+ #include <public/vcpu.h>
+
+@@ -104,6 +105,11 @@ static inline int xen_hypercall_vcpu_op(unsigned int cmd, unsigned
int vcpu,
+ return _hypercall64_3(long, __HYPERVISOR_vcpu_op, cmd, vcpu, arg);
+ }
+
++static inline long xen_hypercall_hvm_op(unsigned int op, void *arg)
++{
++ return _hypercall64_2(long, __HYPERVISOR_hvm_op, op, arg);
++}
++
+ /*
+ * Higher level hypercall helpers
+ */
+@@ -120,6 +126,17 @@ static inline long xen_hypercall_shutdown(unsigned int reason)
+ return xen_hypercall_sched_op(SCHEDOP_shutdown, &s);
+ }
+
++static inline long xen_hypercall_set_evtchn_upcall_vector(
++ unsigned int cpu, unsigned int vector)
++{
++ struct xen_hvm_evtchn_upcall_vector a = {
++ .vcpu = cpu,
++ .vector = vector,
++ };
++
++ return xen_hypercall_hvm_op(HVMOP_set_evtchn_upcall_vector, &a);
++}
++
+ #else /* CONFIG_XEN_GUEST */
+
+ #include <public/sched.h>
+--
+2.14.3
+
+
+From 7477359b9a462d066a4819cefb6d6e60bc4defc5 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Date: Fri, 24 Nov 2017 11:07:32 +0000
+Subject: [PATCH 48/77] x86/guest: add PV console code
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/drivers/char/Makefile | 1 +
+ xen/drivers/char/xen_pv_console.c | 205 ++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/fixmap.h | 1 +
+ xen/include/asm-x86/guest/hypercall.h | 33 ++++++
+ xen/include/xen/pv_console.h | 32 ++++++
+ 5 files changed, 272 insertions(+)
+ create mode 100644 xen/drivers/char/xen_pv_console.c
+ create mode 100644 xen/include/xen/pv_console.h
+
+diff --git a/xen/drivers/char/Makefile b/xen/drivers/char/Makefile
+index aa169d7961..9d48d0f2dc 100644
+--- a/xen/drivers/char/Makefile
++++ b/xen/drivers/char/Makefile
+@@ -8,3 +8,4 @@ obj-$(CONFIG_HAS_SCIF) += scif-uart.o
+ obj-$(CONFIG_HAS_EHCI) += ehci-dbgp.o
+ obj-$(CONFIG_ARM) += arm-uart.o
+ obj-y += serial.o
++obj-$(CONFIG_XEN_GUEST) += xen_pv_console.o
+diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c
+new file mode 100644
+index 0000000000..f5aca4c69e
+--- /dev/null
++++ b/xen/drivers/char/xen_pv_console.c
+@@ -0,0 +1,205 @@
++/******************************************************************************
++ * drivers/char/xen_pv_console.c
++ *
++ * A frontend driver for Xen's PV console.
++ * Can be used when Xen is running on top of Xen in pv-in-pvh mode.
++ * (Linux's name for this is hvc console)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++
++#include <xen/lib.h>
++#include <xen/hypercall.h>
++#include <xen/pv_console.h>
++
++#include <asm/fixmap.h>
++#include <asm/guest.h>
++
++#include <public/io/console.h>
++
++static struct xencons_interface *cons_ring;
++static evtchn_port_t cons_evtchn;
++static serial_rx_fn cons_rx_handler;
++static DEFINE_SPINLOCK(tx_lock);
++
++void __init pv_console_init(void)
++{
++ long r;
++ uint64_t raw_pfn = 0, raw_evtchn = 0;
++
++ if ( !xen_guest )
++ {
++ printk("PV console init failed: xen_guest mode is not active!\n");
++ return;
++ }
++
++ r = xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN, &raw_pfn);
++ if ( r < 0 )
++ goto error;
++
++ r = xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_EVTCHN, &raw_evtchn);
++ if ( r < 0 )
++ goto error;
++
++ set_fixmap(FIX_PV_CONSOLE, raw_pfn << PAGE_SHIFT);
++ cons_ring = (struct xencons_interface *)fix_to_virt(FIX_PV_CONSOLE);
++ cons_evtchn = raw_evtchn;
++
++ printk("Initialised PV console at 0x%p with pfn %#lx and evtchn %#x\n",
++ cons_ring, raw_pfn, cons_evtchn);
++ return;
++
++ error:
++ printk("Couldn't initialise PV console\n");
++}
++
++void __init pv_console_set_rx_handler(serial_rx_fn fn)
++{
++ cons_rx_handler = fn;
++}
++
++void __init pv_console_init_postirq(void)
++{
++ if ( !cons_ring )
++ return;
++
++ xen_hypercall_evtchn_unmask(cons_evtchn);
++}
++
++static void notify_daemon(void)
++{
++ xen_hypercall_evtchn_send(cons_evtchn);
++}
++
++size_t pv_console_rx(struct cpu_user_regs *regs)
++{
++ char c;
++ XENCONS_RING_IDX cons, prod;
++ size_t recv = 0;
++
++ if ( !cons_ring )
++ return 0;
++
++ /* TODO: move this somewhere */
++ if ( !test_bit(cons_evtchn, XEN_shared_info->evtchn_pending) )
++ return 0;
++
++ prod = ACCESS_ONCE(cons_ring->in_prod);
++ cons = cons_ring->in_cons;
++
++ /*
++ * Latch pointers before accessing the ring. Included compiler barrier also
++ * ensures that pointers are really read only once into local variables.
++ */
++ smp_rmb();
++
++ ASSERT((prod - cons) <= sizeof(cons_ring->in));
++
++ while ( cons != prod )
++ {
++ c = cons_ring->in[MASK_XENCONS_IDX(cons++, cons_ring->in)];
++ if ( cons_rx_handler )
++ cons_rx_handler(c, regs);
++ recv++;
++ }
++
++ /* No need for a mem barrier because every character was already consumed */
++ barrier();
++ ACCESS_ONCE(cons_ring->in_cons) = cons;
++ notify_daemon();
++
++ clear_bit(cons_evtchn, XEN_shared_info->evtchn_pending);
++
++ return recv;
++}
++
++static size_t pv_ring_puts(const char *buf)
++{
++ XENCONS_RING_IDX cons, prod;
++ size_t sent = 0, avail;
++ bool put_r = false;
++
++ while ( buf[sent] != '\0' || put_r )
++ {
++ cons = ACCESS_ONCE(cons_ring->out_cons);
++ prod = cons_ring->out_prod;
++
++ /*
++ * Latch pointers before accessing the ring. Included compiler barrier
++ * ensures that pointers are really read only once into local variables.
++ */
++ smp_rmb();
++
++ ASSERT((prod - cons) <= sizeof(cons_ring->out));
++ avail = sizeof(cons_ring->out) - (prod - cons);
++
++ if ( avail == 0 )
++ {
++ /* Wait for xenconsoled to consume our output */
++ xen_hypercall_sched_op(SCHEDOP_yield, NULL);
++ continue;
++ }
++
++ while ( avail && (buf[sent] != '\0' || put_r) )
++ {
++ if ( put_r )
++ {
++ cons_ring->out[MASK_XENCONS_IDX(prod++, cons_ring->out)] =
'\r';
++ put_r = false;
++ }
++ else
++ {
++ cons_ring->out[MASK_XENCONS_IDX(prod++, cons_ring->out)] =
++ buf[sent];
++
++ /* Send '\r' for every '\n' */
++ if ( buf[sent] == '\n' )
++ put_r = true;
++ sent++;
++ }
++ avail--;
++ }
++
++ /* Write to the ring before updating the pointer */
++ smp_wmb();
++ ACCESS_ONCE(cons_ring->out_prod) = prod;
++ notify_daemon();
++ }
++
++ return sent;
++}
++
++void pv_console_puts(const char *buf)
++{
++ unsigned long flags;
++
++ if ( !cons_ring )
++ return;
++
++ spin_lock_irqsave(&tx_lock, flags);
++ pv_ring_puts(buf);
++ spin_unlock_irqrestore(&tx_lock, flags);
++}
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/include/asm-x86/fixmap.h b/xen/include/asm-x86/fixmap.h
+index ded4ddf21b..16ccaa2c77 100644
+--- a/xen/include/asm-x86/fixmap.h
++++ b/xen/include/asm-x86/fixmap.h
+@@ -46,6 +46,7 @@ enum fixed_addresses {
+ FIX_COM_END,
+ FIX_EHCI_DBGP,
+ #ifdef CONFIG_XEN_GUEST
++ FIX_PV_CONSOLE,
+ FIX_XEN_SHARED_INFO,
+ #endif /* CONFIG_XEN_GUEST */
+ /* Everything else should go further down. */
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+index b36a1cc189..81a955d479 100644
+--- a/xen/include/asm-x86/guest/hypercall.h
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -105,6 +105,11 @@ static inline int xen_hypercall_vcpu_op(unsigned int cmd, unsigned
int vcpu,
+ return _hypercall64_3(long, __HYPERVISOR_vcpu_op, cmd, vcpu, arg);
+ }
+
++static inline long xen_hypercall_event_channel_op(unsigned int cmd, void *arg)
++{
++ return _hypercall64_2(long, __HYPERVISOR_event_channel_op, cmd, arg);
++}
++
+ static inline long xen_hypercall_hvm_op(unsigned int op, void *arg)
+ {
+ return _hypercall64_2(long, __HYPERVISOR_hvm_op, op, arg);
+@@ -126,6 +131,34 @@ static inline long xen_hypercall_shutdown(unsigned int reason)
+ return xen_hypercall_sched_op(SCHEDOP_shutdown, &s);
+ }
+
++static inline long xen_hypercall_evtchn_send(evtchn_port_t port)
++{
++ struct evtchn_send send = { .port = port };
++
++ return xen_hypercall_event_channel_op(EVTCHNOP_send, &send);
++}
++
++static inline long xen_hypercall_evtchn_unmask(evtchn_port_t port)
++{
++ struct evtchn_unmask unmask = { .port = port };
++
++ return xen_hypercall_event_channel_op(EVTCHNOP_unmask, &unmask);
++}
++
++static inline long xen_hypercall_hvm_get_param(uint32_t index, uint64_t *value)
++{
++ struct xen_hvm_param xhv = {
++ .domid = DOMID_SELF,
++ .index = index,
++ };
++ long ret = xen_hypercall_hvm_op(HVMOP_get_param, &xhv);
++
++ if ( ret == 0 )
++ *value = xhv.value;
++
++ return ret;
++}
++
+ static inline long xen_hypercall_set_evtchn_upcall_vector(
+ unsigned int cpu, unsigned int vector)
+ {
+diff --git a/xen/include/xen/pv_console.h b/xen/include/xen/pv_console.h
+new file mode 100644
+index 0000000000..e578b56620
+--- /dev/null
++++ b/xen/include/xen/pv_console.h
+@@ -0,0 +1,32 @@
++#ifndef __XEN_PV_CONSOLE_H__
++#define __XEN_PV_CONSOLE_H__
++
++#include <xen/serial.h>
++
++#ifdef CONFIG_XEN_GUEST
++
++void pv_console_init(void);
++void pv_console_set_rx_handler(serial_rx_fn fn);
++void pv_console_init_postirq(void);
++void pv_console_puts(const char *buf);
++size_t pv_console_rx(struct cpu_user_regs *regs);
++
++#else
++
++static inline void pv_console_init(void) {}
++static inline void pv_console_set_rx_handler(serial_rx_fn fn) { }
++static inline void pv_console_init_postirq(void) { }
++static inline void pv_console_puts(const char *buf) { }
++static inline size_t pv_console_rx(struct cpu_user_regs *regs) { return 0; }
++
++#endif /* !CONFIG_XEN_GUEST */
++#endif /* __XEN_PV_CONSOLE_H__ */
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--
+2.14.3
+
+
+From aa96a59dc2290fc3084525659282a59b29eff1d5 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Date: Fri, 24 Nov 2017 11:21:17 +0000
+Subject: [PATCH 49/77] x86/guest: use PV console for Xen/Dom0 I/O
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ docs/misc/xen-command-line.markdown | 5 ++++-
+ xen/arch/x86/guest/xen.c | 3 +++
+ xen/drivers/char/console.c | 16 ++++++++++++++++
+ 3 files changed, 23 insertions(+), 1 deletion(-)
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index e5979bceee..da006dd4f7 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -365,7 +365,7 @@ The following are examples of correct specifications:
+ Specify the size of the console ring buffer.
+
+ ### console
+-> `= List of [ vga | com1[H,L] | com2[H,L] | dbgp | none ]`
++> `= List of [ vga | com1[H,L] | com2[H,L] | pv | dbgp | none ]`
+
+ > Default: `console=com1,vga`
+
+@@ -381,6 +381,9 @@ the converse; transmitted and received characters will have their
MSB
+ cleared. This allows a single port to be shared by two subsystems
+ (e.g. console and debugger).
+
++`pv` indicates that Xen should use Xen's PV console. This option is
++only available when used together with `pv-in-pvh`.
++
+ `dbgp` indicates that Xen should use a USB debug port.
+
+ `none` indicates that Xen should not use a console. This option only
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index 59871170c8..d4968b47aa 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -23,6 +23,7 @@
+ #include <xen/pfn.h>
+ #include <xen/rangeset.h>
+ #include <xen/types.h>
++#include <xen/pv_console.h>
+
+ #include <asm/apic.h>
+ #include <asm/e820.h>
+@@ -194,6 +195,8 @@ static void xen_evtchn_upcall(struct cpu_user_regs *regs)
+ vcpu_info->evtchn_upcall_pending = 0;
+ write_atomic(&vcpu_info->evtchn_pending_sel, 0);
+
++ pv_console_rx(regs);
++
+ ack_APIC_irq();
+ }
+
+diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
+index d05ebf9f70..8acd358395 100644
+--- a/xen/drivers/char/console.c
++++ b/xen/drivers/char/console.c
+@@ -32,6 +32,7 @@
+ #include <xen/warning.h>
+
+ #ifdef CONFIG_X86
++#include <xen/pv_console.h>
+ #include <asm/guest.h>
+ #endif
+
+@@ -344,6 +345,11 @@ static void sercon_puts(const char *s)
+ (*serial_steal_fn)(s);
+ else
+ serial_puts(sercon_handle, s);
++
++#ifdef CONFIG_X86
++ /* Copy all serial output into PV console */
++ pv_console_puts(s);
++#endif
+ }
+
+ static void dump_console_ring_key(unsigned char key)
+@@ -805,6 +811,8 @@ void __init console_init_preirq(void)
+ if ( !strncmp(p, "vga", 3) )
+ video_init();
+ #ifdef CONFIG_X86
++ else if ( !strncmp(p, "pv", 2) )
++ pv_console_init();
+ else if ( !strncmp(p, "xen", 3) )
+ opt_console_xen = true;
+ #endif
+@@ -828,6 +836,10 @@ void __init console_init_preirq(void)
+
+ serial_set_rx_handler(sercon_handle, serial_rx);
+
++#ifdef CONFIG_X86
++ pv_console_set_rx_handler(serial_rx);
++#endif
++
+ /* HELLO WORLD --- start-of-day banner text. */
+ spin_lock(&console_lock);
+ __putstr(xen_banner());
+@@ -880,6 +892,10 @@ void __init console_init_postirq(void)
+ {
+ serial_init_postirq();
+
++#ifdef CONFIG_X86
++ pv_console_init_postirq();
++#endif
++
+ if ( conring != _conring )
+ return;
+
+--
+2.14.3
+
+
+From b5ead1fad3930a3e1034f64f9af416ae211e27da Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Fri, 10 Nov 2017 16:35:26 +0000
+Subject: [PATCH 50/77] x86/shim: Kconfig and command line options
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ docs/misc/xen-command-line.markdown | 11 ++++++++++
+ xen/arch/x86/Kconfig | 22 +++++++++++++++++++
+ xen/arch/x86/pv/Makefile | 1 +
+ xen/arch/x86/pv/shim.c | 39 ++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/guest.h | 1 +
+ xen/include/asm-x86/pv/shim.h | 42 +++++++++++++++++++++++++++++++++++++
+ 6 files changed, 116 insertions(+)
+ create mode 100644 xen/arch/x86/pv/shim.c
+ create mode 100644 xen/include/asm-x86/pv/shim.h
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index da006dd4f7..3a1a9c1fba 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -1445,6 +1445,17 @@ do; there may be other custom operating systems which do. If
you're
+ certain you don't plan on having PV guests which use this feature,
+ turning it off can reduce the attack surface.
+
++### pv-shim (x86)
++> `= <boolean>`
++
++> Default: `false`
++
++This option is intended for use by a toolstack, when choosing to run a PV
++guest compatibly inside an HVM container.
++
++In this mode, the kernel and initrd passed as modules to the hypervisor are
++constructed into a plain unprivileged PV domain.
++
+ ### rcu-idle-timer-period-ms
+ > `= <integer>`
+
+diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
+index c0b0bcdcb3..4953533f16 100644
+--- a/xen/arch/x86/Kconfig
++++ b/xen/arch/x86/Kconfig
+@@ -133,6 +133,28 @@ config PVH_GUEST
+ ---help---
+ Support booting using the PVH ABI.
+
++ If unsure, say N.
++
++config PV_SHIM
++ def_bool n
++ prompt "PV Shim"
++ depends on PV && XEN_GUEST
++ ---help---
++ Build Xen with a mode which acts as a shim to allow PV guest to run
++ in an HVM/PVH container. This mode can only be enabled with command
++ line option.
++
++ If unsure, say N.
++
++config PV_SHIM_EXCLUSIVE
++ def_bool n
++ prompt "PV Shim Exclusive"
++ depends on PV_SHIM
++ ---help---
++ Build Xen in a way which unconditionally assumes PV_SHIM mode. This
++ option is only intended for use when building a dedicated PV Shim
++ firmware, and will not function correctly in other scenarios.
++
+ If unsure, say N.
+ endmenu
+
+diff --git a/xen/arch/x86/pv/Makefile b/xen/arch/x86/pv/Makefile
+index bac2792aa2..65bca04175 100644
+--- a/xen/arch/x86/pv/Makefile
++++ b/xen/arch/x86/pv/Makefile
+@@ -11,6 +11,7 @@ obj-y += iret.o
+ obj-y += misc-hypercalls.o
+ obj-y += mm.o
+ obj-y += ro-page-fault.o
++obj-$(CONFIG_PV_SHIM) += shim.o
+ obj-y += traps.o
+
+ obj-bin-y += dom0_build.init.o
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+new file mode 100644
+index 0000000000..4d037355db
+--- /dev/null
++++ b/xen/arch/x86/pv/shim.c
+@@ -0,0 +1,39 @@
++/******************************************************************************
++ * arch/x86/pv/shim.c
++ *
++ * Functionaltiy for PV Shim mode
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++#include <xen/init.h>
++#include <xen/types.h>
++
++#include <asm/apic.h>
++
++#ifndef CONFIG_PV_SHIM_EXCLUSIVE
++bool pv_shim;
++boolean_param("pv-shim", pv_shim);
++#endif
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/include/asm-x86/guest.h b/xen/include/asm-x86/guest.h
+index 5abdb8c433..a38c6b5b3f 100644
+--- a/xen/include/asm-x86/guest.h
++++ b/xen/include/asm-x86/guest.h
+@@ -22,6 +22,7 @@
+ #include <asm/guest/hypercall.h>
+ #include <asm/guest/pvh-boot.h>
+ #include <asm/guest/xen.h>
++#include <asm/pv/shim.h>
+
+ #endif /* __X86_GUEST_H__ */
+
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+new file mode 100644
+index 0000000000..1468cfd498
+--- /dev/null
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -0,0 +1,42 @@
++/******************************************************************************
++ * asm-x86/guest/shim.h
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms and conditions of the GNU General Public
++ * License, version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public
++ * License along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++
++#ifndef __X86_PV_SHIM_H__
++#define __X86_PV_SHIM_H__
++
++#include <xen/types.h>
++
++#if defined(CONFIG_PV_SHIM_EXCLUSIVE)
++# define pv_shim 1
++#elif defined(CONFIG_PV_SHIM)
++extern bool pv_shim;
++#else
++# define pv_shim 0
++#endif /* CONFIG_PV_SHIM{,_EXCLUSIVE} */
++
++#endif /* __X86_PV_SHIM_H__ */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--
+2.14.3
+
+
+From 378425686619e5fae65988cfedd23d5883206c2b Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 22 Nov 2017 13:31:26 +0000
+Subject: [PATCH 51/77] tools/firmware: Build and install xen-shim
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Link a minimum set of files to build the shim. The linkfarm rune can
+handle creation and deletion of files. Introduce build-shim and
+install-shim targets in xen/Makefile.
+
+We can do better by properly generate the dependency from the list of
+files but that's an improvement for later.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+[change default scheduler to credit]
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+v2: Introduce a top-level build-shim target. Split the xen-shim build
+ with normal build.
+---
+ .gitignore | 4 ++
+ tools/firmware/Makefile | 9 ++++
+ tools/firmware/xen-dir/Makefile | 59 ++++++++++++++++++++++++++
+ tools/firmware/xen-dir/shim.config | 87 ++++++++++++++++++++++++++++++++++++++
+ xen/Makefile | 16 +++++--
+ 5 files changed, 172 insertions(+), 3 deletions(-)
+ create mode 100644 tools/firmware/xen-dir/Makefile
+ create mode 100644 tools/firmware/xen-dir/shim.config
+
+diff --git a/.gitignore b/.gitignore
+index 8da67daf31..f6cc61a701 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -155,6 +155,10 @@ tools/firmware/rombios/rombios[^/]*.s
+ tools/firmware/rombios/32bit/32bitbios_flat.h
+ tools/firmware/vgabios/vbetables-gen
+ tools/firmware/vgabios/vbetables.h
++tools/firmware/xen-dir/*.old
++tools/firmware/xen-dir/linkfarm.stamp*
++tools/firmware/xen-dir/xen-root
++tools/firmware/xen-dir/xen-shim
+ tools/flask/utils/flask-getenforce
+ tools/flask/utils/flask-get-bool
+ tools/flask/utils/flask-loadpolicy
+diff --git a/tools/firmware/Makefile b/tools/firmware/Makefile
+index 868b506920..9387cc0878 100644
+--- a/tools/firmware/Makefile
++++ b/tools/firmware/Makefile
+@@ -1,6 +1,8 @@
+ XEN_ROOT = $(CURDIR)/../..
+ include $(XEN_ROOT)/tools/Rules.mk
+
++CONFIG_PV_SHIM := y
++
+ # hvmloader is a 32-bit protected mode binary.
+ TARGET := hvmloader/hvmloader
+ INST_DIR := $(DESTDIR)$(XENFIRMWAREDIR)
+@@ -11,6 +13,7 @@ SUBDIRS-$(CONFIG_SEABIOS) += seabios-dir
+ SUBDIRS-$(CONFIG_ROMBIOS) += rombios
+ SUBDIRS-$(CONFIG_ROMBIOS) += vgabios
+ SUBDIRS-$(CONFIG_ROMBIOS) += etherboot
++SUBDIRS-$(CONFIG_PV_SHIM) += xen-dir
+ SUBDIRS-y += hvmloader
+
+ LD32BIT-$(CONFIG_FreeBSD) := LD32BIT_FLAG=-melf_i386_fbsd
+@@ -48,6 +51,9 @@ endif
+ ifeq ($(CONFIG_OVMF),y)
+ $(INSTALL_DATA) ovmf-dir/ovmf.bin $(INST_DIR)/ovmf.bin
+ endif
++ifeq ($(CONFIG_PV_SHIM),y)
++ $(INSTALL_DATA) xen-dir/xen-shim $(INST_DIR)/xen-shim
++endif
+
+ .PHONY: uninstall
+ uninstall:
+@@ -58,6 +64,9 @@ endif
+ ifeq ($(CONFIG_OVMF),y)
+ rm -f $(INST_DIR)/ovmf.bin
+ endif
++ifeq ($(CONFIG_PV_SHIM),y)
++ rm -f $(INST_DIR)/xen-shim
++endif
+
+ .PHONY: clean
+ clean: subdirs-clean
+diff --git a/tools/firmware/xen-dir/Makefile b/tools/firmware/xen-dir/Makefile
+new file mode 100644
+index 0000000000..adf6c31e8d
+--- /dev/null
++++ b/tools/firmware/xen-dir/Makefile
+@@ -0,0 +1,59 @@
++XEN_ROOT = $(CURDIR)/../../..
++
++all: xen-shim
++
++.PHONY: FORCE
++FORCE:
++
++D=xen-root
++
++# Minimun set of files / directories go get Xen to build
++LINK_DIRS=config xen
++LINK_FILES=Config.mk
++
++DEP_DIRS=$(foreach i, $(LINK_DIRS), $(XEN_ROOT)/$(i))
++DEP_FILES=$(foreach i, $(LINK_FILES), $(XEN_ROOT)/$(i))
++
++linkfarm.stamp: $(DEP_DIRS) $(DEP_FILES) FORCE
++ mkdir -p $(D)
++ set -e
++ rm -f linkfarm.stamp.tmp
++ $(foreach d, $(LINK_DIRS), \
++ (mkdir -p $(D)/$(d); \
++ cd $(D)/$(d); \
++ find $(XEN_ROOT)/$(d)/ -type d -printf "./%P\n" | xargs mkdir -p);)
++ $(foreach d, $(LINK_DIRS), \
++ (cd $(XEN_ROOT); \
++ find $(d) ! -type l -type f \
++ $(addprefix ! -path , '*.[oda1]' '*.d[12]')) \
++ >> linkfarm.stamp.tmp ; )
++ $(foreach f, $(LINK_FILES), \
++ echo $(f) >> linkfarm.stamp.tmp ;)
++ cmp -s linkfarm.stamp.tmp linkfarm.stamp && \
++ rm linkfarm.stamp.tmp || { \
++ mv linkfarm.stamp.tmp linkfarm.stamp; \
++ cat linkfarm.stamp | while read f; \
++	do rm -f "$(D)/$$f"; ln -s "$(XEN_ROOT)/$$f" "$(D)/$$f"; done \
++ }
++
++# Copy enough of the tree to build the shim hypervisor
++$(D): linkfarm.stamp
++ $(MAKE) -C $(D)/xen distclean
++
++.PHONY: shim-%config
++shim-%config: $(D) FORCE
++ $(MAKE) -C $(D)/xen $*config \
++ XEN_CONFIG_EXPERT=y \
++ KCONFIG_CONFIG=$(CURDIR)/shim.config
++
++xen-shim: $(D) shim-olddefconfig
++ $(MAKE) -C $(D)/xen install-shim \
++ XEN_CONFIG_EXPERT=y \
++ KCONFIG_CONFIG=$(CURDIR)/shim.config \
++ DESTDIR=$(CURDIR)
++
++.PHONY: distclean clean
++distclean clean:
++ rm -f xen-shim *.old
++ rm -rf $(D)
++ rm -f linkfarm.stamp*
+diff --git a/tools/firmware/xen-dir/shim.config b/tools/firmware/xen-dir/shim.config
+new file mode 100644
+index 0000000000..151a8b41e5
+--- /dev/null
++++ b/tools/firmware/xen-dir/shim.config
+@@ -0,0 +1,87 @@
++#
++# Automatically generated file; DO NOT EDIT.
++# Xen/x86 4.11-unstable Configuration
++#
++CONFIG_X86_64=y
++CONFIG_X86=y
++CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
++
++#
++# Architecture Features
++#
++CONFIG_NR_CPUS=32
++CONFIG_PV=y
++CONFIG_PV_LINEAR_PT=y
++CONFIG_HVM=y
++# CONFIG_SHADOW_PAGING is not set
++# CONFIG_BIGMEM is not set
++# CONFIG_HVM_FEP is not set
++# CONFIG_TBOOT is not set
++CONFIG_XEN_GUEST=y
++CONFIG_PVH_GUEST=y
++CONFIG_PV_SHIM=y
++CONFIG_PV_SHIM_EXCLUSIVE=y
++
++#
++# Common Features
++#
++CONFIG_COMPAT=y
++CONFIG_CORE_PARKING=y
++CONFIG_HAS_ALTERNATIVE=y
++CONFIG_HAS_EX_TABLE=y
++CONFIG_HAS_MEM_ACCESS=y
++CONFIG_HAS_MEM_PAGING=y
++CONFIG_HAS_MEM_SHARING=y
++CONFIG_HAS_PDX=y
++CONFIG_HAS_UBSAN=y
++CONFIG_HAS_KEXEC=y
++CONFIG_HAS_GDBSX=y
++CONFIG_HAS_IOPORTS=y
++# CONFIG_KEXEC is not set
++# CONFIG_TMEM is not set
++# CONFIG_XENOPROF is not set
++# CONFIG_XSM is not set
++
++#
++# Schedulers
++#
++CONFIG_SCHED_CREDIT=y
++# CONFIG_SCHED_CREDIT2 is not set
++# CONFIG_SCHED_RTDS is not set
++# CONFIG_SCHED_ARINC653 is not set
++# CONFIG_SCHED_NULL is not set
++# CONFIG_SCHED_CREDIT_DEFAULT is not set
++CONFIG_SCHED_CREDIT_DEFAULT=y
++CONFIG_SCHED_DEFAULT="credit"
++# CONFIG_LIVEPATCH is not set
++# CONFIG_SUPPRESS_DUPLICATE_SYMBOL_WARNINGS is not set
++CONFIG_CMDLINE=""
++
++#
++# Device Drivers
++#
++CONFIG_ACPI=y
++CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y
++CONFIG_NUMA=y
++CONFIG_HAS_NS16550=y
++CONFIG_HAS_EHCI=y
++CONFIG_HAS_CPUFREQ=y
++CONFIG_HAS_PASSTHROUGH=y
++CONFIG_HAS_PCI=y
++CONFIG_VIDEO=y
++CONFIG_VGA=y
++CONFIG_DEFCONFIG_LIST="$ARCH_DEFCONFIG"
++CONFIG_ARCH_SUPPORTS_INT128=y
++
++#
++# Debugging Options
++#
++# CONFIG_DEBUG is not set
++# CONFIG_CRASH_DEBUG is not set
++# CONFIG_FRAME_POINTER is not set
++# CONFIG_GCOV is not set
++# CONFIG_LOCK_PROFILE is not set
++# CONFIG_PERF_COUNTERS is not set
++# CONFIG_VERBOSE_DEBUG is not set
++# CONFIG_SCRUB_DEBUG is not set
++# CONFIG_UBSAN is not set
+diff --git a/xen/Makefile b/xen/Makefile
+index 58a1f97d7d..623f889082 100644
+--- a/xen/Makefile
++++ b/xen/Makefile
+@@ -37,10 +37,10 @@ default: build
+ .PHONY: dist
+ dist: install
+
+-build install:: include/config/auto.conf
++build install build-shim:: include/config/auto.conf
+
+-.PHONY: build install uninstall clean distclean cscope TAGS tags MAP gtags tests
+-build install uninstall debug clean distclean cscope TAGS tags MAP gtags tests::
++.PHONY: build install uninstall clean distclean cscope TAGS tags MAP gtags tests install-shim build-shim
++build install uninstall debug clean distclean cscope TAGS tags MAP gtags tests install-shim build-shim::
+ ifneq ($(XEN_TARGET_ARCH),x86_32)
+ $(MAKE) -f Rules.mk _$@
+ else
+@@ -80,6 +80,13 @@ _install: $(TARGET)$(CONFIG_XEN_INSTALL_SUFFIX)
+ fi; \
+ fi
+
++.PHONY: _build-shim
++_build-shim: $(TARGET)-shim
++
++.PHONY: _install-shim
++_install-shim: build-shim
++ $(INSTALL_DATA) $(TARGET)-shim $(DESTDIR)
++
+ .PHONY: _tests
+ _tests:
+ $(MAKE) -f $(BASEDIR)/Rules.mk -C test tests
+@@ -144,6 +151,9 @@ $(TARGET): delete-unfresh-files
+ $(MAKE) -f $(BASEDIR)/Rules.mk include/asm-$(TARGET_ARCH)/asm-offsets.h
+ $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET)
+
++$(TARGET)-shim: $(TARGET)
++ $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) $(TARGET)-shim
++
+ # drivers/char/console.o contains static banner/compile info. Blow it away.
+ # Don't refresh these files during e.g., 'sudo make install'
+ .PHONY: delete-unfresh-files
+--
+2.14.3
+
+
+From 2b8a95a2961ba4a5e54b45b49cb6528068a3c0b3 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Tue, 28 Nov 2017 09:54:17 +0000
+Subject: [PATCH 52/77] xen/x86: make VGA support selectable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Through a Kconfig option. Enable it by default, and disable it for the
+PV-in-PVH shim.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+---
+Changes since v1:
+ - Make the VGA option dependent on the shim one.
+---
+ tools/firmware/xen-dir/shim.config | 3 +--
+ xen/arch/x86/Kconfig | 1 -
+ xen/arch/x86/boot/build32.mk | 1 +
+ xen/arch/x86/boot/cmdline.c | 5 ++++-
+ xen/arch/x86/boot/trampoline.S | 7 +++++++
+ xen/arch/x86/efi/efi-boot.h | 4 ++++
+ xen/arch/x86/platform_hypercall.c | 2 ++
+ xen/arch/x86/pv/dom0_build.c | 2 ++
+ xen/arch/x86/setup.c | 6 ++++++
+ xen/drivers/video/Kconfig | 8 +++++++-
+ xen/include/asm-x86/setup.h | 6 ++++++
+ 11 files changed, 40 insertions(+), 5 deletions(-)
+
+diff --git a/tools/firmware/xen-dir/shim.config b/tools/firmware/xen-dir/shim.config
+index 151a8b41e5..d22c2fd2f4 100644
+--- a/tools/firmware/xen-dir/shim.config
++++ b/tools/firmware/xen-dir/shim.config
+@@ -68,8 +68,7 @@ CONFIG_HAS_EHCI=y
+ CONFIG_HAS_CPUFREQ=y
+ CONFIG_HAS_PASSTHROUGH=y
+ CONFIG_HAS_PCI=y
+-CONFIG_VIDEO=y
+-CONFIG_VGA=y
++# CONFIG_VGA is not set
+ CONFIG_DEFCONFIG_LIST="$ARCH_DEFCONFIG"
+ CONFIG_ARCH_SUPPORTS_INT128=y
+
+diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
+index 4953533f16..f621e799ed 100644
+--- a/xen/arch/x86/Kconfig
++++ b/xen/arch/x86/Kconfig
+@@ -24,7 +24,6 @@ config X86
+ select HAS_PDX
+ select HAS_UBSAN
+ select NUMA
+- select VGA
+
+ config ARCH_DEFCONFIG
+ string
+diff --git a/xen/arch/x86/boot/build32.mk b/xen/arch/x86/boot/build32.mk
+index f7e8ebe67d..48c7407c00 100644
+--- a/xen/arch/x86/boot/build32.mk
++++ b/xen/arch/x86/boot/build32.mk
+@@ -5,6 +5,7 @@ include $(XEN_ROOT)/Config.mk
+ $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
+
+ CFLAGS += -Werror -fno-asynchronous-unwind-tables -fno-builtin -g0 -msoft-float
++CFLAGS += -I$(XEN_ROOT)/xen/include
+ CFLAGS := $(filter-out -flto,$(CFLAGS))
+
+ # NB. awk invocation is a portable alternative to 'head -n -1'
+diff --git a/xen/arch/x86/boot/cmdline.c b/xen/arch/x86/boot/cmdline.c
+index 06aa064e72..51b0659a04 100644
+--- a/xen/arch/x86/boot/cmdline.c
++++ b/xen/arch/x86/boot/cmdline.c
+@@ -30,6 +30,7 @@ asm (
+ " jmp cmdline_parse_early \n"
+ );
+
++#include <xen/kconfig.h>
+ #include "defs.h"
+ #include "video.h"
+
+@@ -336,5 +337,7 @@ void __stdcall cmdline_parse_early(const char *cmdline, early_boot_opts_t *ebo)
+ ebo->skip_realmode = skip_realmode(cmdline);
+ ebo->opt_edd = edd_parse(cmdline);
+ ebo->opt_edid = edid_parse(cmdline);
+- vga_parse(cmdline, ebo);
++
++ if ( IS_ENABLED(CONFIG_VIDEO) )
++ vga_parse(cmdline, ebo);
+ }
+diff --git a/xen/arch/x86/boot/trampoline.S b/xen/arch/x86/boot/trampoline.S
+index 4d640f3fcd..a17a90df5e 100644
+--- a/xen/arch/x86/boot/trampoline.S
++++ b/xen/arch/x86/boot/trampoline.S
+@@ -219,7 +219,9 @@ trampoline_boot_cpu_entry:
+ */
+ call get_memory_map
+ call get_edd
++#ifdef CONFIG_VIDEO
+ call video
++#endif
+
+ mov $0x0200,%ax
+ int $0x16
+@@ -267,10 +269,13 @@ opt_edid:
+ .byte 0 /* EDID parsing option (force/no/default). */
+ /* Padding. */
+ .byte 0
++
++#ifdef CONFIG_VIDEO
+ GLOBAL(boot_vid_mode)
+ .word VIDEO_80x25 /* If we don't run at all, assume basic video mode 3 at 80x25. */
+ vesa_size:
+ .word 0,0,0 /* width x depth x height */
++#endif
+
+ GLOBAL(kbd_shift_flags)
+ .byte 0
+@@ -279,4 +284,6 @@ rm_idt: .word 256*4-1, 0, 0
+
+ #include "mem.S"
+ #include "edd.S"
++#ifdef CONFIG_VIDEO
+ #include "video.S"
++#endif
+diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
+index d30f688a5a..5789d2cb70 100644
+--- a/xen/arch/x86/efi/efi-boot.h
++++ b/xen/arch/x86/efi/efi-boot.h
+@@ -479,16 +479,19 @@ static void __init efi_arch_edd(void)
+
+ static void __init efi_arch_console_init(UINTN cols, UINTN rows)
+ {
++#ifdef CONFIG_VIDEO
+ vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3;
+ vga_console_info.u.text_mode_3.columns = cols;
+ vga_console_info.u.text_mode_3.rows = rows;
+ vga_console_info.u.text_mode_3.font_height = 16;
++#endif
+ }
+
+ static void __init efi_arch_video_init(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
+ UINTN info_size,
+ EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info)
+ {
++#ifdef CONFIG_VIDEO
+ int bpp = 0;
+
+ switch ( mode_info->PixelFormat )
+@@ -550,6 +553,7 @@ static void __init efi_arch_video_init(EFI_GRAPHICS_OUTPUT_PROTOCOL *gop,
+ vga_console_info.u.vesa_lfb.lfb_size =
+ (gop->Mode->FrameBufferSize + 0xffff) >> 16;
+ }
++#endif
+ }
+
+ static void __init efi_arch_memory_setup(void)
+diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
+index ebc2f394ee..ea18c3215a 100644
+--- a/xen/arch/x86/platform_hypercall.c
++++ b/xen/arch/x86/platform_hypercall.c
+@@ -388,6 +388,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
+ }
+ case XEN_FW_VBEDDC_INFO:
+ ret = -ESRCH;
++#ifdef CONFIG_VIDEO
+ if ( op->u.firmware_info.index != 0 )
+ break;
+ if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
+@@ -406,6 +407,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
+ copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid,
+ bootsym(boot_edid_info), 128) )
+ ret = -EFAULT;
++#endif
+ break;
+ case XEN_FW_EFI_INFO:
+ ret = efi_get_info(op->u.firmware_info.index,
+diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
+index a13412efb9..a3be335b0b 100644
+--- a/xen/arch/x86/pv/dom0_build.c
++++ b/xen/arch/x86/pv/dom0_build.c
+@@ -832,11 +832,13 @@ int __init dom0_construct_pv(struct domain *d,
+ if ( cmdline != NULL )
+ strlcpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line));
+
++#ifdef CONFIG_VIDEO
+ if ( fill_console_start_info((void *)(si + 1)) )
+ {
+ si->console.dom0.info_off = sizeof(struct start_info);
+ si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
+ }
++#endif
+
+ if ( is_pv_32bit_domain(d) )
+ xlat_start_info(si, XLAT_start_info_console_dom0);
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index c1f4184e06..2279014f74 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -456,6 +456,7 @@ static void __init setup_max_pdx(unsigned long top_page)
+ /* A temporary copy of the e820 map that we can mess with during bootstrap. */
+ static struct e820map __initdata boot_e820;
+
++#ifdef CONFIG_VIDEO
+ struct boot_video_info {
+ u8 orig_x; /* 0x00 */
+ u8 orig_y; /* 0x01 */
+@@ -486,9 +487,11 @@ struct boot_video_info {
+ u16 vesa_attrib; /* 0x28 */
+ };
+ extern struct boot_video_info boot_vid_info;
++#endif
+
+ static void __init parse_video_info(void)
+ {
++#ifdef CONFIG_VIDEO
+ struct boot_video_info *bvi = &bootsym(boot_vid_info);
+
+ /* vga_console_info is filled directly on EFI platform. */
+@@ -524,6 +527,7 @@ static void __init parse_video_info(void)
+ vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities;
+ vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib;
+ }
++#endif
+ }
+
+ static void __init kexec_reserve_area(struct e820map *e820)
+@@ -741,6 +745,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+
+ printk("Xen image load base address: %#lx\n", xen_phys_start);
+
++#ifdef CONFIG_VIDEO
+ printk("Video information:\n");
+
+ /* Print VGA display mode information. */
+@@ -784,6 +789,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ printk("of reasons unknown\n");
+ }
+ }
++#endif
+
+ printk("Disc information:\n");
+ printk(" Found %d MBR signatures\n",
+diff --git a/xen/drivers/video/Kconfig b/xen/drivers/video/Kconfig
+index 0ffbbd9a88..52e8ce6c15 100644
+--- a/xen/drivers/video/Kconfig
++++ b/xen/drivers/video/Kconfig
+@@ -3,8 +3,14 @@ config VIDEO
+ bool
+
+ config VGA
+- bool
++ bool "VGA support" if !PV_SHIM_EXCLUSIVE
+ select VIDEO
++ depends on X86
++ default y if !PV_SHIM_EXCLUSIVE
++ ---help---
++ Enable VGA output for the Xen hypervisor.
++
++ If unsure, say Y.
+
+ config HAS_ARM_HDLCD
+ bool
+diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
+index c5b3d4ef18..b68ec9de4d 100644
+--- a/xen/include/asm-x86/setup.h
++++ b/xen/include/asm-x86/setup.h
+@@ -31,8 +31,14 @@ void arch_init_memory(void);
+ void subarch_init_memory(void);
+
+ void init_IRQ(void);
++
++#ifdef CONFIG_VIDEO
+ void vesa_init(void);
+ void vesa_mtrr_init(void);
++#else
++static inline void vesa_init(void) {};
++static inline void vesa_mtrr_init(void) {};
++#endif
+
+ int construct_dom0(
+ struct domain *d,
+--
+2.14.3
+
+
+From 4ba6447e7ddbee91c3781c2630ca1d28e080857c Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:18 +0000
+Subject: [PATCH 53/77] xen/pvh: do not mark the low 1MB as IO mem
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+On PVH there's nothing special on the low 1MB.
+
+This is an optional patch that doesn't affect the functionality of the
+shim.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+ xen/arch/x86/mm.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 0569342200..371c764027 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -122,6 +122,7 @@
+ #include <asm/fixmap.h>
+ #include <asm/io_apic.h>
+ #include <asm/pci.h>
++#include <asm/guest.h>
+
+ #include <asm/hvm/grant_table.h>
+ #include <asm/pv/grant_table.h>
+@@ -288,8 +289,12 @@ void __init arch_init_memory(void)
+ dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0, NULL);
+ BUG_ON(IS_ERR(dom_cow));
+
+- /* First 1MB of RAM is historically marked as I/O. */
+- for ( i = 0; i < 0x100; i++ )
++ /*
++ * First 1MB of RAM is historically marked as I/O. If we booted PVH,
++ * reclaim the space. Irrespective, leave MFN 0 as special for the sake
++ * of 0 being a very common default value.
++ */
++ for ( i = 0; i < (pvh_boot ? 1 : 0x100); i++ )
+ share_xen_page_with_guest(mfn_to_page(_mfn(i)),
+ dom_io, XENSHARE_writable);
+
+--
+2.14.3
+
+
+From 0ba5d8c27509ba2011591cfab2715e8ca6b7b402 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:18 +0000
+Subject: [PATCH 54/77] xen/pvshim: skip Dom0-only domain builder parts
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Do not allow access to any iomem or ioport by the shim, and also
+remove the check for Dom0 kernel support.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Acked-by: Jan Beulich <jbeulich(a)suse.com>
+---
+ xen/arch/x86/dom0_build.c | 4 ++++
+ xen/arch/x86/pv/dom0_build.c | 3 ++-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
+index bf992fef6d..e2bf81b4e7 100644
+--- a/xen/arch/x86/dom0_build.c
++++ b/xen/arch/x86/dom0_build.c
+@@ -13,6 +13,7 @@
+ #include <xen/softirq.h>
+
+ #include <asm/dom0_build.h>
++#include <asm/guest.h>
+ #include <asm/hpet.h>
+ #include <asm/io_apic.h>
+ #include <asm/p2m.h>
+@@ -385,6 +386,9 @@ int __init dom0_setup_permissions(struct domain *d)
+ unsigned int i;
+ int rc;
+
++ if ( pv_shim )
++ return 0;
++
+ /* The hardware domain is initially permitted full I/O capabilities. */
+ rc = ioports_permit_access(d, 0, 0xFFFF);
+ rc |= iomem_permit_access(d, 0UL, (1UL << (paddr_bits - PAGE_SHIFT)) - 1);
+diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
+index a3be335b0b..852d00a8be 100644
+--- a/xen/arch/x86/pv/dom0_build.c
++++ b/xen/arch/x86/pv/dom0_build.c
+@@ -17,6 +17,7 @@
+
+ #include <asm/bzimage.h>
+ #include <asm/dom0_build.h>
++#include <asm/guest.h>
+ #include <asm/page.h>
+ #include <asm/pv/mm.h>
+ #include <asm/setup.h>
+@@ -373,7 +374,7 @@ int __init dom0_construct_pv(struct domain *d,
+
+ if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE )
+ {
+- if ( !test_bit(XENFEAT_dom0, parms.f_supported) )
++ if ( !pv_shim && !test_bit(XENFEAT_dom0, parms.f_supported) )
+ {
+ printk("Kernel does not support Dom0 operation\n");
+ rc = -EINVAL;
+--
+2.14.3
+
+
+From 60dd95357cca09c5ed3c4f3d57c11b732ea8befd Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:18 +0000
+Subject: [PATCH 55/77] xen: mark xenstore/console pages as RAM
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This si required so that later they can be shared with the guest if
+Xen is running in shim mode.
+
+Also prevent them from being used by Xen by marking them as bad pages
+in init_boot_pages.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+Changes since v1:
+ - Remove adding the pages to dom_io, there's no need since they are
+ already marked as bad pages.
+ - Use a static global array to store the memory address of this
+ special pages, so Xen avoids having to call
+ xen_hypercall_hvm_get_param twice.
+---
+ xen/arch/x86/e820.c | 4 ++++
+ xen/arch/x86/guest/xen.c | 43 +++++++++++++++++++++++++++++++++++++++
+ xen/common/page_alloc.c | 15 ++++++++++++++
+ xen/drivers/char/xen_pv_console.c | 4 ++++
+ xen/include/asm-x86/guest/xen.h | 14 +++++++++++++
+ 5 files changed, 80 insertions(+)
+
+diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c
+index b422a684ee..590ea985ef 100644
+--- a/xen/arch/x86/e820.c
++++ b/xen/arch/x86/e820.c
+@@ -9,6 +9,7 @@
+ #include <asm/processor.h>
+ #include <asm/mtrr.h>
+ #include <asm/msr.h>
++#include <asm/guest.h>
+
+ /*
+ * opt_mem: Limit maximum address of physical RAM.
+@@ -699,6 +700,9 @@ unsigned long __init init_e820(const char *str, struct e820map *raw)
+
+ machine_specific_memory_setup(raw);
+
++ if ( xen_guest )
++ hypervisor_fixup_e820(&e820);
++
+ printk("%s RAM map:\n", str);
+ print_e820_memory_map(e820.map, e820.nr_map);
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index d4968b47aa..27a6c47753 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -32,12 +32,14 @@
+ #include <asm/processor.h>
+
+ #include <public/arch-x86/cpuid.h>
++#include <public/hvm/params.h>
+
+ bool __read_mostly xen_guest;
+
+ static __read_mostly uint32_t xen_cpuid_base;
+ extern char hypercall_page[];
+ static struct rangeset *mem;
++static unsigned long __initdata reserved_pages[2];
+
+ DEFINE_PER_CPU(unsigned int, vcpu_id);
+
+@@ -279,6 +281,47 @@ int hypervisor_free_unused_page(mfn_t mfn)
+ return rangeset_remove_range(mem, mfn_x(mfn), mfn_x(mfn));
+ }
+
++static void __init mark_pfn_as_ram(struct e820map *e820, uint64_t pfn)
++{
++ if ( !e820_add_range(e820, pfn << PAGE_SHIFT,
++ (pfn << PAGE_SHIFT) + PAGE_SIZE, E820_RAM) )
++ if ( !e820_change_range_type(e820, pfn << PAGE_SHIFT,
++ (pfn << PAGE_SHIFT) + PAGE_SIZE,
++ E820_RESERVED, E820_RAM) )
++            panic("Unable to add/change memory type of pfn %#lx to RAM", pfn);
++}
++
++void __init hypervisor_fixup_e820(struct e820map *e820)
++{
++ uint64_t pfn = 0;
++ unsigned int i = 0;
++ long rc;
++
++ ASSERT(xen_guest);
++
++#define MARK_PARAM_RAM(p) ({ \
++ rc = xen_hypercall_hvm_get_param(p, &pfn); \
++ if ( rc ) \
++ panic("Unable to get " #p); \
++ mark_pfn_as_ram(e820, pfn); \
++ ASSERT(i < ARRAY_SIZE(reserved_pages)); \
++ reserved_pages[i++] = pfn << PAGE_SHIFT; \
++})
++ MARK_PARAM_RAM(HVM_PARAM_STORE_PFN);
++ if ( !pv_console )
++ MARK_PARAM_RAM(HVM_PARAM_CONSOLE_PFN);
++#undef MARK_PARAM_RAM
++}
++
++const unsigned long *__init hypervisor_reserved_pages(unsigned int *size)
++{
++ ASSERT(xen_guest);
++
++ *size = ARRAY_SIZE(reserved_pages);
++
++ return reserved_pages;
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
+index 5616a82263..49b2a91751 100644
+--- a/xen/common/page_alloc.c
++++ b/xen/common/page_alloc.c
+@@ -143,6 +143,7 @@
+ #include <asm/numa.h>
+ #include <asm/flushtlb.h>
+ #ifdef CONFIG_X86
++#include <asm/guest.h>
+ #include <asm/p2m.h>
+ #include <asm/setup.h> /* for highmem_start only */
+ #else
+@@ -303,6 +304,20 @@ void __init init_boot_pages(paddr_t ps, paddr_t pe)
+ badpage++;
+ }
+ }
++
++ if ( xen_guest )
++ {
++ badpage = hypervisor_reserved_pages(&array_size);
++ if ( badpage )
++ {
++ for ( i = 0; i < array_size; i++ )
++ {
++ bootmem_region_zap(*badpage >> PAGE_SHIFT,
++ (*badpage >> PAGE_SHIFT) + 1);
++ badpage++;
++ }
++ }
++ }
+ #endif
+
+ /* Check new pages against the bad-page list. */
+diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c
+index f5aca4c69e..d4f0532101 100644
+--- a/xen/drivers/char/xen_pv_console.c
++++ b/xen/drivers/char/xen_pv_console.c
+@@ -35,6 +35,8 @@ static evtchn_port_t cons_evtchn;
+ static serial_rx_fn cons_rx_handler;
+ static DEFINE_SPINLOCK(tx_lock);
+
++bool pv_console;
++
+ void __init pv_console_init(void)
+ {
+ long r;
+@@ -60,6 +62,8 @@ void __init pv_console_init(void)
+
+ printk("Initialised PV console at 0x%p with pfn %#lx and evtchn %#x\n",
+ cons_ring, raw_pfn, cons_evtchn);
++ pv_console = true;
++
+ return;
+
+ error:
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+index b3e684f756..62255fda8b 100644
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -29,12 +29,15 @@
+ #ifdef CONFIG_XEN_GUEST
+
+ extern bool xen_guest;
++extern bool pv_console;
+
+ void probe_hypervisor(void);
+ void hypervisor_setup(void);
+ void hypervisor_ap_setup(void);
+ int hypervisor_alloc_unused_page(mfn_t *mfn);
+ int hypervisor_free_unused_page(mfn_t mfn);
++void hypervisor_fixup_e820(struct e820map *e820);
++const unsigned long *hypervisor_reserved_pages(unsigned int *size);
+
+ DECLARE_PER_CPU(unsigned int, vcpu_id);
+ DECLARE_PER_CPU(struct vcpu_info *, vcpu_info);
+@@ -42,6 +45,7 @@ DECLARE_PER_CPU(struct vcpu_info *, vcpu_info);
+ #else
+
+ #define xen_guest 0
++#define pv_console 0
+
+ static inline void probe_hypervisor(void) {};
+ static inline void hypervisor_setup(void)
+@@ -53,6 +57,16 @@ static inline void hypervisor_ap_setup(void)
+ ASSERT_UNREACHABLE();
+ }
+
++static inline void hypervisor_fixup_e820(struct e820map *e820)
++{
++ ASSERT_UNREACHABLE();
++}
++static inline const unsigned long *hypervisor_reserved_pages(unsigned int *size)
++{
++ ASSERT_UNREACHABLE();
++ return NULL;
++};
++
+ #endif /* CONFIG_XEN_GUEST */
+ #endif /* __X86_GUEST_XEN_H__ */
+
+--
+2.14.3
+
+
+From 1cd703979f73778403d0b0cf5c77c87534c544db Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:18 +0000
+Subject: [PATCH 56/77] xen/pvshim: modify Dom0 builder in order to build a
+ DomU
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+According to the PV ABI the initial virtual memory regions should
+contain the xenstore and console pages after the start_info. Also set
+the correct values in the start_info for DomU operation.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+Changes since v1:
+ - Modify the position of the __init attribute in dom0_update_physmap.
+ - Move the addition of sizeof(struct dom0_vga_console_info) to
+ vstartinfo_end with an existing if branch.
+ - Add a TODO item for fill_console_start_info in the !CONFIG_VIDEO
+ case.
+ - s/replace_va/replace_va_mapping/.
+ - Remove call to free_domheap_pages in replace_va_mapping.
+ put_page_and_type should already take care of freeing the page.
+ - Use PFN_DOWN in SET_AND_MAP_PARAM macro.
+ - Parenthesize va in SET_AND_MAP_PARAM macro when required.
+---
+ xen/arch/x86/pv/dom0_build.c | 48 +++++++++++++++++++++++-------
+ xen/arch/x86/pv/shim.c | 63 ++++++++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/dom0_build.h | 4 +++
+ xen/include/asm-x86/pv/shim.h | 21 ++++++++++++++
+ 4 files changed, 126 insertions(+), 10 deletions(-)
+
+diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
+index 852d00a8be..72752b8656 100644
+--- a/xen/arch/x86/pv/dom0_build.c
++++ b/xen/arch/x86/pv/dom0_build.c
+@@ -31,9 +31,8 @@
+ #define L3_PROT (BASE_PROT|_PAGE_DIRTY)
+ #define L4_PROT (BASE_PROT|_PAGE_DIRTY)
+
+-static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
+- unsigned long mfn,
+- unsigned long vphysmap_s)
++void __init dom0_update_physmap(struct domain *d, unsigned long pfn,
++ unsigned long mfn, unsigned long vphysmap_s)
+ {
+ if ( !is_pv_32bit_domain(d) )
+ ((unsigned long *)vphysmap_s)[pfn] = mfn;
+@@ -316,6 +315,10 @@ int __init dom0_construct_pv(struct domain *d,
+ unsigned long vphysmap_end;
+ unsigned long vstartinfo_start;
+ unsigned long vstartinfo_end;
++ unsigned long vxenstore_start = 0;
++ unsigned long vxenstore_end = 0;
++ unsigned long vconsole_start = 0;
++ unsigned long vconsole_end = 0;
+ unsigned long vstack_start;
+ unsigned long vstack_end;
+ unsigned long vpt_start;
+@@ -441,11 +444,22 @@ int __init dom0_construct_pv(struct domain *d,
+ if ( parms.p2m_base != UNSET_ADDR )
+ vphysmap_end = vphysmap_start;
+ vstartinfo_start = round_pgup(vphysmap_end);
+- vstartinfo_end = (vstartinfo_start +
+- sizeof(struct start_info) +
+- sizeof(struct dom0_vga_console_info));
++ vstartinfo_end = vstartinfo_start + sizeof(struct start_info);
++
++ if ( pv_shim )
++ {
++ vxenstore_start = round_pgup(vstartinfo_end);
++ vxenstore_end = vxenstore_start + PAGE_SIZE;
++ vconsole_start = vxenstore_end;
++ vconsole_end = vconsole_start + PAGE_SIZE;
++ vpt_start = vconsole_end;
++ }
++ else
++ {
++ vpt_start = round_pgup(vstartinfo_end);
++ vstartinfo_end += sizeof(struct dom0_vga_console_info);
++ }
+
+- vpt_start = round_pgup(vstartinfo_end);
+ for ( nr_pt_pages = 2; ; nr_pt_pages++ )
+ {
+ vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
+@@ -538,6 +552,8 @@ int __init dom0_construct_pv(struct domain *d,
+ " Init. ramdisk: %p->%p\n"
+ " Phys-Mach map: %p->%p\n"
+ " Start info: %p->%p\n"
++ " Xenstore ring: %p->%p\n"
++ " Console ring: %p->%p\n"
+ " Page tables: %p->%p\n"
+ " Boot stack: %p->%p\n"
+ " TOTAL: %p->%p\n",
+@@ -545,6 +561,8 @@ int __init dom0_construct_pv(struct domain *d,
+ _p(vinitrd_start), _p(vinitrd_end),
+ _p(vphysmap_start), _p(vphysmap_end),
+ _p(vstartinfo_start), _p(vstartinfo_end),
++ _p(vxenstore_start), _p(vxenstore_end),
++ _p(vconsole_start), _p(vconsole_end),
+ _p(vpt_start), _p(vpt_end),
+ _p(vstack_start), _p(vstack_end),
+ _p(v_start), _p(v_end));
+@@ -742,7 +760,8 @@ int __init dom0_construct_pv(struct domain *d,
+
+ si->shared_info = virt_to_maddr(d->shared_info);
+
+- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
++ if ( !pv_shim )
++ si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+ if ( !vinitrd_start && initrd_len )
+ si->flags |= SIF_MOD_START_PFN;
+ si->flags |= (xen_processor_pmbits << 8) & SIF_PM_MASK;
+@@ -834,15 +853,24 @@ int __init dom0_construct_pv(struct domain *d,
+ strlcpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line));
+
+ #ifdef CONFIG_VIDEO
+- if ( fill_console_start_info((void *)(si + 1)) )
++ if ( !pv_shim && fill_console_start_info((void *)(si + 1)) )
+ {
+ si->console.dom0.info_off = sizeof(struct start_info);
+ si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
+ }
+ #endif
+
++ /*
++ * TODO: provide an empty stub for fill_console_start_info in the
++ * !CONFIG_VIDEO case so the logic here can be simplified.
++ */
++ if ( pv_shim )
++ pv_shim_setup_dom(d, l4start, v_start, vxenstore_start, vconsole_start,
++ vphysmap_start, si);
++
+ if ( is_pv_32bit_domain(d) )
+- xlat_start_info(si, XLAT_start_info_console_dom0);
++ xlat_start_info(si, pv_shim ? XLAT_start_info_console_domU
++ : XLAT_start_info_console_dom0);
+
+ /* Return to idle domain's page tables. */
+ mapcache_override_current(NULL);
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 4d037355db..75365b0697 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -18,16 +18,79 @@
+ *
+ * Copyright (c) 2017 Citrix Systems Ltd.
+ */
++#include <xen/hypercall.h>
+ #include <xen/init.h>
+ #include <xen/types.h>
+
+ #include <asm/apic.h>
++#include <asm/dom0_build.h>
++#include <asm/guest.h>
++#include <asm/pv/mm.h>
+
+ #ifndef CONFIG_PV_SHIM_EXCLUSIVE
+ bool pv_shim;
+ boolean_param("pv-shim", pv_shim);
+ #endif
+
++#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \
++ _PAGE_GUEST_KERNEL)
++#define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
++
++static void __init replace_va_mapping(struct domain *d, l4_pgentry_t *l4start,
++ unsigned long va, unsigned long mfn)
++{
++ struct page_info *page;
++ l4_pgentry_t *pl4e;
++ l3_pgentry_t *pl3e;
++ l2_pgentry_t *pl2e;
++ l1_pgentry_t *pl1e;
++
++ pl4e = l4start + l4_table_offset(va);
++ pl3e = l4e_to_l3e(*pl4e);
++ pl3e += l3_table_offset(va);
++ pl2e = l3e_to_l2e(*pl3e);
++ pl2e += l2_table_offset(va);
++ pl1e = l2e_to_l1e(*pl2e);
++ pl1e += l1_table_offset(va);
++
++ page = mfn_to_page(l1e_get_pfn(*pl1e));
++ put_page_and_type(page);
++
++ *pl1e = l1e_from_pfn(mfn, (!is_pv_32bit_domain(d) ? L1_PROT
++ : COMPAT_L1_PROT));
++}
++
++void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
++ unsigned long va_start, unsigned long store_va,
++ unsigned long console_va, unsigned long vphysmap,
++ start_info_t *si)
++{
++ uint64_t param = 0;
++ long rc;
++
++#define SET_AND_MAP_PARAM(p, si, va) ({ \
++ rc = xen_hypercall_hvm_get_param(p, ¶m); \
++ if ( rc ) \
++ panic("Unable to get " #p "\n");
\
++ (si) = param; \
++ if ( va ) \
++ { \
++ share_xen_page_with_guest(mfn_to_page(param), d, XENSHARE_writable); \
++ replace_va_mapping(d, l4start, va, param); \
++ dom0_update_physmap(d, PFN_DOWN((va) - va_start), param, vphysmap); \
++ } \
++})
++ SET_AND_MAP_PARAM(HVM_PARAM_STORE_PFN, si->store_mfn, store_va);
++ SET_AND_MAP_PARAM(HVM_PARAM_STORE_EVTCHN, si->store_evtchn, 0);
++ if ( !pv_console )
++ {
++ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_PFN, si->console.domU.mfn,
++ console_va);
++ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0);
++ }
++#undef SET_AND_MAP_PARAM
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/include/asm-x86/dom0_build.h b/xen/include/asm-x86/dom0_build.h
+index d83d2b4387..d985406503 100644
+--- a/xen/include/asm-x86/dom0_build.h
++++ b/xen/include/asm-x86/dom0_build.h
+@@ -1,6 +1,7 @@
+ #ifndef _DOM0_BUILD_H_
+ #define _DOM0_BUILD_H_
+
++#include <xen/libelf.h>
+ #include <xen/sched.h>
+
+ #include <asm/setup.h>
+@@ -29,6 +30,9 @@ int dom0_construct_pvh(struct domain *d, const module_t *image,
+ unsigned long dom0_paging_pages(const struct domain *d,
+ unsigned long nr_pages);
+
++void dom0_update_physmap(struct domain *d, unsigned long pfn,
++ unsigned long mfn, unsigned long vphysmap_s);
++
+ #endif /* _DOM0_BUILD_H_ */
+
+ /*
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+index 1468cfd498..b0c361cba1 100644
+--- a/xen/include/asm-x86/pv/shim.h
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -29,6 +29,27 @@ extern bool pv_shim;
+ # define pv_shim 0
+ #endif /* CONFIG_PV_SHIM{,_EXCLUSIVE} */
+
++#ifdef CONFIG_PV_SHIM
++
++void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
++ unsigned long va_start, unsigned long store_va,
++ unsigned long console_va, unsigned long vphysmap,
++ start_info_t *si);
++
++#else
++
++static inline void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
++ unsigned long va_start,
++ unsigned long store_va,
++ unsigned long console_va,
++ unsigned long vphysmap,
++ start_info_t *si)
++{
++ ASSERT_UNREACHABLE();
++}
++
++#endif
++
+ #endif /* __X86_PV_SHIM_H__ */
+
+ /*
+--
+2.14.3
+
+
+From da4518c5595c048a5c030225533e44e021fffaab Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:19 +0000
+Subject: [PATCH 57/77] xen/pvshim: set correct domid value
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+If domid is not provided by L0 set domid to 1 by default. Note that L0
+not provinding the domid can cause trouble if the guest tries to use
+it's domid instead of DOMID_SELF when performing hypercalls that are
+forwarded to the L0 hypervisor.
+
+Since the domain created is no longer the hardware domain add a hook
+to the domain shutdown path in order to forward shutdown operations to
+the L0 hypervisor.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+---
+Changes since v1:
+ - s/get_dom0_domid/get_initial_domain_id/.
+ - Add a comment regarding why dom0 needs to be global.
+ - Fix compilation of xen/common/domain.c on ARM.
+---
+ xen/arch/x86/dom0_build.c | 2 +-
+ xen/arch/x86/guest/xen.c | 5 +++++
+ xen/arch/x86/pv/shim.c | 21 +++++++++++++++++++++
+ xen/arch/x86/setup.c | 16 +++++++++++-----
+ xen/common/domain.c | 12 ++++++++++++
+ xen/include/asm-x86/guest/xen.h | 6 ++++++
+ xen/include/asm-x86/pv/shim.h | 10 ++++++++++
+ 7 files changed, 66 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
+index e2bf81b4e7..452298c624 100644
+--- a/xen/arch/x86/dom0_build.c
++++ b/xen/arch/x86/dom0_build.c
+@@ -473,7 +473,7 @@ int __init construct_dom0(struct domain *d, const module_t *image,
+ int rc;
+
+ /* Sanity! */
+- BUG_ON(d->domain_id != 0);
++ BUG_ON(!pv_shim && d->domain_id != 0);
+ BUG_ON(d->vcpu[0] == NULL);
+ BUG_ON(d->vcpu[0]->is_initialised);
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index 27a6c47753..aff16a0e35 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -322,6 +322,11 @@ const unsigned long *__init hypervisor_reserved_pages(unsigned int *size)
+ return reserved_pages;
+ }
+
++uint32_t hypervisor_cpuid_base(void)
++{
++ return xen_cpuid_base;
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 75365b0697..78351c9ee0 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -20,6 +20,7 @@
+ */
+ #include <xen/hypercall.h>
+ #include <xen/init.h>
++#include <xen/shutdown.h>
+ #include <xen/types.h>
+
+ #include <asm/apic.h>
+@@ -27,6 +28,8 @@
+ #include <asm/guest.h>
+ #include <asm/pv/mm.h>
+
++#include <public/arch-x86/cpuid.h>
++
+ #ifndef CONFIG_PV_SHIM_EXCLUSIVE
+ bool pv_shim;
+ boolean_param("pv-shim", pv_shim);
+@@ -91,6 +94,24 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ #undef SET_AND_MAP_PARAM
+ }
+
++void pv_shim_shutdown(uint8_t reason)
++{
++ /* XXX: handle suspend */
++ xen_hypercall_shutdown(reason);
++}
++
++domid_t get_initial_domain_id(void)
++{
++ uint32_t eax, ebx, ecx, edx;
++
++ if ( !pv_shim )
++ return 0;
++
++ cpuid(hypervisor_cpuid_base() + 4, &eax, &ebx, &ecx, &edx);
++
++ return (eax & XEN_HVM_CPUID_DOMID_PRESENT) ? ecx : 1;
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 2279014f74..7091c38047 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -104,6 +104,12 @@ unsigned long __read_mostly mmu_cr4_features = XEN_MINIMAL_CR4;
+ #define SMEP_HVM_ONLY (-1)
+ static s8 __initdata opt_smep = 1;
+
++/*
++ * Initial domain place holder. Needs to be global so it can be created in
++ * __start_xen and unpaused in init_done.
++ */
++static struct domain *__initdata dom0;
++
+ static int __init parse_smep_param(const char *s)
+ {
+ if ( !*s )
+@@ -576,11 +582,11 @@ static void noinline init_done(void)
+
+ system_state = SYS_STATE_active;
+
++ domain_unpause_by_systemcontroller(dom0);
++
+ /* MUST be done prior to removing .init data. */
+ unregister_init_virtual_region();
+
+- domain_unpause_by_systemcontroller(hardware_domain);
+-
+ /* Zero the .init code and data. */
+ for ( va = __init_begin; va < _p(__init_end); va += PAGE_SIZE )
+ clear_page(va);
+@@ -659,7 +665,6 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
+ int i, j, e820_warn = 0, bytes = 0;
+ bool acpi_boot_table_init_done = false, relocated = false;
+- struct domain *dom0;
+ struct ns16550_defaults ns16550 = {
+ .data_bits = 8,
+ .parity = 'n',
+@@ -1617,11 +1622,12 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ }
+
+ /* Create initial domain 0. */
+- dom0 = domain_create(0, domcr_flags, 0, &config);
++ dom0 = domain_create(get_initial_domain_id(), domcr_flags, 0, &config);
+ if ( IS_ERR(dom0) || (alloc_dom0_vcpu0(dom0) == NULL) )
+ panic("Error creating domain 0");
+
+- dom0->is_privileged = 1;
++ if ( !pv_shim )
++ dom0->is_privileged = 1;
+ dom0->target = NULL;
+
+ /* Grab the DOM0 command line. */
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 7484693a87..1ba05fa3a1 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -43,6 +43,10 @@
+ #include <xen/tmem.h>
+ #include <asm/setup.h>
+
++#ifdef CONFIG_X86
++#include <asm/guest.h>
++#endif
++
+ /* Linux config option: propageted to domain0 */
+ /* xen_processor_pmbits: xen control Cx, Px, ... */
+ unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
+@@ -689,6 +693,14 @@ void domain_shutdown(struct domain *d, u8 reason)
+ {
+ struct vcpu *v;
+
++#ifdef CONFIG_X86
++ if ( pv_shim )
++ {
++ pv_shim_shutdown(reason);
++ return;
++ }
++#endif
++
+ spin_lock(&d->shutdown_lock);
+
+ if ( d->shutdown_code == SHUTDOWN_CODE_INVALID )
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+index 62255fda8b..ac48dcbe44 100644
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -38,6 +38,7 @@ int hypervisor_alloc_unused_page(mfn_t *mfn);
+ int hypervisor_free_unused_page(mfn_t mfn);
+ void hypervisor_fixup_e820(struct e820map *e820);
+ const unsigned long *hypervisor_reserved_pages(unsigned int *size);
++uint32_t hypervisor_cpuid_base(void);
+
+ DECLARE_PER_CPU(unsigned int, vcpu_id);
+ DECLARE_PER_CPU(struct vcpu_info *, vcpu_info);
+@@ -66,6 +67,11 @@ static inline const unsigned long *hypervisor_reserved_pages(unsigned int *size)
+ ASSERT_UNREACHABLE();
+ return NULL;
+ };
++static inline uint32_t hypervisor_cpuid_base(void)
++{
++ ASSERT_UNREACHABLE();
++ return 0;
++};
+
+ #endif /* CONFIG_XEN_GUEST */
+ #endif /* __X86_GUEST_XEN_H__ */
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+index b0c361cba1..ff7c050dc6 100644
+--- a/xen/include/asm-x86/pv/shim.h
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -35,6 +35,8 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ unsigned long va_start, unsigned long store_va,
+ unsigned long console_va, unsigned long vphysmap,
+ start_info_t *si);
++void pv_shim_shutdown(uint8_t reason);
++domid_t get_initial_domain_id(void);
+
+ #else
+
+@@ -47,6 +49,14 @@ static inline void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ {
+ ASSERT_UNREACHABLE();
+ }
++static inline void pv_shim_shutdown(uint8_t reason)
++{
++ ASSERT_UNREACHABLE();
++}
++static inline domid_t get_initial_domain_id(void)
++{
++ return 0;
++}
+
+ #endif
+
+--
+2.14.3
+
+
+From bbad376ab1c1c57ba31059bd2269aa9f213579d6 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:19 +0000
+Subject: [PATCH 58/77] xen/pvshim: forward evtchn ops between L0 Xen and L2
+ DomU
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Note that the unmask and the virq operations are handled by the shim
+itself, and that FIFO event channels are not exposed to the guest.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Anthony Liguori <aliguori(a)amazon.com>
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+---
+Changes since v1:
+ - Use find_first_set_bit instead of ffsl.
+ - Indent macro one more level.
+ - Have a single evtchn_close struct in pv_shim_event_channel_op.
+ - Add blank lines between switch cases.
+ - Use -EOPNOTSUPP in order to signal lack of FIFO or PIRQ support.
+ - Switch evtchn_bind_virq parameter to evtchn_port_t and use 0 signal
+ allocation needed.
+ - Switch evtchn helpers return type to int instead of long.
+ - Re-write event channel hypercall table handler instead of adding
+ hooks.
+ - Remove the pv_domain variable and instead use a static variable in
+ shim code.
+---
+ xen/arch/x86/compat.c | 4 +-
+ xen/arch/x86/guest/xen.c | 25 +++-
+ xen/arch/x86/pv/hypercall.c | 17 +++
+ xen/arch/x86/pv/shim.c | 263 ++++++++++++++++++++++++++++++++++++++
+ xen/common/event_channel.c | 99 ++++++++------
+ xen/drivers/char/xen_pv_console.c | 11 +-
+ xen/include/asm-x86/hypercall.h | 3 +
+ xen/include/asm-x86/pv/shim.h | 5 +
+ xen/include/xen/event.h | 15 +++
+ xen/include/xen/pv_console.h | 6 +
+ 10 files changed, 402 insertions(+), 46 deletions(-)
+
+diff --git a/xen/arch/x86/compat.c b/xen/arch/x86/compat.c
+index f417cd5034..9d376a4589 100644
+--- a/xen/arch/x86/compat.c
++++ b/xen/arch/x86/compat.c
+@@ -69,8 +69,8 @@ long do_event_channel_op_compat(XEN_GUEST_HANDLE_PARAM(evtchn_op_t) uop)
+ case EVTCHNOP_bind_ipi:
+ case EVTCHNOP_bind_vcpu:
+ case EVTCHNOP_unmask:
+- return do_event_channel_op(op.cmd,
+- guest_handle_from_ptr(&uop.p->u, void));
++ return pv_get_hypercall_handler(__HYPERVISOR_event_channel_op, false)
++ (op.cmd, (unsigned long)&uop.p->u, 0, 0, 0, 0);
+
+ default:
+ return -ENOSYS;
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index aff16a0e35..57b297ad47 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -18,6 +18,7 @@
+ *
+ * Copyright (c) 2017 Citrix Systems Ltd.
+ */
++#include <xen/event.h>
+ #include <xen/init.h>
+ #include <xen/mm.h>
+ #include <xen/pfn.h>
+@@ -193,11 +194,31 @@ static void __init init_memmap(void)
+ static void xen_evtchn_upcall(struct cpu_user_regs *regs)
+ {
+ struct vcpu_info *vcpu_info = this_cpu(vcpu_info);
++ unsigned long pending;
+
+ vcpu_info->evtchn_upcall_pending = 0;
+- write_atomic(&vcpu_info->evtchn_pending_sel, 0);
++ pending = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+- pv_console_rx(regs);
++ while ( pending )
++ {
++ unsigned int l1 = find_first_set_bit(pending);
++ unsigned long evtchn = xchg(&XEN_shared_info->evtchn_pending[l1], 0);
++
++ __clear_bit(l1, &pending);
++ evtchn &= ~XEN_shared_info->evtchn_mask[l1];
++ while ( evtchn )
++ {
++ unsigned int port = find_first_set_bit(evtchn);
++
++ __clear_bit(port, &evtchn);
++ port += l1 * BITS_PER_LONG;
++
++ if ( pv_console && port == pv_console_evtchn() )
++ pv_console_rx(regs);
++ else if ( pv_shim )
++ pv_shim_inject_evtchn(port);
++ }
++ }
+
+ ack_APIC_irq();
+ }
+diff --git a/xen/arch/x86/pv/hypercall.c b/xen/arch/x86/pv/hypercall.c
+index f79f7eef62..3b72d6a44d 100644
+--- a/xen/arch/x86/pv/hypercall.c
++++ b/xen/arch/x86/pv/hypercall.c
+@@ -320,6 +320,23 @@ void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
+ *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */
+ }
+
++void __init pv_hypercall_table_replace(unsigned int hypercall,
++ hypercall_fn_t * native,
++ hypercall_fn_t *compat)
++{
++#define HANDLER_POINTER(f) \
++ ((unsigned long *)__va(__pa(&pv_hypercall_table[hypercall].f)))
++ write_atomic(HANDLER_POINTER(native), (unsigned long)native);
++ write_atomic(HANDLER_POINTER(compat), (unsigned long)compat);
++#undef HANDLER_POINTER
++}
++
++hypercall_fn_t *pv_get_hypercall_handler(unsigned int hypercall, bool compat)
++{
++ return compat ? pv_hypercall_table[hypercall].compat
++ : pv_hypercall_table[hypercall].native;
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 78351c9ee0..36f3a366d3 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -18,6 +18,8 @@
+ *
+ * Copyright (c) 2017 Citrix Systems Ltd.
+ */
++#include <xen/event.h>
++#include <xen/guest_access.h>
+ #include <xen/hypercall.h>
+ #include <xen/init.h>
+ #include <xen/shutdown.h>
+@@ -35,6 +37,10 @@ bool pv_shim;
+ boolean_param("pv-shim", pv_shim);
+ #endif
+
++static struct domain *guest;
++
++static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg);
++
+ #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \
+ _PAGE_GUEST_KERNEL)
+ #define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+@@ -63,6 +69,27 @@ static void __init replace_va_mapping(struct domain *d, l4_pgentry_t *l4start,
+ : COMPAT_L1_PROT));
+ }
+
++static void evtchn_reserve(struct domain *d, unsigned int port)
++{
++ ASSERT(port_is_valid(d, port));
++ evtchn_from_port(d, port)->state = ECS_RESERVED;
++ BUG_ON(xen_hypercall_evtchn_unmask(port));
++}
++
++static bool evtchn_handled(struct domain *d, unsigned int port)
++{
++ ASSERT(port_is_valid(d, port));
++ /* The shim manages VIRQs, the rest is forwarded to L0. */
++ return evtchn_from_port(d, port)->state == ECS_VIRQ;
++}
++
++static void evtchn_assign_vcpu(struct domain *d, unsigned int port,
++ unsigned int vcpu)
++{
++ ASSERT(port_is_valid(d, port));
++ evtchn_from_port(d, port)->notify_vcpu_id = vcpu;
++}
++
+ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ unsigned long va_start, unsigned long store_va,
+ unsigned long console_va, unsigned long vphysmap,
+@@ -82,6 +109,11 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ replace_va_mapping(d, l4start, va, param); \
+ dom0_update_physmap(d, PFN_DOWN((va) - va_start), param, vphysmap); \
+ } \
++ else \
++ { \
++ BUG_ON(evtchn_allocate_port(d, param)); \
++ evtchn_reserve(d, param); \
++ } \
+ })
+ SET_AND_MAP_PARAM(HVM_PARAM_STORE_PFN, si->store_mfn, store_va);
+ SET_AND_MAP_PARAM(HVM_PARAM_STORE_EVTCHN, si->store_evtchn, 0);
+@@ -92,6 +124,10 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0);
+ }
+ #undef SET_AND_MAP_PARAM
++ pv_hypercall_table_replace(__HYPERVISOR_event_channel_op,
++ (hypercall_fn_t *)pv_shim_event_channel_op,
++ (hypercall_fn_t *)pv_shim_event_channel_op);
++ guest = d;
+ }
+
+ void pv_shim_shutdown(uint8_t reason)
+@@ -100,6 +136,233 @@ void pv_shim_shutdown(uint8_t reason)
+ xen_hypercall_shutdown(reason);
+ }
+
++static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
++{
++ struct domain *d = current->domain;
++ struct evtchn_close close;
++ long rc;
++
++ switch ( cmd )
++ {
++#define EVTCHN_FORWARD(cmd, port_field) \
++ case EVTCHNOP_##cmd: { \
++ struct evtchn_##cmd op; \
++ \
++ if ( copy_from_guest(&op, arg, 1) != 0 ) \
++ return -EFAULT; \
++ \
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_##cmd, &op); \
++ if ( rc ) \
++ break; \
++ \
++ spin_lock(&d->event_lock); \
++ rc = evtchn_allocate_port(d, op.port_field); \
++ if ( rc ) \
++ { \
++ close.port = op.port_field; \
++ BUG_ON(xen_hypercall_event_channel_op(EVTCHNOP_close, &close)); \
++ } \
++ else \
++ evtchn_reserve(d, op.port_field); \
++ spin_unlock(&d->event_lock); \
++ \
++ if ( !rc && __copy_to_guest(arg, &op, 1) ) \
++ rc = -EFAULT; \
++ \
++ break; \
++ }
++
++ EVTCHN_FORWARD(alloc_unbound, port)
++ EVTCHN_FORWARD(bind_interdomain, local_port)
++#undef EVTCHN_FORWARD
++
++ case EVTCHNOP_bind_virq: {
++ struct evtchn_bind_virq virq;
++ struct evtchn_alloc_unbound alloc = {
++ .dom = DOMID_SELF,
++ .remote_dom = DOMID_SELF,
++ };
++
++ if ( copy_from_guest(&virq, arg, 1) != 0 )
++ return -EFAULT;
++ /*
++ * The event channel space is actually controlled by L0 Xen, so
++ * allocate a port from L0 and then force the VIRQ to be bound to that
++ * specific port.
++ *
++ * This is only required for VIRQ because the rest of the event channel
++ * operations are handled directly by L0.
++ */
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);
++ if ( rc )
++ break;
++
++ /* Force L1 to use the event channel port allocated on L0. */
++ rc = evtchn_bind_virq(&virq, alloc.port);
++ if ( rc )
++ {
++ close.port = alloc.port;
++ BUG_ON(xen_hypercall_event_channel_op(EVTCHNOP_close, &close));
++ }
++
++ if ( !rc && __copy_to_guest(arg, &virq, 1) )
++ rc = -EFAULT;
++
++ break;
++ }
++
++ case EVTCHNOP_status: {
++ struct evtchn_status status;
++
++ if ( copy_from_guest(&status, arg, 1) != 0 )
++ return -EFAULT;
++
++ /*
++ * NB: if the event channel is not handled by the shim, just forward
++ * the status request to L0, even if the port is not valid.
++ */
++ if ( port_is_valid(d, status.port) && evtchn_handled(d, status.port) )
++ rc = evtchn_status(&status);
++ else
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_status, &status);
++
++ break;
++ }
++
++ case EVTCHNOP_bind_vcpu: {
++ struct evtchn_bind_vcpu vcpu;
++
++ if ( copy_from_guest(&vcpu, arg, 1) != 0 )
++ return -EFAULT;
++
++ if ( !port_is_valid(d, vcpu.port) )
++ return -EINVAL;
++
++ if ( evtchn_handled(d, vcpu.port) )
++ rc = evtchn_bind_vcpu(vcpu.port, vcpu.vcpu);
++ else
++ {
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_bind_vcpu, &vcpu);
++ if ( !rc )
++ evtchn_assign_vcpu(d, vcpu.port, vcpu.vcpu);
++ }
++
++ break;
++ }
++
++ case EVTCHNOP_close: {
++ if ( copy_from_guest(&close, arg, 1) != 0 )
++ return -EFAULT;
++
++ if ( !port_is_valid(d, close.port) )
++ return -EINVAL;
++
++ set_bit(close.port, XEN_shared_info->evtchn_mask);
++
++ if ( evtchn_handled(d, close.port) )
++ {
++ rc = evtchn_close(d, close.port, true);
++ if ( rc )
++ break;
++ }
++ else
++ evtchn_free(d, evtchn_from_port(d, close.port));
++
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_close, &close);
++ if ( rc )
++ /*
++ * If the port cannot be closed on the L0 mark it as reserved
++ * in the shim to avoid re-using it.
++ */
++ evtchn_reserve(d, close.port);
++
++ break;
++ }
++
++ case EVTCHNOP_bind_ipi: {
++ struct evtchn_bind_ipi ipi;
++
++ if ( copy_from_guest(&ipi, arg, 1) != 0 )
++ return -EFAULT;
++
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_bind_ipi, &ipi);
++ if ( rc )
++ break;
++
++ spin_lock(&d->event_lock);
++ rc = evtchn_allocate_port(d, ipi.port);
++ if ( rc )
++ {
++ spin_unlock(&d->event_lock);
++
++ close.port = ipi.port;
++ BUG_ON(xen_hypercall_event_channel_op(EVTCHNOP_close, &close));
++ break;
++ }
++
++ evtchn_assign_vcpu(d, ipi.port, ipi.vcpu);
++ evtchn_reserve(d, ipi.port);
++ spin_unlock(&d->event_lock);
++
++ if ( __copy_to_guest(arg, &ipi, 1) )
++ rc = -EFAULT;
++
++ break;
++ }
++
++ case EVTCHNOP_unmask: {
++ struct evtchn_unmask unmask;
++
++ if ( copy_from_guest(&unmask, arg, 1) != 0 )
++ return -EFAULT;
++
++ /* Unmask is handled in L1 */
++ rc = evtchn_unmask(unmask.port);
++
++ break;
++ }
++
++ case EVTCHNOP_send: {
++ struct evtchn_send send;
++
++ if ( copy_from_guest(&send, arg, 1) != 0 )
++ return -EFAULT;
++
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_send, &send);
++
++ break;
++ }
++
++ case EVTCHNOP_reset: {
++ struct evtchn_reset reset;
++
++ if ( copy_from_guest(&reset, arg, 1) != 0 )
++ return -EFAULT;
++
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_reset, &reset);
++
++ break;
++ }
++
++ default:
++ /* No FIFO or PIRQ support for now */
++ rc = -EOPNOTSUPP;
++ break;
++ }
++
++ return rc;
++}
++
++void pv_shim_inject_evtchn(unsigned int port)
++{
++ if ( port_is_valid(guest, port) )
++ {
++ struct evtchn *chn = evtchn_from_port(guest, port);
++
++ evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn);
++ }
++}
++
+ domid_t get_initial_domain_id(void)
+ {
+ uint32_t eax, ebx, ecx, edx;
+diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
+index c69f9db6db..be834c5c78 100644
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
+@@ -156,46 +156,62 @@ static void free_evtchn_bucket(struct domain *d, struct evtchn *bucket)
+ xfree(bucket);
+ }
+
++int evtchn_allocate_port(struct domain *d, evtchn_port_t port)
++{
++ if ( port > d->max_evtchn_port || port >= d->max_evtchns )
++ return -ENOSPC;
++
++ if ( port_is_valid(d, port) )
++ {
++ if ( evtchn_from_port(d, port)->state != ECS_FREE ||
++ evtchn_port_is_busy(d, port) )
++ return -EBUSY;
++ }
++ else
++ {
++ struct evtchn *chn;
++ struct evtchn **grp;
++
++ if ( !group_from_port(d, port) )
++ {
++ grp = xzalloc_array(struct evtchn *, BUCKETS_PER_GROUP);
++ if ( !grp )
++ return -ENOMEM;
++ group_from_port(d, port) = grp;
++ }
++
++ chn = alloc_evtchn_bucket(d, port);
++ if ( !chn )
++ return -ENOMEM;
++ bucket_from_port(d, port) = chn;
++
++ write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET);
++ }
++
++ return 0;
++}
++
+ static int get_free_port(struct domain *d)
+ {
+- struct evtchn *chn;
+- struct evtchn **grp;
+ int port;
+
+ if ( d->is_dying )
+ return -EINVAL;
+
+- for ( port = 0; port_is_valid(d, port); port++ )
++ for ( port = 0; port <= d->max_evtchn_port; port++ )
+ {
+- if ( port > d->max_evtchn_port )
+- return -ENOSPC;
+- if ( evtchn_from_port(d, port)->state == ECS_FREE
+- && !evtchn_port_is_busy(d, port) )
+- return port;
+- }
++ int rc = evtchn_allocate_port(d, port);
+
+- if ( port == d->max_evtchns || port > d->max_evtchn_port )
+- return -ENOSPC;
++ if ( rc == -EBUSY )
++ continue;
+
+- if ( !group_from_port(d, port) )
+- {
+- grp = xzalloc_array(struct evtchn *, BUCKETS_PER_GROUP);
+- if ( !grp )
+- return -ENOMEM;
+- group_from_port(d, port) = grp;
++ return port;
+ }
+
+- chn = alloc_evtchn_bucket(d, port);
+- if ( !chn )
+- return -ENOMEM;
+- bucket_from_port(d, port) = chn;
+-
+- write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET);
+-
+- return port;
++ return -ENOSPC;
+ }
+
+-static void free_evtchn(struct domain *d, struct evtchn *chn)
++void evtchn_free(struct domain *d, struct evtchn *chn)
+ {
+ /* Clear pending event to avoid unexpected behavior on re-bind. */
+ evtchn_port_clear_pending(d, chn);
+@@ -345,13 +361,13 @@ static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
+ }
+
+
+-static long evtchn_bind_virq(evtchn_bind_virq_t *bind)
++int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port)
+ {
+ struct evtchn *chn;
+ struct vcpu *v;
+ struct domain *d = current->domain;
+- int port, virq = bind->virq, vcpu = bind->vcpu;
+- long rc = 0;
++ int virq = bind->virq, vcpu = bind->vcpu;
++ int rc = 0;
+
+ if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) )
+ return -EINVAL;
+@@ -368,8 +384,19 @@ static long evtchn_bind_virq(evtchn_bind_virq_t *bind)
+ if ( v->virq_to_evtchn[virq] != 0 )
+ ERROR_EXIT(-EEXIST);
+
+- if ( (port = get_free_port(d)) < 0 )
+- ERROR_EXIT(port);
++ if ( port != 0 )
++ {
++ if ( (rc = evtchn_allocate_port(d, port)) != 0 )
++ ERROR_EXIT(rc);
++ }
++ else
++ {
++ int alloc_port = get_free_port(d);
++
++ if ( alloc_port < 0 )
++ ERROR_EXIT(alloc_port);
++ port = alloc_port;
++ }
+
+ chn = evtchn_from_port(d, port);
+
+@@ -511,7 +538,7 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
+ }
+
+
+-static long evtchn_close(struct domain *d1, int port1, bool_t guest)
++int evtchn_close(struct domain *d1, int port1, bool guest)
+ {
+ struct domain *d2 = NULL;
+ struct vcpu *v;
+@@ -619,7 +646,7 @@ static long evtchn_close(struct domain *d1, int port1, bool_t guest)
+
+ double_evtchn_lock(chn1, chn2);
+
+- free_evtchn(d1, chn1);
++ evtchn_free(d1, chn1);
+
+ chn2->state = ECS_UNBOUND;
+ chn2->u.unbound.remote_domid = d1->domain_id;
+@@ -633,7 +660,7 @@ static long evtchn_close(struct domain *d1, int port1, bool_t guest)
+ }
+
+ spin_lock(&chn1->lock);
+- free_evtchn(d1, chn1);
++ evtchn_free(d1, chn1);
+ spin_unlock(&chn1->lock);
+
+ out:
+@@ -839,7 +866,7 @@ static void clear_global_virq_handlers(struct domain *d)
+ }
+ }
+
+-static long evtchn_status(evtchn_status_t *status)
++int evtchn_status(evtchn_status_t *status)
+ {
+ struct domain *d;
+ domid_t dom = status->dom;
+@@ -1056,7 +1083,7 @@ long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
+ struct evtchn_bind_virq bind_virq;
+ if ( copy_from_guest(&bind_virq, arg, 1) != 0 )
+ return -EFAULT;
+- rc = evtchn_bind_virq(&bind_virq);
++ rc = evtchn_bind_virq(&bind_virq, 0);
+ if ( !rc && __copy_to_guest(arg, &bind_virq, 1) )
+ rc = -EFAULT; /* Cleaning up here would be a mess! */
+ break;
+diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c
+index d4f0532101..948343303e 100644
+--- a/xen/drivers/char/xen_pv_console.c
++++ b/xen/drivers/char/xen_pv_console.c
+@@ -88,6 +88,11 @@ static void notify_daemon(void)
+ xen_hypercall_evtchn_send(cons_evtchn);
+ }
+
++evtchn_port_t pv_console_evtchn(void)
++{
++ return cons_evtchn;
++}
++
+ size_t pv_console_rx(struct cpu_user_regs *regs)
+ {
+ char c;
+@@ -97,10 +102,6 @@ size_t pv_console_rx(struct cpu_user_regs *regs)
+ if ( !cons_ring )
+ return 0;
+
+- /* TODO: move this somewhere */
+- if ( !test_bit(cons_evtchn, XEN_shared_info->evtchn_pending) )
+- return 0;
+-
+ prod = ACCESS_ONCE(cons_ring->in_prod);
+ cons = cons_ring->in_cons;
+
+@@ -125,8 +126,6 @@ size_t pv_console_rx(struct cpu_user_regs *regs)
+ ACCESS_ONCE(cons_ring->in_cons) = cons;
+ notify_daemon();
+
+- clear_bit(cons_evtchn, XEN_shared_info->evtchn_pending);
+-
+ return recv;
+ }
+
+diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h
+index 3eb4a8db89..b9f3ecf9a3 100644
+--- a/xen/include/asm-x86/hypercall.h
++++ b/xen/include/asm-x86/hypercall.h
+@@ -28,6 +28,9 @@ extern const hypercall_args_t hypercall_args_table[NR_hypercalls];
+ void pv_hypercall(struct cpu_user_regs *regs);
+ void hypercall_page_initialise_ring3_kernel(void *hypercall_page);
+ void hypercall_page_initialise_ring1_kernel(void *hypercall_page);
++void pv_hypercall_table_replace(unsigned int hypercall, hypercall_fn_t * native,
++ hypercall_fn_t *compat);
++hypercall_fn_t *pv_get_hypercall_handler(unsigned int hypercall, bool compat);
+
+ /*
+ * Both do_mmuext_op() and do_mmu_update():
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+index ff7c050dc6..ab656fd854 100644
+--- a/xen/include/asm-x86/pv/shim.h
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -36,6 +36,7 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ unsigned long console_va, unsigned long vphysmap,
+ start_info_t *si);
+ void pv_shim_shutdown(uint8_t reason);
++void pv_shim_inject_evtchn(unsigned int port);
+ domid_t get_initial_domain_id(void);
+
+ #else
+@@ -53,6 +54,10 @@ static inline void pv_shim_shutdown(uint8_t reason)
+ {
+ ASSERT_UNREACHABLE();
+ }
++static inline void pv_shim_inject_evtchn(unsigned int port)
++{
++ ASSERT_UNREACHABLE();
++}
+ static inline domid_t get_initial_domain_id(void)
+ {
+ return 0;
+diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
+index 87915ead69..ebb879e88d 100644
+--- a/xen/include/xen/event.h
++++ b/xen/include/xen/event.h
+@@ -48,6 +48,21 @@ int evtchn_send(struct domain *d, unsigned int lport);
+ /* Bind a local event-channel port to the specified VCPU. */
+ long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
+
++/* Bind a VIRQ. */
++int evtchn_bind_virq(evtchn_bind_virq_t *bind, evtchn_port_t port);
++
++/* Get the status of an event channel port. */
++int evtchn_status(evtchn_status_t *status);
++
++/* Close an event channel. */
++int evtchn_close(struct domain *d1, int port1, bool guest);
++
++/* Free an event channel. */
++void evtchn_free(struct domain *d, struct evtchn *chn);
++
++/* Allocate a specific event channel port. */
++int evtchn_allocate_port(struct domain *d, unsigned int port);
++
+ /* Unmask a local event-channel port. */
+ int evtchn_unmask(unsigned int port);
+
+diff --git a/xen/include/xen/pv_console.h b/xen/include/xen/pv_console.h
+index e578b56620..cb92539666 100644
+--- a/xen/include/xen/pv_console.h
++++ b/xen/include/xen/pv_console.h
+@@ -10,6 +10,7 @@ void pv_console_set_rx_handler(serial_rx_fn fn);
+ void pv_console_init_postirq(void);
+ void pv_console_puts(const char *buf);
+ size_t pv_console_rx(struct cpu_user_regs *regs);
++evtchn_port_t pv_console_evtchn(void);
+
+ #else
+
+@@ -18,6 +19,11 @@ static inline void pv_console_set_rx_handler(serial_rx_fn fn) { }
+ static inline void pv_console_init_postirq(void) { }
+ static inline void pv_console_puts(const char *buf) { }
+ static inline size_t pv_console_rx(struct cpu_user_regs *regs) { return 0; }
++evtchn_port_t pv_console_evtchn(void)
++{
++ ASSERT_UNREACHABLE();
++ return 0;
++}
+
+ #endif /* !CONFIG_XEN_GUEST */
+ #endif /* __XEN_PV_CONSOLE_H__ */
+--
+2.14.3
+
+
+From 7f5eb7d04ef2616051b82437d3c9595208a7dec1 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:19 +0000
+Subject: [PATCH 59/77] xen/pvshim: add grant table operations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Anthony Liguori <aliguori(a)amazon.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+---
+Changes since v1:
+ - Use __ of copy_to_guest.
+ - Return EOPNOTSUPP for not implemented grant table hypercalls.
+ - Forward user provided buffer in GNTTABOP_query_size.
+ - Rewrite grant table hypercall handler.
+---
+ xen/arch/x86/pv/shim.c | 164 ++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/guest/hypercall.h | 6 ++
+ 2 files changed, 170 insertions(+)
+
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 36f3a366d3..eb8b146785 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -22,6 +22,7 @@
+ #include <xen/guest_access.h>
+ #include <xen/hypercall.h>
+ #include <xen/init.h>
++#include <xen/iocap.h>
+ #include <xen/shutdown.h>
+ #include <xen/types.h>
+
+@@ -32,6 +33,8 @@
+
+ #include <public/arch-x86/cpuid.h>
+
++#include <compat/grant_table.h>
++
+ #ifndef CONFIG_PV_SHIM_EXCLUSIVE
+ bool pv_shim;
+ boolean_param("pv-shim", pv_shim);
+@@ -39,7 +42,14 @@ boolean_param("pv-shim", pv_shim);
+
+ static struct domain *guest;
+
++static unsigned int nr_grant_list;
++static unsigned long *grant_frames;
++static DEFINE_SPINLOCK(grant_lock);
++
+ static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg);
++static long pv_shim_grant_table_op(unsigned int cmd,
++ XEN_GUEST_HANDLE_PARAM(void) uop,
++ unsigned int count);
+
+ #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \
+ _PAGE_GUEST_KERNEL)
+@@ -127,6 +137,9 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t
*l4start,
+ pv_hypercall_table_replace(__HYPERVISOR_event_channel_op,
+ (hypercall_fn_t *)pv_shim_event_channel_op,
+ (hypercall_fn_t *)pv_shim_event_channel_op);
++ pv_hypercall_table_replace(__HYPERVISOR_grant_table_op,
++ (hypercall_fn_t *)pv_shim_grant_table_op,
++ (hypercall_fn_t *)pv_shim_grant_table_op);
+ guest = d;
+ }
+
+@@ -363,6 +376,157 @@ void pv_shim_inject_evtchn(unsigned int port)
+ }
+ }
+
++static long pv_shim_grant_table_op(unsigned int cmd,
++ XEN_GUEST_HANDLE_PARAM(void) uop,
++ unsigned int count)
++{
++ struct domain *d = current->domain;
++ long rc = 0;
++
++ if ( count != 1 )
++ return -EINVAL;
++
++ switch ( cmd )
++ {
++ case GNTTABOP_setup_table:
++ {
++ bool compat = is_pv_32bit_domain(d);
++ struct gnttab_setup_table nat;
++ struct compat_gnttab_setup_table cmp;
++ unsigned int i;
++
++ if ( unlikely(compat ? copy_from_guest(&cmp, uop, 1)
++ : copy_from_guest(&nat, uop, 1)) ||
++ unlikely(compat ? !compat_handle_okay(cmp.frame_list,
++ cmp.nr_frames)
++ : !guest_handle_okay(nat.frame_list,
++ nat.nr_frames)) )
++ {
++ rc = -EFAULT;
++ break;
++ }
++ if ( compat )
++#define XLAT_gnttab_setup_table_HNDL_frame_list(d, s)
++ XLAT_gnttab_setup_table(&nat, &cmp);
++#undef XLAT_gnttab_setup_table_HNDL_frame_list
++
++ nat.status = GNTST_okay;
++
++ spin_lock(&grant_lock);
++ if ( !nr_grant_list )
++ {
++ struct gnttab_query_size query_size = {
++ .dom = DOMID_SELF,
++ };
++
++ rc = xen_hypercall_grant_table_op(GNTTABOP_query_size,
++ &query_size, 1);
++ if ( rc )
++ {
++ spin_unlock(&grant_lock);
++ break;
++ }
++
++ ASSERT(!grant_frames);
++ grant_frames = xzalloc_array(unsigned long,
++ query_size.max_nr_frames);
++ if ( !grant_frames )
++ {
++ spin_unlock(&grant_lock);
++ rc = -ENOMEM;
++ break;
++ }
++
++ nr_grant_list = query_size.max_nr_frames;
++ }
++
++ if ( nat.nr_frames > nr_grant_list )
++ {
++ spin_unlock(&grant_lock);
++ rc = -EINVAL;
++ break;
++ }
++
++ for ( i = 0; i < nat.nr_frames; i++ )
++ {
++ if ( !grant_frames[i] )
++ {
++ struct xen_add_to_physmap xatp = {
++ .domid = DOMID_SELF,
++ .idx = i,
++ .space = XENMAPSPACE_grant_table,
++ };
++ mfn_t mfn;
++
++ rc = hypervisor_alloc_unused_page(&mfn);
++ if ( rc )
++ {
++ gprintk(XENLOG_ERR,
++ "unable to get memory for grant table\n");
++ break;
++ }
++
++ xatp.gpfn = mfn_x(mfn);
++ rc = xen_hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
++ if ( rc )
++ {
++ hypervisor_free_unused_page(mfn);
++ break;
++ }
++
++ BUG_ON(iomem_permit_access(d, mfn_x(mfn), mfn_x(mfn)));
++ grant_frames[i] = mfn_x(mfn);
++ }
++
++ ASSERT(grant_frames[i]);
++ if ( compat )
++ {
++ compat_pfn_t pfn = grant_frames[i];
++
++ if ( __copy_to_compat_offset(cmp.frame_list, i, &pfn, 1) )
++ {
++ nat.status = GNTST_bad_virt_addr;
++ rc = -EFAULT;
++ break;
++ }
++ }
++ else if ( __copy_to_guest_offset(nat.frame_list, i,
++ &grant_frames[i], 1) )
++ {
++ nat.status = GNTST_bad_virt_addr;
++ rc = -EFAULT;
++ break;
++ }
++ }
++ spin_unlock(&grant_lock);
++
++ if ( compat )
++#define XLAT_gnttab_setup_table_HNDL_frame_list(d, s)
++ XLAT_gnttab_setup_table(&cmp, &nat);
++#undef XLAT_gnttab_setup_table_HNDL_frame_list
++
++ if ( unlikely(compat ? __copy_to_guest(uop, &cmp, 1)
++ : __copy_to_guest(uop, &nat, 1)) )
++ {
++ rc = -EFAULT;
++ break;
++ }
++
++ break;
++ }
++
++ case GNTTABOP_query_size:
++ rc = xen_hypercall_grant_table_op(GNTTABOP_query_size, uop.p, count);
++ break;
++
++ default:
++ rc = -EOPNOTSUPP;
++ break;
++ }
++
++ return rc;
++}
++
+ domid_t get_initial_domain_id(void)
+ {
+ uint32_t eax, ebx, ecx, edx;
+diff --git a/xen/include/asm-x86/guest/hypercall.h
b/xen/include/asm-x86/guest/hypercall.h
+index 81a955d479..e9e626b474 100644
+--- a/xen/include/asm-x86/guest/hypercall.h
++++ b/xen/include/asm-x86/guest/hypercall.h
+@@ -110,6 +110,12 @@ static inline long xen_hypercall_event_channel_op(unsigned int cmd,
void *arg)
+ return _hypercall64_2(long, __HYPERVISOR_event_channel_op, cmd, arg);
+ }
+
++static inline long xen_hypercall_grant_table_op(unsigned int cmd, void *arg,
++ unsigned int count)
++{
++ return _hypercall64_3(long, __HYPERVISOR_grant_table_op, cmd, arg, count);
++}
++
+ static inline long xen_hypercall_hvm_op(unsigned int op, void *arg)
+ {
+ return _hypercall64_2(long, __HYPERVISOR_hvm_op, op, arg);
+--
+2.14.3
+
+
+From cc7d96b98cf02540edf6f387286100a50d6f3d04 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:45:23 +0000
+Subject: [PATCH 60/77] x86/pv-shim: shadow PV console's page for L2 DomU
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+[remove notify_guest helper and directly use pv_shim_inject_evtchn]
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+Changes since v1:
+ - Use pv_shim_inject_evtchn.
+---
+ xen/arch/x86/pv/shim.c | 31 ++++++++--
+ xen/drivers/char/Makefile | 1 +
+ xen/drivers/char/console.c | 6 ++
+ xen/drivers/char/consoled.c | 148 ++++++++++++++++++++++++++++++++++++++++++++
+ xen/include/xen/consoled.h | 27 ++++++++
+ 5 files changed, 209 insertions(+), 4 deletions(-)
+ create mode 100644 xen/drivers/char/consoled.c
+ create mode 100644 xen/include/xen/consoled.h
+
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index eb8b146785..986f9da58a 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -25,6 +25,8 @@
+ #include <xen/iocap.h>
+ #include <xen/shutdown.h>
+ #include <xen/types.h>
++#include <xen/consoled.h>
++#include <xen/pv_console.h>
+
+ #include <asm/apic.h>
+ #include <asm/dom0_build.h>
+@@ -127,13 +129,28 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t
*l4start,
+ })
+ SET_AND_MAP_PARAM(HVM_PARAM_STORE_PFN, si->store_mfn, store_va);
+ SET_AND_MAP_PARAM(HVM_PARAM_STORE_EVTCHN, si->store_evtchn, 0);
++ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0);
+ if ( !pv_console )
+- {
+ SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_PFN, si->console.domU.mfn,
+ console_va);
+- SET_AND_MAP_PARAM(HVM_PARAM_CONSOLE_EVTCHN, si->console.domU.evtchn, 0);
+- }
+ #undef SET_AND_MAP_PARAM
++ else
++ {
++ /* Allocate a new page for DomU's PV console */
++ void *page = alloc_xenheap_pages(0, MEMF_bits(32));
++ uint64_t console_mfn;
++
++ ASSERT(page);
++ clear_page(page);
++ console_mfn = virt_to_mfn(page);
++ si->console.domU.mfn = console_mfn;
++ share_xen_page_with_guest(mfn_to_page(console_mfn), d,
++ XENSHARE_writable);
++ replace_va_mapping(d, l4start, console_va, console_mfn);
++ dom0_update_physmap(d, (console_va - va_start) >> PAGE_SHIFT,
++ console_mfn, vphysmap);
++ consoled_set_ring_addr(page);
++ }
+ pv_hypercall_table_replace(__HYPERVISOR_event_channel_op,
+ (hypercall_fn_t *)pv_shim_event_channel_op,
+ (hypercall_fn_t *)pv_shim_event_channel_op);
+@@ -341,7 +358,13 @@ static long pv_shim_event_channel_op(int cmd,
XEN_GUEST_HANDLE_PARAM(void) arg)
+ if ( copy_from_guest(&send, arg, 1) != 0 )
+ return -EFAULT;
+
+- rc = xen_hypercall_event_channel_op(EVTCHNOP_send, &send);
++ if ( pv_console && send.port == pv_console_evtchn() )
++ {
++ consoled_guest_rx();
++ rc = 0;
++ }
++ else
++ rc = xen_hypercall_event_channel_op(EVTCHNOP_send, &send);
+
+ break;
+ }
+diff --git a/xen/drivers/char/Makefile b/xen/drivers/char/Makefile
+index 9d48d0f2dc..0d48b16e8d 100644
+--- a/xen/drivers/char/Makefile
++++ b/xen/drivers/char/Makefile
+@@ -9,3 +9,4 @@ obj-$(CONFIG_HAS_EHCI) += ehci-dbgp.o
+ obj-$(CONFIG_ARM) += arm-uart.o
+ obj-y += serial.o
+ obj-$(CONFIG_XEN_GUEST) += xen_pv_console.o
++obj-$(CONFIG_PV_SHIM) += consoled.o
+diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
+index 8acd358395..18f5b7f7aa 100644
+--- a/xen/drivers/char/console.c
++++ b/xen/drivers/char/console.c
+@@ -32,6 +32,7 @@
+ #include <xen/warning.h>
+
+ #ifdef CONFIG_X86
++#include <xen/consoled.h>
+ #include <xen/pv_console.h>
+ #include <asm/guest.h>
+ #endif
+@@ -415,6 +416,11 @@ static void __serial_rx(char c, struct cpu_user_regs *regs)
+ serial_rx_ring[SERIAL_RX_MASK(serial_rx_prod++)] = c;
+ /* Always notify the guest: prevents receive path from getting stuck. */
+ send_global_virq(VIRQ_CONSOLE);
++
++#ifdef CONFIG_X86
++ if ( pv_shim && pv_console )
++ consoled_guest_tx(c);
++#endif
+ }
+
+ static void serial_rx(char c, struct cpu_user_regs *regs)
+diff --git a/xen/drivers/char/consoled.c b/xen/drivers/char/consoled.c
+new file mode 100644
+index 0000000000..552abf5766
+--- /dev/null
++++ b/xen/drivers/char/consoled.c
+@@ -0,0 +1,148 @@
++/******************************************************************************
++ * drivers/char/consoled.c
++ *
++ * A backend driver for Xen's PV console.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; If not, see <
http://www.gnu.org/licenses/>.
++ *
++ * Copyright (c) 2017 Citrix Systems Ltd.
++ */
++
++#include <xen/lib.h>
++#include <xen/event.h>
++#include <xen/pv_console.h>
++#include <xen/consoled.h>
++
++#include <asm/guest.h>
++
++static struct xencons_interface *cons_ring;
++static DEFINE_SPINLOCK(rx_lock);
++
++void consoled_set_ring_addr(struct xencons_interface *ring)
++{
++ cons_ring = ring;
++}
++
++struct xencons_interface *consoled_get_ring_addr(void)
++{
++ return cons_ring;
++}
++
++#define BUF_SZ 255
++static char buf[BUF_SZ + 1];
++
++/* Receives characters from a domain's PV console */
++size_t consoled_guest_rx(void)
++{
++ size_t recv = 0, idx = 0;
++ XENCONS_RING_IDX cons, prod;
++
++ if ( !cons_ring )
++ return 0;
++
++ spin_lock(&rx_lock);
++
++ cons = cons_ring->out_cons;
++ prod = ACCESS_ONCE(cons_ring->out_prod);
++
++ /*
++ * Latch pointers before accessing the ring. Included compiler barrier also
++ * ensures that pointers are really read only once into local variables.
++ */
++ smp_rmb();
++
++ ASSERT((prod - cons) <= sizeof(cons_ring->out));
++
++ /* Is the ring empty? */
++ if ( cons == prod )
++ goto out;
++
++ while ( cons != prod )
++ {
++ char c = cons_ring->out[MASK_XENCONS_IDX(cons++, cons_ring->out)];
++
++ buf[idx++] = c;
++ recv++;
++
++ if ( idx >= BUF_SZ )
++ {
++ pv_console_puts(buf);
++ idx = 0;
++ }
++ }
++
++ if ( idx )
++ {
++ buf[idx] = '\0';
++ pv_console_puts(buf);
++ }
++
++ /* No need for a mem barrier because every character was already consumed */
++ barrier();
++ ACCESS_ONCE(cons_ring->out_cons) = cons;
++ pv_shim_inject_evtchn(pv_console_evtchn());
++
++ out:
++ spin_unlock(&rx_lock);
++
++ return recv;
++}
++
++/* Sends a character into a domain's PV console */
++size_t consoled_guest_tx(char c)
++{
++ size_t sent = 0;
++ XENCONS_RING_IDX cons, prod;
++
++ if ( !cons_ring )
++ return 0;
++
++ cons = ACCESS_ONCE(cons_ring->in_cons);
++ prod = cons_ring->in_prod;
++
++ /*
++ * Latch pointers before accessing the ring. Included compiler barrier also
++ * ensures that pointers are really read only once into local variables.
++ */
++ smp_rmb();
++
++ ASSERT((prod - cons) <= sizeof(cons_ring->in));
++
++ /* Is the ring out of space? */
++ if ( sizeof(cons_ring->in) - (prod - cons) == 0 )
++ goto notify;
++
++ cons_ring->in[MASK_XENCONS_IDX(prod++, cons_ring->in)] = c;
++ sent++;
++
++ /* Write to the ring before updating the pointer */
++ smp_wmb();
++ ACCESS_ONCE(cons_ring->in_prod) = prod;
++
++ notify:
++ /* Always notify the guest: prevents receive path from getting stuck. */
++ pv_shim_inject_evtchn(pv_console_evtchn());
++
++ return sent;
++}
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/include/xen/consoled.h b/xen/include/xen/consoled.h
+new file mode 100644
+index 0000000000..fd5d220a8a
+--- /dev/null
++++ b/xen/include/xen/consoled.h
+@@ -0,0 +1,27 @@
++#ifndef __XEN_CONSOLED_H__
++#define __XEN_CONSOLED_H__
++
++#include <public/io/console.h>
++
++#ifdef CONFIG_PV_SHIM
++
++void consoled_set_ring_addr(struct xencons_interface *ring);
++struct xencons_interface *consoled_get_ring_addr(void);
++size_t consoled_guest_rx(void);
++size_t consoled_guest_tx(char c);
++
++#else
++
++size_t consoled_guest_tx(char c) { return 0; }
++
++#endif /* !CONFIG_PV_SHIM */
++#endif /* __XEN_CONSOLED_H__ */
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--
+2.14.3
+
+
+From 83c838c9f853712ac5d36c9dc001eb8903b1e1e2 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:19 +0000
+Subject: [PATCH 61/77] xen/pvshim: add migration support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+Changes since v1:
+ - Use bitmap_zero instead of memset.
+ - Don't drop the __init attribute of unshare_xen_page_with_guest,
+ it's not needed for migration.
+ - Remove BUG_ON to check correct mapping, map_domain_page cannot
+ fail.
+ - Reduce indentation level of pv_shim_shutdown.
+---
+ xen/arch/x86/guest/xen.c | 29 +++++++
+ xen/arch/x86/pv/shim.c | 155 +++++++++++++++++++++++++++++++++++++-
+ xen/common/domain.c | 11 ++-
+ xen/common/schedule.c | 3 +-
+ xen/drivers/char/xen_pv_console.c | 2 +-
+ xen/include/asm-x86/guest/xen.h | 5 ++
+ xen/include/asm-x86/pv/shim.h | 5 +-
+ xen/include/xen/sched.h | 2 +-
+ 8 files changed, 197 insertions(+), 15 deletions(-)
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index 57b297ad47..2a5554ab26 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -348,6 +348,35 @@ uint32_t hypervisor_cpuid_base(void)
+ return xen_cpuid_base;
+ }
+
++static void ap_resume(void *unused)
++{
++ map_vcpuinfo();
++ init_evtchn();
++}
++
++void hypervisor_resume(void)
++{
++ /* Reset shared info page. */
++ map_shared_info();
++
++ /*
++ * Reset vcpu_info. Just clean the mapped bitmap and try to map the vcpu
++ * area again. On failure to map (when it was previously mapped) panic
++ * since it's impossible to safely shut down running guest vCPUs in order
++ * to meet the new XEN_LEGACY_MAX_VCPUS requirement.
++ */
++ bitmap_zero(vcpu_info_mapped, NR_CPUS);
++ if ( map_vcpuinfo() && nr_cpu_ids > XEN_LEGACY_MAX_VCPUS )
++ panic("unable to remap vCPU info and vCPUs > legacy limit");
++
++ /* Setup event channel upcall vector. */
++ init_evtchn();
++ smp_call_function(ap_resume, NULL, 1);
++
++ if ( pv_console )
++ pv_console_init();
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 986f9da58a..c53a4ca407 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -160,10 +160,159 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t
*l4start,
+ guest = d;
+ }
+
+-void pv_shim_shutdown(uint8_t reason)
++static void write_start_info(struct domain *d)
+ {
+- /* XXX: handle suspend */
+- xen_hypercall_shutdown(reason);
++ struct cpu_user_regs *regs = guest_cpu_user_regs();
++ start_info_t *si = map_domain_page(_mfn(is_pv_32bit_domain(d) ? regs->edx
++ : regs->rdx));
++ uint64_t param;
++
++ snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%s",
++ is_pv_32bit_domain(d) ? "32p" : "64");
++ si->nr_pages = d->tot_pages;
++ si->shared_info = virt_to_maddr(d->shared_info);
++ si->flags = 0;
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_PFN, &si->store_mfn));
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_EVTCHN, ¶m));
++ si->store_evtchn = param;
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_EVTCHN, ¶m));
++ si->console.domU.evtchn = param;
++ if ( pv_console )
++ si->console.domU.mfn = virt_to_mfn(consoled_get_ring_addr());
++ else if ( xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN,
++ &si->console.domU.mfn) )
++ BUG();
++
++ if ( is_pv_32bit_domain(d) )
++ xlat_start_info(si, XLAT_start_info_console_domU);
++
++ unmap_domain_page(si);
++}
++
++int pv_shim_shutdown(uint8_t reason)
++{
++ struct domain *d = current->domain;
++ struct vcpu *v;
++ unsigned int i;
++ uint64_t old_store_pfn, old_console_pfn = 0, store_pfn, console_pfn;
++ uint64_t store_evtchn, console_evtchn;
++ long rc;
++
++ if ( reason != SHUTDOWN_suspend )
++ /* Forward to L0. */
++ return xen_hypercall_shutdown(reason);
++
++ BUG_ON(current->vcpu_id != 0);
++
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_PFN, &old_store_pfn));
++ if ( !pv_console )
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN,
++ &old_console_pfn));
++
++ /* Pause the other vcpus before starting the migration. */
++ for_each_vcpu(d, v)
++ if ( v != current )
++ vcpu_pause_by_systemcontroller(v);
++
++ rc = xen_hypercall_shutdown(SHUTDOWN_suspend);
++ if ( rc )
++ {
++ for_each_vcpu(d, v)
++ if ( v != current )
++ vcpu_unpause_by_systemcontroller(v);
++
++ return rc;
++ }
++
++ /* Resume the shim itself first. */
++ hypervisor_resume();
++
++ /*
++ * ATM there's nothing Xen can do if the console/store pfn changes,
++ * because Xen won't have a page_info struct for it.
++ */
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_PFN, &store_pfn));
++ BUG_ON(old_store_pfn != store_pfn);
++ if ( !pv_console )
++ {
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_PFN,
++ &console_pfn));
++ BUG_ON(old_console_pfn != console_pfn);
++ }
++
++ /* Update domain id. */
++ d->domain_id = get_initial_domain_id();
++
++ /* Clean the iomem range. */
++ BUG_ON(iomem_deny_access(d, 0, ~0UL));
++
++ /* Clean grant frames. */
++ xfree(grant_frames);
++ grant_frames = NULL;
++ nr_grant_list = 0;
++
++ /* Clean event channels. */
++ for ( i = 0; i < EVTCHN_2L_NR_CHANNELS; i++ )
++ {
++ if ( !port_is_valid(d, i) )
++ continue;
++
++ if ( evtchn_handled(d, i) )
++ evtchn_close(d, i, false);
++ else
++ evtchn_free(d, evtchn_from_port(d, i));
++ }
++
++ /* Reserve store/console event channel. */
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_STORE_EVTCHN, &store_evtchn));
++ BUG_ON(evtchn_allocate_port(d, store_evtchn));
++ evtchn_reserve(d, store_evtchn);
++ BUG_ON(xen_hypercall_hvm_get_param(HVM_PARAM_CONSOLE_EVTCHN,
++ &console_evtchn));
++ BUG_ON(evtchn_allocate_port(d, console_evtchn));
++ evtchn_reserve(d, console_evtchn);
++
++ /* Clean watchdogs. */
++ watchdog_domain_destroy(d);
++ watchdog_domain_init(d);
++
++ /* Clean the PIRQ EOI page. */
++ if ( d->arch.pirq_eoi_map != NULL )
++ {
++ unmap_domain_page_global(d->arch.pirq_eoi_map);
++ put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn));
++ d->arch.pirq_eoi_map = NULL;
++ d->arch.pirq_eoi_map_mfn = 0;
++ d->arch.auto_unmask = 0;
++ }
++
++ /*
++ * NB: there's no need to fixup the p2m, since the mfns assigned
++ * to the PV guest have not changed at all. Just re-write the
++ * start_info fields with the appropriate value.
++ */
++ write_start_info(d);
++
++ for_each_vcpu(d, v)
++ {
++ /* Unmap guest vcpu_info pages. */
++ unmap_vcpu_info(v);
++
++ /* Reset the periodic timer to the default value. */
++ v->periodic_period = MILLISECS(10);
++ /* Stop the singleshot timer. */
++ stop_timer(&v->singleshot_timer);
++
++ if ( test_bit(_VPF_down, &v->pause_flags) )
++ BUG_ON(vcpu_reset(v));
++
++ if ( v != current )
++ vcpu_unpause_by_systemcontroller(v);
++ else
++ vcpu_force_reschedule(v);
++ }
++
++ return 0;
+ }
+
+ static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 1ba05fa3a1..9a703734eb 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -689,16 +689,13 @@ void __domain_crash_synchronous(void)
+ }
+
+
+-void domain_shutdown(struct domain *d, u8 reason)
++int domain_shutdown(struct domain *d, u8 reason)
+ {
+ struct vcpu *v;
+
+ #ifdef CONFIG_X86
+ if ( pv_shim )
+- {
+- pv_shim_shutdown(reason);
+- return;
+- }
++ return pv_shim_shutdown(reason);
+ #endif
+
+ spin_lock(&d->shutdown_lock);
+@@ -713,7 +710,7 @@ void domain_shutdown(struct domain *d, u8 reason)
+ if ( d->is_shutting_down )
+ {
+ spin_unlock(&d->shutdown_lock);
+- return;
++ return 0;
+ }
+
+ d->is_shutting_down = 1;
+@@ -735,6 +732,8 @@ void domain_shutdown(struct domain *d, u8 reason)
+ __domain_finalise_shutdown(d);
+
+ spin_unlock(&d->shutdown_lock);
++
++ return 0;
+ }
+
+ void domain_resume(struct domain *d)
+diff --git a/xen/common/schedule.c b/xen/common/schedule.c
+index 88279213e8..b7884263f2 100644
+--- a/xen/common/schedule.c
++++ b/xen/common/schedule.c
+@@ -1149,11 +1149,10 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
+ if ( copy_from_guest(&sched_shutdown, arg, 1) )
+ break;
+
+- ret = 0;
+ TRACE_3D(TRC_SCHED_SHUTDOWN,
+ current->domain->domain_id, current->vcpu_id,
+ sched_shutdown.reason);
+- domain_shutdown(current->domain, (u8)sched_shutdown.reason);
++ ret = domain_shutdown(current->domain, (u8)sched_shutdown.reason);
+
+ break;
+ }
+diff --git a/xen/drivers/char/xen_pv_console.c b/xen/drivers/char/xen_pv_console.c
+index 948343303e..cc1c1d743f 100644
+--- a/xen/drivers/char/xen_pv_console.c
++++ b/xen/drivers/char/xen_pv_console.c
+@@ -37,7 +37,7 @@ static DEFINE_SPINLOCK(tx_lock);
+
+ bool pv_console;
+
+-void __init pv_console_init(void)
++void pv_console_init(void)
+ {
+ long r;
+ uint64_t raw_pfn = 0, raw_evtchn = 0;
+diff --git a/xen/include/asm-x86/guest/xen.h b/xen/include/asm-x86/guest/xen.h
+index ac48dcbe44..11243fe60d 100644
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -39,6 +39,7 @@ int hypervisor_free_unused_page(mfn_t mfn);
+ void hypervisor_fixup_e820(struct e820map *e820);
+ const unsigned long *hypervisor_reserved_pages(unsigned int *size);
+ uint32_t hypervisor_cpuid_base(void);
++void hypervisor_resume(void);
+
+ DECLARE_PER_CPU(unsigned int, vcpu_id);
+ DECLARE_PER_CPU(struct vcpu_info *, vcpu_info);
+@@ -72,6 +73,10 @@ static inline uint32_t hypervisor_cpuid_base(void)
+ ASSERT_UNREACHABLE();
+ return 0;
+ };
++static inline void hypervisor_resume(void)
++{
++ ASSERT_UNREACHABLE();
++};
+
+ #endif /* CONFIG_XEN_GUEST */
+ #endif /* __X86_GUEST_XEN_H__ */
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+index ab656fd854..4d5f0b43fc 100644
+--- a/xen/include/asm-x86/pv/shim.h
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -35,7 +35,7 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ unsigned long va_start, unsigned long store_va,
+ unsigned long console_va, unsigned long vphysmap,
+ start_info_t *si);
+-void pv_shim_shutdown(uint8_t reason);
++int pv_shim_shutdown(uint8_t reason);
+ void pv_shim_inject_evtchn(unsigned int port);
+ domid_t get_initial_domain_id(void);
+
+@@ -50,9 +50,10 @@ static inline void pv_shim_setup_dom(struct domain *d, l4_pgentry_t
*l4start,
+ {
+ ASSERT_UNREACHABLE();
+ }
+-static inline void pv_shim_shutdown(uint8_t reason)
++static inline int pv_shim_shutdown(uint8_t reason)
+ {
+ ASSERT_UNREACHABLE();
++ return 0;
+ }
+ static inline void pv_shim_inject_evtchn(unsigned int port)
+ {
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index 64abc1df6c..2541ecb04f 100644
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -603,7 +603,7 @@ static inline struct domain *rcu_lock_current_domain(void)
+ struct domain *get_domain_by_id(domid_t dom);
+ void domain_destroy(struct domain *d);
+ int domain_kill(struct domain *d);
+-void domain_shutdown(struct domain *d, u8 reason);
++int domain_shutdown(struct domain *d, u8 reason);
+ void domain_resume(struct domain *d);
+ void domain_pause_for_debugger(void);
+
+--
+2.14.3
+
+
+From 7dcc20e0c8cf6fa30f483b0c91c8566a97a61031 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:20 +0000
+Subject: [PATCH 62/77] xen/pvshim: add shim_mem cmdline parameter
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli(a)citrix.com>
+---
+ docs/misc/xen-command-line.markdown | 16 +++++++++++++
+ xen/arch/x86/dom0_build.c | 18 ++++++++++++++-
+ xen/arch/x86/pv/shim.c | 46 +++++++++++++++++++++++++++++++++++++
+ xen/include/asm-x86/pv/shim.h | 6 +++++
+ 4 files changed, 85 insertions(+), 1 deletion(-)
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index 3a1a9c1fba..9f51710a46 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -686,6 +686,8 @@ any dom0 autoballooning feature present in your toolstack. See the
+ _xl.conf(5)_ man page or [Xen Best
+
Practices](http://wiki.xen.org/wiki/Xen_Best_Practices#Xen_dom0_dedicated....
+
++This option doesn't have effect if pv-shim mode is enabled.
++
+ ### dom0\_nodes
+
+ > `= List of [ <integer> | relaxed | strict ]`
+@@ -1456,6 +1458,20 @@ guest compatibly inside an HVM container.
+ In this mode, the kernel and initrd passed as modules to the hypervisor are
+ constructed into a plain unprivileged PV domain.
+
++### shim\_mem (x86)
++> `= List of ( min:<size> | max:<size> | <size> )`
++
++Set the amount of memory that xen-shim reserves for itself. Only has effect
++if pv-shim mode is enabled.
++
++* `min:<size>` specifies the minimum amount of memory. Ignored if greater
++ than max. Default: 10M.
++* `max:<size>` specifies the maximum amount of memory. Default: 128M.
++* `<size>` specifies the exact amount of memory. Overrides both min and max.
++
++By default, 1/16th of total HVM container's memory is reserved for xen-shim
++with minimum amount being 10MB and maximum amount 128MB.
++
+ ### rcu-idle-timer-period-ms
+ > `= <integer>`
+
+diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
+index 452298c624..bc713fb2b5 100644
+--- a/xen/arch/x86/dom0_build.c
++++ b/xen/arch/x86/dom0_build.c
+@@ -51,6 +51,13 @@ static long __init parse_amt(const char *s, const char **ps)
+
+ static int __init parse_dom0_mem(const char *s)
+ {
++ /* xen-shim uses shim_mem parameter instead of dom0_mem */
++ if ( pv_shim )
++ {
++ printk("Ignoring dom0_mem param in pv-shim mode\n");
++ return 0;
++ }
++
+ do {
+ if ( !strncmp(s, "min:", 4) )
+ dom0_min_nrpages = parse_amt(s+4, &s);
+@@ -284,7 +291,16 @@ unsigned long __init dom0_compute_nr_pages(
+ * maximum of 128MB.
+ */
+ if ( nr_pages == 0 )
+- nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT));
++ {
++ uint64_t rsvd = min(avail / 16, 128UL << (20 - PAGE_SHIFT));
++ if ( pv_shim )
++ {
++ rsvd = pv_shim_mem(avail);
++ printk("Reserved %lu pages for xen-shim\n", rsvd);
++
++ }
++ nr_pages = -rsvd;
++ }
+
+ /* Negative specification means "all memory - specified amount". */
+ if ( (long)nr_pages < 0 ) nr_pages += avail;
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index c53a4ca407..6dc1ee45d7 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -53,6 +53,52 @@ static long pv_shim_grant_table_op(unsigned int cmd,
+ XEN_GUEST_HANDLE_PARAM(void) uop,
+ unsigned int count);
+
++/*
++ * By default, 1/16th of total HVM container's memory is reserved for xen-shim
++ * with minimum amount being 10MB and maximum amount 128MB. Some users may wish
++ * to tune this constants for better memory utilization. This can be achieved
++ * using the following xen-shim's command line option:
++ *
++ * shim_mem=[min:<min_amt>,][max:<max_amt>,][<amt>]
++ *
++ * <min_amt>: The minimum amount of memory that should be allocated for xen-shim
++ * (ignored if greater than max)
++ * <max_amt>: The maximum amount of memory that should be allocated for xen-shim
++ * <amt>: The precise amount of memory to allocate for xen-shim
++ * (overrides both min and max)
++ */
++static uint64_t __initdata shim_nrpages;
++static uint64_t __initdata shim_min_nrpages = 10UL << (20 - PAGE_SHIFT);
++static uint64_t __initdata shim_max_nrpages = 128UL << (20 - PAGE_SHIFT);
++
++static int __init parse_shim_mem(const char *s)
++{
++ do {
++ if ( !strncmp(s, "min:", 4) )
++ shim_min_nrpages = parse_size_and_unit(s+4, &s) >> PAGE_SHIFT;
++ else if ( !strncmp(s, "max:", 4) )
++ shim_max_nrpages = parse_size_and_unit(s+4, &s) >> PAGE_SHIFT;
++ else
++ shim_nrpages = parse_size_and_unit(s, &s) >> PAGE_SHIFT;
++ } while ( *s++ == ',' );
++
++ return s[-1] ? -EINVAL : 0;
++}
++custom_param("shim_mem", parse_shim_mem);
++
++uint64_t pv_shim_mem(uint64_t avail)
++{
++ uint64_t rsvd = min(avail / 16, shim_max_nrpages);
++
++ if ( shim_nrpages )
++ return shim_nrpages;
++
++ if ( shim_min_nrpages <= shim_max_nrpages )
++ rsvd = max(rsvd, shim_min_nrpages);
++
++ return rsvd;
++}
++
+ #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \
+ _PAGE_GUEST_KERNEL)
+ #define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+index 4d5f0b43fc..0ef452158e 100644
+--- a/xen/include/asm-x86/pv/shim.h
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -38,6 +38,7 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ int pv_shim_shutdown(uint8_t reason);
+ void pv_shim_inject_evtchn(unsigned int port);
+ domid_t get_initial_domain_id(void);
++uint64_t pv_shim_mem(uint64_t avail);
+
+ #else
+
+@@ -63,6 +64,11 @@ static inline domid_t get_initial_domain_id(void)
+ {
+ return 0;
+ }
++static inline uint64_t pv_shim_mem(uint64_t avail)
++{
++ ASSERT_UNREACHABLE();
++ return 0;
++}
+
+ #endif
+
+--
+2.14.3
+
+
+From 004646a1dd4ff2f768d942689545dd3b6e2135e2 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:20 +0000
+Subject: [PATCH 63/77] xen/pvshim: set max_pages to the value of tot_pages
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+So that the guest is not able to deplete the memory pool of the shim
+itself by trying to balloon up.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Acked-by: Jan Beulich <jbeulich(a)suse.com>
+---
+ xen/arch/x86/pv/shim.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 6dc1ee45d7..e3e101a5b1 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -204,6 +204,12 @@ void __init pv_shim_setup_dom(struct domain *d, l4_pgentry_t
*l4start,
+ (hypercall_fn_t *)pv_shim_grant_table_op,
+ (hypercall_fn_t *)pv_shim_grant_table_op);
+ guest = d;
++
++ /*
++ * Set the max pages to the current number of pages to prevent the
++ * guest from depleting the shim memory pool.
++ */
++ d->max_pages = d->tot_pages;
+ }
+
+ static void write_start_info(struct domain *d)
+--
+2.14.3
+
+
+From 5b6c3ffa1d291724a329b57658783fc30b93b479 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:20 +0000
+Subject: [PATCH 64/77] xen/pvshim: support vCPU hotplug
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+Changes since v1:
+ - Fix hotplug so that v->initialized is checked before attempting to
+ bring up the physical CPU.
+ - Fix ARM compilation.
+---
+ xen/arch/x86/pv/shim.c | 63 +++++++++++++++++++++++++++++++++++++++++++
+ xen/common/domain.c | 38 +++++++++++++++++---------
+ xen/include/asm-x86/pv/shim.h | 12 +++++++++
+ xen/include/xen/domain.h | 1 +
+ 4 files changed, 102 insertions(+), 12 deletions(-)
+
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index e3e101a5b1..68ec7bed8e 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -751,6 +751,69 @@ static long pv_shim_grant_table_op(unsigned int cmd,
+ return rc;
+ }
+
++long pv_shim_cpu_up(void *data)
++{
++ struct vcpu *v = data;
++ struct domain *d = v->domain;
++ bool wake;
++
++ BUG_ON(smp_processor_id() != 0);
++
++ domain_lock(d);
++ if ( !v->is_initialised )
++ {
++ domain_unlock(d);
++ return -EINVAL;
++ }
++
++ if ( !cpu_online(v->vcpu_id) )
++ {
++ long rc = cpu_up_helper((void *)(unsigned long)v->vcpu_id);
++
++ if ( rc )
++ {
++ domain_unlock(d);
++ gprintk(XENLOG_ERR, "Failed to bring up CPU#%u: %ld\n",
++ v->vcpu_id, rc);
++ return rc;
++ }
++ }
++
++ wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
++ domain_unlock(d);
++ if ( wake )
++ vcpu_wake(v);
++
++ return 0;
++}
++
++long pv_shim_cpu_down(void *data)
++{
++ struct vcpu *v = data;
++ long rc;
++
++ BUG_ON(smp_processor_id() != 0);
++
++ if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
++ vcpu_sleep_sync(v);
++
++ if ( cpu_online(v->vcpu_id) )
++ {
++ rc = cpu_down_helper((void *)(unsigned long)v->vcpu_id);
++ if ( rc )
++ gprintk(XENLOG_ERR, "Failed to bring down CPU#%u: %ld\n",
++ v->vcpu_id, rc);
++ /*
++ * NB: do not propagate errors from cpu_down_helper failing. The shim
++ * is going to run with extra CPUs, but that's not going to prevent
++ * normal operation. OTOH most guests are not prepared to handle an
++ * error on VCPUOP_down failing, and will likely panic.
++ */
++ }
++
++ return 0;
++}
++
+ domid_t get_initial_domain_id(void)
+ {
+ uint32_t eax, ebx, ecx, edx;
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 9a703734eb..8fbd33d4c6 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -1293,22 +1293,36 @@ long do_vcpu_op(int cmd, unsigned int vcpuid,
XEN_GUEST_HANDLE_PARAM(void) arg)
+
+ break;
+
+- case VCPUOP_up: {
+- bool_t wake = 0;
+- domain_lock(d);
+- if ( !v->is_initialised )
+- rc = -EINVAL;
++ case VCPUOP_up:
++#ifdef CONFIG_X86
++ if ( pv_shim )
++ rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
+ else
+- wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
+- domain_unlock(d);
+- if ( wake )
+- vcpu_wake(v);
++#endif
++ {
++ bool wake = false;
++
++ domain_lock(d);
++ if ( !v->is_initialised )
++ rc = -EINVAL;
++ else
++ wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
++ domain_unlock(d);
++ if ( wake )
++ vcpu_wake(v);
++ }
++
+ break;
+- }
+
+ case VCPUOP_down:
+- if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
+- vcpu_sleep_nosync(v);
++#ifdef CONFIG_X86
++ if ( pv_shim )
++ rc = continue_hypercall_on_cpu(0, pv_shim_cpu_down, v);
++ else
++#endif
++ if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
++ vcpu_sleep_nosync(v);
++
+ break;
+
+ case VCPUOP_is_up:
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+index 0ef452158e..eb59ddd38a 100644
+--- a/xen/include/asm-x86/pv/shim.h
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -37,6 +37,8 @@ void pv_shim_setup_dom(struct domain *d, l4_pgentry_t *l4start,
+ start_info_t *si);
+ int pv_shim_shutdown(uint8_t reason);
+ void pv_shim_inject_evtchn(unsigned int port);
++long pv_shim_cpu_up(void *data);
++long pv_shim_cpu_down(void *data);
+ domid_t get_initial_domain_id(void);
+ uint64_t pv_shim_mem(uint64_t avail);
+
+@@ -60,6 +62,16 @@ static inline void pv_shim_inject_evtchn(unsigned int port)
+ {
+ ASSERT_UNREACHABLE();
+ }
++static inline long pv_shim_cpu_up(void *data)
++{
++ ASSERT_UNREACHABLE();
++ return 0;
++}
++static inline long pv_shim_cpu_down(void *data)
++{
++ ASSERT_UNREACHABLE();
++ return 0;
++}
+ static inline domid_t get_initial_domain_id(void)
+ {
+ return 0;
+diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
+index 347f264047..eb62f1dab1 100644
+--- a/xen/include/xen/domain.h
++++ b/xen/include/xen/domain.h
+@@ -17,6 +17,7 @@ struct vcpu *alloc_vcpu(
+ struct domain *d, unsigned int vcpu_id, unsigned int cpu_id);
+ struct vcpu *alloc_dom0_vcpu0(struct domain *dom0);
+ int vcpu_reset(struct vcpu *);
++int vcpu_up(struct vcpu *v);
+
+ struct xen_domctl_getdomaininfo;
+ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info);
+--
+2.14.3
+
+
+From 29dd3142bf7115d45836a6de7a72c17a4dac7cc8 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:20 +0000
+Subject: [PATCH 65/77] xen/pvshim: memory hotplug
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+Changes since v1:
+ - Add an order parameter to batch_memory_op.
+ - Add a TODO item regarding high order memory chunks to
+ pv_shim_online_memory.
+ - Use page_list_splice.
+ - Make sure the shim handlers are not called multiple times when
+ the hypercall is preempted.
+---
+ xen/arch/x86/pv/shim.c | 112 ++++++++++++++++++++++++++++++++++++++++++
+ xen/common/memory.c | 21 ++++++++
+ xen/include/asm-x86/pv/shim.h | 10 ++++
+ 3 files changed, 143 insertions(+)
+
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 68ec7bed8e..4120cc550e 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -48,6 +48,9 @@ static unsigned int nr_grant_list;
+ static unsigned long *grant_frames;
+ static DEFINE_SPINLOCK(grant_lock);
+
++static PAGE_LIST_HEAD(balloon);
++static DEFINE_SPINLOCK(balloon_lock);
++
+ static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg);
+ static long pv_shim_grant_table_op(unsigned int cmd,
+ XEN_GUEST_HANDLE_PARAM(void) uop,
+@@ -814,6 +817,115 @@ long pv_shim_cpu_down(void *data)
+ return 0;
+ }
+
++static unsigned long batch_memory_op(unsigned int cmd, unsigned int order,
++ const struct page_list_head *list)
++{
++ struct xen_memory_reservation xmr = {
++ .domid = DOMID_SELF,
++ .extent_order = order,
++ };
++ unsigned long pfns[64];
++ const struct page_info *pg;
++ unsigned long done = 0;
++
++ set_xen_guest_handle(xmr.extent_start, pfns);
++ page_list_for_each ( pg, list )
++ {
++ pfns[xmr.nr_extents++] = page_to_mfn(pg);
++ if ( xmr.nr_extents == ARRAY_SIZE(pfns) || !page_list_next(pg, list) )
++ {
++ long nr = xen_hypercall_memory_op(cmd, &xmr);
++
++ done += nr > 0 ? nr : 0;
++ if ( nr != xmr.nr_extents )
++ break;
++ xmr.nr_extents = 0;
++ }
++ }
++
++ return done;
++}
++
++void pv_shim_online_memory(unsigned int nr, unsigned int order)
++{
++ struct page_info *page, *tmp;
++ PAGE_LIST_HEAD(list);
++
++ spin_lock(&balloon_lock);
++ page_list_for_each_safe ( page, tmp, &balloon )
++ {
++ /* TODO: add support for splitting high order memory chunks. */
++ if ( page->v.free.order != order )
++ continue;
++
++ page_list_del(page, &balloon);
++ page_list_add_tail(page, &list);
++ if ( !--nr )
++ break;
++ }
++ spin_unlock(&balloon_lock);
++
++ if ( nr )
++ gprintk(XENLOG_WARNING,
++ "failed to allocate %u extents of order %u for onlining\n",
++ nr, order);
++
++ nr = batch_memory_op(XENMEM_populate_physmap, order, &list);
++ while ( nr-- )
++ {
++ BUG_ON((page = page_list_remove_head(&list)) == NULL);
++ free_domheap_pages(page, order);
++ }
++
++ if ( !page_list_empty(&list) )
++ {
++ gprintk(XENLOG_WARNING,
++ "failed to online some of the memory regions\n");
++ spin_lock(&balloon_lock);
++ page_list_splice(&list, &balloon);
++ spin_unlock(&balloon_lock);
++ }
++}
++
++void pv_shim_offline_memory(unsigned int nr, unsigned int order)
++{
++ struct page_info *page;
++ PAGE_LIST_HEAD(list);
++
++ while ( nr-- )
++ {
++ page = alloc_domheap_pages(NULL, order, 0);
++ if ( !page )
++ break;
++
++ page_list_add_tail(page, &list);
++ page->v.free.order = order;
++ }
++
++ if ( nr + 1 )
++ gprintk(XENLOG_WARNING,
++ "failed to reserve %u extents of order %u for offlining\n",
++ nr + 1, order);
++
++
++ nr = batch_memory_op(XENMEM_decrease_reservation, order, &list);
++ spin_lock(&balloon_lock);
++ while ( nr-- )
++ {
++ BUG_ON((page = page_list_remove_head(&list)) == NULL);
++ page_list_add_tail(page, &balloon);
++ }
++ spin_unlock(&balloon_lock);
++
++ if ( !page_list_empty(&list) )
++ {
++ gprintk(XENLOG_WARNING,
++ "failed to offline some of the memory regions\n");
++ while ( (page = page_list_remove_head(&list)) != NULL )
++ free_domheap_pages(page, order);
++ }
++}
++
+ domid_t get_initial_domain_id(void)
+ {
+ uint32_t eax, ebx, ecx, edx;
+diff --git a/xen/common/memory.c b/xen/common/memory.c
+index a6ba33fdcb..9eed96a9ce 100644
+--- a/xen/common/memory.c
++++ b/xen/common/memory.c
+@@ -29,6 +29,10 @@
+ #include <public/memory.h>
+ #include <xsm/xsm.h>
+
++#ifdef CONFIG_X86
++#include <asm/guest.h>
++#endif
++
+ struct memop_args {
+ /* INPUT */
+ struct domain *domain; /* Domain to be affected. */
+@@ -1019,6 +1023,12 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void)
arg)
+ return start_extent;
+ }
+
++#ifdef CONFIG_X86
++ if ( pv_shim && op != XENMEM_decrease_reservation &&
!args.preempted )
++ /* Avoid calling pv_shim_online_memory when preempted. */
++ pv_shim_online_memory(args.nr_extents, args.extent_order);
++#endif
++
+ switch ( op )
+ {
+ case XENMEM_increase_reservation:
+@@ -1041,6 +1051,17 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void)
arg)
+ __HYPERVISOR_memory_op, "lh",
+ op | (rc << MEMOP_EXTENT_SHIFT), arg);
+
++#ifdef CONFIG_X86
++ if ( pv_shim && op == XENMEM_decrease_reservation )
++ /*
++ * Only call pv_shim_offline_memory when the hypercall has
++ * finished. Note that nr_done is used to cope in case the
++ * hypercall has failed and only part of the extents where
++ * processed.
++ */
++ pv_shim_offline_memory(args.nr_extents, args.nr_done);
++#endif
++
+ break;
+
+ case XENMEM_exchange:
+diff --git a/xen/include/asm-x86/pv/shim.h b/xen/include/asm-x86/pv/shim.h
+index eb59ddd38a..fb739772df 100644
+--- a/xen/include/asm-x86/pv/shim.h
++++ b/xen/include/asm-x86/pv/shim.h
+@@ -39,6 +39,8 @@ int pv_shim_shutdown(uint8_t reason);
+ void pv_shim_inject_evtchn(unsigned int port);
+ long pv_shim_cpu_up(void *data);
+ long pv_shim_cpu_down(void *data);
++void pv_shim_online_memory(unsigned int nr, unsigned int order);
++void pv_shim_offline_memory(unsigned int nr, unsigned int order);
+ domid_t get_initial_domain_id(void);
+ uint64_t pv_shim_mem(uint64_t avail);
+
+@@ -72,6 +74,14 @@ static inline long pv_shim_cpu_down(void *data)
+ ASSERT_UNREACHABLE();
+ return 0;
+ }
++static inline void pv_shim_online_memory(unsigned int nr, unsigned int order)
++{
++ ASSERT_UNREACHABLE();
++}
++static inline void pv_shim_offline_memory(unsigned int nr, unsigned int order)
++{
++ ASSERT_UNREACHABLE();
++}
+ static inline domid_t get_initial_domain_id(void)
+ {
+ return 0;
+--
+2.14.3
+
+
+From 9d60bc96bef01444e30a9653ebf06b24c5bc8be5 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:20 +0000
+Subject: [PATCH 66/77] xen/shim: modify shim_mem parameter behaviour
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+shim_mem will now account for both the memory used by the hypervisor
+loaded in memory and the free memory slack given to the shim for
+runtime usage.
+
+From experimental testing it seems like the total amount of MiB used
+by the shim (giving it ~1MB of free memory for runtime) is:
+
+memory/113 + 20
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+ docs/misc/xen-command-line.markdown | 13 +++++++------
+ xen/arch/x86/dom0_build.c | 14 +++-----------
+ xen/arch/x86/pv/shim.c | 30 +++++++++++++++++++-----------
+ 3 files changed, 29 insertions(+), 28 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index 9f51710a46..68ec52b5c2 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -1461,16 +1461,17 @@ constructed into a plain unprivileged PV domain.
+ ### shim\_mem (x86)
+ > `= List of ( min:<size> | max:<size> | <size> )`
+
+-Set the amount of memory that xen-shim reserves for itself. Only has effect
+-if pv-shim mode is enabled.
++Set the amount of memory that xen-shim uses. Only has effect if pv-shim mode is
++enabled. Note that this value accounts for the memory used by the shim itself
++plus the free memory slack given to the shim for runtime allocations.
+
+ * `min:<size>` specifies the minimum amount of memory. Ignored if greater
+- than max. Default: 10M.
+-* `max:<size>` specifies the maximum amount of memory. Default: 128M.
++ than max.
++* `max:<size>` specifies the maximum amount of memory.
+ * `<size>` specifies the exact amount of memory. Overrides both min and max.
+
+-By default, 1/16th of total HVM container's memory is reserved for xen-shim
+-with minimum amount being 10MB and maximum amount 128MB.
++By default, the amount of free memory slack given to the shim for runtime usage
++is 1MB.
+
+ ### rcu-idle-timer-period-ms
+ > `= <integer>`
+diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
+index bc713fb2b5..d77c6b40de 100644
+--- a/xen/arch/x86/dom0_build.c
++++ b/xen/arch/x86/dom0_build.c
+@@ -290,17 +290,9 @@ unsigned long __init dom0_compute_nr_pages(
+ * for things like DMA buffers. This reservation is clamped to a
+ * maximum of 128MB.
+ */
+- if ( nr_pages == 0 )
+- {
+- uint64_t rsvd = min(avail / 16, 128UL << (20 - PAGE_SHIFT));
+- if ( pv_shim )
+- {
+- rsvd = pv_shim_mem(avail);
+- printk("Reserved %lu pages for xen-shim\n", rsvd);
+-
+- }
+- nr_pages = -rsvd;
+- }
++ if ( !nr_pages )
++ nr_pages = -(pv_shim ? pv_shim_mem(avail)
++ : min(avail / 16, 128UL << (20 - PAGE_SHIFT)));
+
+ /* Negative specification means "all memory - specified amount". */
+ if ( (long)nr_pages < 0 ) nr_pages += avail;
+diff --git a/xen/arch/x86/pv/shim.c b/xen/arch/x86/pv/shim.c
+index 4120cc550e..702249719e 100644
+--- a/xen/arch/x86/pv/shim.c
++++ b/xen/arch/x86/pv/shim.c
+@@ -57,9 +57,8 @@ static long pv_shim_grant_table_op(unsigned int cmd,
+ unsigned int count);
+
+ /*
+- * By default, 1/16th of total HVM container's memory is reserved for xen-shim
+- * with minimum amount being 10MB and maximum amount 128MB. Some users may wish
+- * to tune this constants for better memory utilization. This can be achieved
++ * By default give the shim 1MB of free memory slack. Some users may wish to
++ * tune this constants for better memory utilization. This can be achieved
+ * using the following xen-shim's command line option:
+ *
+ * shim_mem=[min:<min_amt>,][max:<max_amt>,][<amt>]
+@@ -71,8 +70,8 @@ static long pv_shim_grant_table_op(unsigned int cmd,
+ * (overrides both min and max)
+ */
+ static uint64_t __initdata shim_nrpages;
+-static uint64_t __initdata shim_min_nrpages = 10UL << (20 - PAGE_SHIFT);
+-static uint64_t __initdata shim_max_nrpages = 128UL << (20 - PAGE_SHIFT);
++static uint64_t __initdata shim_min_nrpages;
++static uint64_t __initdata shim_max_nrpages;
+
+ static int __init parse_shim_mem(const char *s)
+ {
+@@ -91,15 +90,24 @@ custom_param("shim_mem", parse_shim_mem);
+
+ uint64_t pv_shim_mem(uint64_t avail)
+ {
+- uint64_t rsvd = min(avail / 16, shim_max_nrpages);
++ if ( !shim_nrpages )
++ {
++ shim_nrpages = max(shim_min_nrpages,
++ total_pages - avail + (1UL << (20 - PAGE_SHIFT)));
++ if ( shim_max_nrpages )
++ shim_max_nrpages = min(shim_nrpages, shim_max_nrpages);
++ }
++
++ if ( total_pages - avail > shim_nrpages )
++ panic("pages used by shim > shim_nrpages (%#lx > %#lx)",
++ total_pages - avail, shim_nrpages);
+
+- if ( shim_nrpages )
+- return shim_nrpages;
++ shim_nrpages -= total_pages - avail;
+
+- if ( shim_min_nrpages <= shim_max_nrpages )
+- rsvd = max(rsvd, shim_min_nrpages);
++ printk("shim used pages %#lx reserving %#lx free pages\n",
++ total_pages - avail, shim_nrpages);
+
+- return rsvd;
++ return shim_nrpages;
+ }
+
+ #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER| \
+--
+2.14.3
+
+
+From b5be9c817d04b006886a0d7b87eacf7bd78f504d Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:20 +0000
+Subject: [PATCH 67/77] xen/pvshim: use default position for the m2p mappings
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When running a 32bit kernel as Dom0 on a 64bit hypervisor the
+hypervisor will try to shrink the hypervisor hole to the minimum
+needed, and thus requires the Dom0 to use XENMEM_machphys_mapping in
+order to fetch the position of the start of the hypervisor virtual
+mappings.
+
+Disable this feature when running as a PV shim, since some DomU
+kernels don't implement XENMEM_machphys_mapping and break if the m2p
+doesn't begin at the default address.
+
+NB: support for the XENMEM_machphys_mapping was added in Linux by
+commit 7e7750.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Acked-by: Jan Beulich <jbeulich(a)suse.com>
+---
+ xen/arch/x86/pv/dom0_build.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
+index 72752b8656..ebcb47bf26 100644
+--- a/xen/arch/x86/pv/dom0_build.c
++++ b/xen/arch/x86/pv/dom0_build.c
+@@ -398,7 +398,8 @@ int __init dom0_construct_pv(struct domain *d,
+ if ( parms.pae == XEN_PAE_EXTCR3 )
+ set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
+
+- if ( (parms.virt_hv_start_low != UNSET_ADDR) && elf_32bit(&elf) )
++ if ( !pv_shim && (parms.virt_hv_start_low != UNSET_ADDR) &&
++ elf_32bit(&elf) )
+ {
+ unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+ value = (parms.virt_hv_start_low + mask) & ~mask;
+--
+2.14.3
+
+
+From c9083de0ae6b0f5b42e7f92f6d43edc3bd09d4f1 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:21 +0000
+Subject: [PATCH 68/77] xen/shim: crash instead of reboot in shim mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+All guest shutdown operations are forwarded to L0, so the only native
+calls to machine_restart happen from crash related paths inside the
+hypervisor, hence switch the reboot code to instead issue a crash
+shutdown.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Acked-by: Jan Beulich <jbeulich(a)suse.com>
+[ wei: fix arm build ]
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+Changes since v1:
+ - Use the ternary operator.
+---
+ xen/arch/x86/shutdown.c | 7 ++++++-
+ xen/drivers/char/console.c | 4 ++++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/shutdown.c b/xen/arch/x86/shutdown.c
+index 689f6f137d..a350714319 100644
+--- a/xen/arch/x86/shutdown.c
++++ b/xen/arch/x86/shutdown.c
+@@ -642,7 +642,12 @@ void machine_restart(unsigned int delay_millisecs)
+ break;
+
+ case BOOT_XEN:
+- xen_hypercall_shutdown(SHUTDOWN_reboot);
++ /*
++ * When running in PV shim mode guest shutdown calls are
++ * forwarded to L0, hence the only way to get here is if a
++ * shim crash happens.
++ */
++ xen_hypercall_shutdown(pv_shim ? SHUTDOWN_crash : SHUTDOWN_reboot);
+ break;
+ }
+ }
+diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
+index 18f5b7f7aa..121073c8ed 100644
+--- a/xen/drivers/char/console.c
++++ b/xen/drivers/char/console.c
+@@ -1253,7 +1253,11 @@ void panic(const char *fmt, ...)
+ if ( opt_noreboot )
+ printk("Manual reset required ('noreboot' specified)\n");
+ else
++#ifdef CONFIG_X86
++ printk("%s in five seconds...\n", pv_shim ? "Crash" :
"Reboot");
++#else
+ printk("Reboot in five seconds...\n");
++#endif
+
+ spin_unlock_irqrestore(&lock, flags);
+
+--
+2.14.3
+
+
+From 321ef983a06bc14570b79da1ab60344e3feb2c2b Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Thu, 11 Jan 2018 11:41:21 +0000
+Subject: [PATCH 69/77] xen/shim: allow DomU to have as many vcpus as available
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Since the shim VCPUOP_{up/down} hypercall is wired to the plug/unplug
+of CPUs to the shim itself, start the shim DomU with only the BSP
+online, and let the guest bring up other CPUs as it needs them.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+---
+Changes since v1:
+ - Fix single line comment style.
+ - Print Dom%u d->domain_id.
+ - Change position of __start_xen comment.
+---
+ xen/arch/x86/dom0_build.c | 30 +++++++++++++++++++++++++++---
+ xen/arch/x86/pv/dom0_build.c | 2 +-
+ xen/arch/x86/setup.c | 28 ++++++++++++++++++----------
+ 3 files changed, 46 insertions(+), 14 deletions(-)
+
+diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
+index d77c6b40de..b4f4a4ac4a 100644
+--- a/xen/arch/x86/dom0_build.c
++++ b/xen/arch/x86/dom0_build.c
+@@ -138,9 +138,18 @@ struct vcpu *__init dom0_setup_vcpu(struct domain *d,
+
+ if ( v )
+ {
+- if ( !d->is_pinned && !dom0_affinity_relaxed )
+- cpumask_copy(v->cpu_hard_affinity, &dom0_cpus);
+- cpumask_copy(v->cpu_soft_affinity, &dom0_cpus);
++ if ( pv_shim )
++ {
++
++ cpumask_setall(v->cpu_hard_affinity);
++ cpumask_setall(v->cpu_soft_affinity);
++ }
++ else
++ {
++ if ( !d->is_pinned && !dom0_affinity_relaxed )
++ cpumask_copy(v->cpu_hard_affinity, &dom0_cpus);
++ cpumask_copy(v->cpu_soft_affinity, &dom0_cpus);
++ }
+ }
+
+ return v;
+@@ -153,6 +162,21 @@ unsigned int __init dom0_max_vcpus(void)
+ unsigned int i, max_vcpus, limit;
+ nodeid_t node;
+
++ if ( pv_shim )
++ {
++ nodes_setall(dom0_nodes);
++
++ /*
++ * When booting in shim mode APs are not started until the guest brings
++ * other vCPUs up.
++ */
++ cpumask_set_cpu(0, &dom0_cpus);
++
++ /* On PV shim mode allow the guest to have as many CPUs as available. */
++ return nr_cpu_ids;
++ }
++
++
+ for ( i = 0; i < dom0_nr_pxms; ++i )
+ if ( (node = pxm_to_node(dom0_pxms[i])) != NUMA_NO_NODE )
+ node_set(node, dom0_nodes);
+diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
+index ebcb47bf26..5d8909fa13 100644
+--- a/xen/arch/x86/pv/dom0_build.c
++++ b/xen/arch/x86/pv/dom0_build.c
+@@ -701,7 +701,7 @@ int __init dom0_construct_pv(struct domain *d,
+ for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
+ shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1;
+
+- printk("Dom0 has maximum %u VCPUs\n", d->max_vcpus);
++ printk("Dom%u has maximum %u VCPUs\n", d->domain_id, d->max_vcpus);
+
+ cpu = v->processor;
+ for ( i = 1; i < d->max_vcpus; i++ )
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index 7091c38047..cf07e5045d 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -1584,18 +1584,26 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+
+ do_presmp_initcalls();
+
+- for_each_present_cpu ( i )
++ /*
++ * NB: when running as a PV shim VCPUOP_up/down is wired to the shim
++ * physical cpu_add/remove functions, so launch the guest with only
++ * the BSP online and let it bring up the other CPUs as required.
++ */
++ if ( !pv_shim )
+ {
+- /* Set up cpu_to_node[]. */
+- srat_detect_node(i);
+- /* Set up node_to_cpumask based on cpu_to_node[]. */
+- numa_add_cpu(i);
+-
+- if ( (num_online_cpus() < max_cpus) && !cpu_online(i) )
++ for_each_present_cpu ( i )
+ {
+- int ret = cpu_up(i);
+- if ( ret != 0 )
+- printk("Failed to bring up CPU %u (error %d)\n", i, ret);
++ /* Set up cpu_to_node[]. */
++ srat_detect_node(i);
++ /* Set up node_to_cpumask based on cpu_to_node[]. */
++ numa_add_cpu(i);
++
++ if ( (num_online_cpus() < max_cpus) && !cpu_online(i) )
++ {
++ int ret = cpu_up(i);
++ if ( ret != 0 )
++ printk("Failed to bring up CPU %u (error %d)\n", i, ret);
++ }
+ }
+ }
+
+--
+2.14.3
+
+
+From abdde49edc15cc4dc61356d7f3f8f52a2d14e2d8 Mon Sep 17 00:00:00 2001
+From: Ian Jackson <Ian.Jackson(a)eu.citrix.com>
+Date: Thu, 14 Dec 2017 16:16:20 +0000
+Subject: [PATCH 70/77] libxl: pvshim: Provide first-class config settings to
+ enable shim mode
+
+This is API-compatible because old callers are supposed to call
+libxl_*_init to initialise the struct; and the updated function clears
+these members.
+
+It is ABI-compatible because the new fields make this member of the
+guest type union larger but only within the existing size of that
+union.
+
+Unfortunately it is not easy to backport because it depends on the PVH
+domain type. Attempts to avoid use of the PVH domain type involved
+working with two views of the configuration: the "underlying" domain
+type and the "visible" type (and corresponding config info). Also
+there are different sets of config settings for PV and PVH, which
+callers would have to know to set.
+
+And, unfortunately, it will not be possible, with this approach, to
+enable the shim by default for all libxl callers. (Although it could
+perhaps be done in xl.)
+
+For now, our config defaults are:
+ * if enabled, path is "xen-shim" in the xen firmware directory
+ * if enabled, cmdline is the one we are currently debugging with
+
+The debugging arguments will be rationalised in a moment.
+
+Signed-off-by: Ian Jackson <Ian.Jackson(a)eu.citrix.com>
+Signed-off-by: George Dunlap <george.dunlap(a)citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+v2: pvshim, not pvhshim
+ works with type "pvh", not type "pv"
+---
+ tools/libxl/libxl.h | 8 +++++++
+ tools/libxl/libxl_create.c | 15 ++++++++++++
+ tools/libxl/libxl_dom.c | 57 +++++++++++++++++++++++++++++++++++---------
+ tools/libxl/libxl_internal.h | 4 ++++
+ tools/libxl/libxl_types.idl | 5 +++-
+ 5 files changed, 77 insertions(+), 12 deletions(-)
+
+diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
+index 5e9aed739d..9632fd6d2f 100644
+--- a/tools/libxl/libxl.h
++++ b/tools/libxl/libxl.h
+@@ -1101,6 +1101,14 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, const
libxl_mac *src);
+ */
+ #define LIBXL_HAVE_SET_PARAMETERS 1
+
++/*
++ * LIBXL_HAVE_PV_SHIM
++ *
++ * If this is defined, libxl_domain_build_info's pvh type information
++ * contains members pvshim, pvshim_path, pvshim_cmdline.
++ */
++#define LIBXL_HAVE_PV_SHIM 1
++
+ typedef char **libxl_string_list;
+ void libxl_string_list_dispose(libxl_string_list *sl);
+ int libxl_string_list_length(const libxl_string_list *sl);
+diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
+index f15fb215c2..576c61ffab 100644
+--- a/tools/libxl/libxl_create.c
++++ b/tools/libxl/libxl_create.c
+@@ -389,6 +389,18 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc,
+ }
+ break;
+ case LIBXL_DOMAIN_TYPE_PVH:
++ libxl_defbool_setdefault(&b_info->u.pvh.pvshim, false);
++ if (libxl_defbool_val(b_info->u.pvh.pvshim)) {
++ if (!b_info->u.pvh.pvshim_path)
++ b_info->u.pvh.pvshim_path =
++ libxl__sprintf(NOGC, "%s/%s",
++ libxl__xenfirmwaredir_path(),
++ PVSHIM_BASENAME);
++ if (!b_info->u.pvh.pvshim_cmdline)
++ b_info->u.pvh.pvshim_cmdline =
++ libxl__strdup(NOGC, PVSHIM_CMDLINE);
++ }
++
+ break;
+ default:
+ LOG(ERROR, "invalid domain type %s in create info",
+@@ -499,6 +511,9 @@ int libxl__domain_build(libxl__gc *gc,
+
+ break;
+ case LIBXL_DOMAIN_TYPE_PVH:
++ state->shim_path = info->u.pvh.pvshim_path;
++ state->shim_cmdline = info->u.pvh.pvshim_cmdline;
++
+ ret = libxl__build_hvm(gc, domid, d_config, state);
+ if (ret)
+ goto out;
+diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
+index fbbdb9ec2f..b03386409f 100644
+--- a/tools/libxl/libxl_dom.c
++++ b/tools/libxl/libxl_dom.c
+@@ -1025,22 +1025,51 @@ static int libxl__domain_firmware(libxl__gc *gc,
+
+ if (state->pv_kernel.path != NULL &&
+ info->type == LIBXL_DOMAIN_TYPE_PVH) {
+- /* Try to load a kernel instead of the firmware. */
+- if (state->pv_kernel.mapped) {
+- rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
+- state->pv_kernel.size);
++
++ if (state->shim_path) {
++ rc = xc_dom_kernel_file(dom, state->shim_path);
+ if (rc) {
+- LOGE(ERROR, "xc_dom_kernel_mem failed");
++ LOGE(ERROR, "xc_dom_kernel_file failed");
+ goto out;
+ }
++
++ /* We've loaded the shim, so load the kernel as a secondary module */
++ if (state->pv_kernel.mapped) {
++ LOG(WARN, "xc_dom_module_mem, cmdline %s",
++ state->pv_cmdline);
++ rc = xc_dom_module_mem(dom, state->pv_kernel.data,
++ state->pv_kernel.size, state->pv_cmdline);
++ if (rc) {
++ LOGE(ERROR, "xc_dom_kernel_mem failed");
++ goto out;
++ }
++ } else {
++ LOG(WARN, "xc_dom_module_file, path %s cmdline %s",
++ state->pv_kernel.path, state->pv_cmdline);
++ rc = xc_dom_module_file(dom, state->pv_kernel.path,
state->pv_cmdline);
++ if (rc) {
++ LOGE(ERROR, "xc_dom_kernel_file failed");
++ goto out;
++ }
++ }
+ } else {
+- rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
+- if (rc) {
+- LOGE(ERROR, "xc_dom_kernel_file failed");
+- goto out;
++ /* No shim, so load the kernel directly */
++ if (state->pv_kernel.mapped) {
++ rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
++ state->pv_kernel.size);
++ if (rc) {
++ LOGE(ERROR, "xc_dom_kernel_mem failed");
++ goto out;
++ }
++ } else {
++ rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
++ if (rc) {
++ LOGE(ERROR, "xc_dom_kernel_file failed");
++ goto out;
++ }
+ }
+ }
+-
++
+ if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
+ if (state->pv_ramdisk.mapped) {
+ rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
+@@ -1154,8 +1183,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
+
+ xc_dom_loginit(ctx->xch);
+
++ /*
++ * If PVH and we have a shim override, use the shim cmdline.
++ * If PVH and no shim override, use the pv cmdline.
++ * If not PVH, use info->cmdline.
++ */
+ dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
+- state->pv_cmdline : info->cmdline, NULL);
++ (state->shim_path ? state->shim_cmdline :
state->pv_cmdline) :
++ info->cmdline, NULL);
+ if (!dom) {
+ LOGE(ERROR, "xc_dom_allocate failed");
+ rc = ERROR_NOMEM;
+diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
+index bfa95d8619..2454efa621 100644
+--- a/tools/libxl/libxl_internal.h
++++ b/tools/libxl/libxl_internal.h
+@@ -118,6 +118,8 @@
+ #define TAP_DEVICE_SUFFIX "-emu"
+ #define DOMID_XS_PATH "domid"
+ #define INVALID_DOMID ~0
++#define PVSHIM_BASENAME "xen-shim"
++#define PVSHIM_CMDLINE "pv-shim console=xen,pv sched=null loglvl=all
guest_loglvl=all apic_verbosity=debug e820-verbose"
+
+ /* Size macros. */
+ #define __AC(X,Y) (X##Y)
+@@ -1136,6 +1138,8 @@ typedef struct {
+
+ libxl__file_reference pv_kernel;
+ libxl__file_reference pv_ramdisk;
++ const char * shim_path;
++ const char * shim_cmdline;
+ const char * pv_cmdline;
+
+ xen_vmemrange_t *vmemranges;
+diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
+index a239324341..6d060edc0d 100644
+--- a/tools/libxl/libxl_types.idl
++++ b/tools/libxl/libxl_types.idl
+@@ -592,7 +592,10 @@ libxl_domain_build_info = Struct("domain_build_info",[
+ # Use host's E820 for PCI passthrough.
+ ("e820_host", libxl_defbool),
+ ])),
+- ("pvh", None),
++ ("pvh", Struct(None, [("pvshim", libxl_defbool),
++ ("pvshim_path", string),
++ ("pvshim_cmdline", string),
++ ])),
+ ("invalid", None),
+ ], keyvar_init_val = "LIBXL_DOMAIN_TYPE_INVALID")),
+
+--
+2.14.3
+
+
+From ab9e3854ddb2fad2b86aaf5144a26f5569b63cfc Mon Sep 17 00:00:00 2001
+From: Ian Jackson <ian.jackson(a)eu.citrix.com>
+Date: Fri, 5 Jan 2018 15:59:29 +0000
+Subject: [PATCH 71/77] libxl: pvshim: Introduce pvshim_extra
+
+And move the debugging options from the default config into a doc
+comment in libxl_types.idl.
+
+Signed-off-by: Ian Jackson <Ian.Jackson(a)eu.citrix.com>
+---
+v2: pvshim, not pvhshim
+ works with type "pvh", not type "pv"
+---
+ tools/libxl/libxl.h | 2 +-
+ tools/libxl/libxl_create.c | 5 ++++-
+ tools/libxl/libxl_internal.h | 2 +-
+ tools/libxl/libxl_types.idl | 1 +
+ 4 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
+index 9632fd6d2f..3c0ae6de47 100644
+--- a/tools/libxl/libxl.h
++++ b/tools/libxl/libxl.h
+@@ -1105,7 +1105,7 @@ void libxl_mac_copy(libxl_ctx *ctx, libxl_mac *dst, const libxl_mac
*src);
+ * LIBXL_HAVE_PV_SHIM
+ *
+ * If this is defined, libxl_domain_build_info's pvh type information
+- * contains members pvshim, pvshim_path, pvshim_cmdline.
++ * contains members pvshim, pvshim_path, pvshim_cmdline, pvshim_extra.
+ */
+ #define LIBXL_HAVE_PV_SHIM 1
+
+diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
+index 576c61ffab..1fa1d3a621 100644
+--- a/tools/libxl/libxl_create.c
++++ b/tools/libxl/libxl_create.c
+@@ -512,7 +512,10 @@ int libxl__domain_build(libxl__gc *gc,
+ break;
+ case LIBXL_DOMAIN_TYPE_PVH:
+ state->shim_path = info->u.pvh.pvshim_path;
+- state->shim_cmdline = info->u.pvh.pvshim_cmdline;
++ state->shim_cmdline = GCSPRINTF("%s%s%s",
++ info->u.pvh.pvshim_cmdline,
++ info->u.pvh.pvshim_extra ? " " : "",
++ info->u.pvh.pvshim_extra ? info->u.pvh.pvshim_extra :
"");
+
+ ret = libxl__build_hvm(gc, domid, d_config, state);
+ if (ret)
+diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
+index 2454efa621..0f89364466 100644
+--- a/tools/libxl/libxl_internal.h
++++ b/tools/libxl/libxl_internal.h
+@@ -119,7 +119,7 @@
+ #define DOMID_XS_PATH "domid"
+ #define INVALID_DOMID ~0
+ #define PVSHIM_BASENAME "xen-shim"
+-#define PVSHIM_CMDLINE "pv-shim console=xen,pv sched=null loglvl=all
guest_loglvl=all apic_verbosity=debug e820-verbose"
++#define PVSHIM_CMDLINE "pv-shim console=xen,pv sched=null"
+
+ /* Size macros. */
+ #define __AC(X,Y) (X##Y)
+diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
+index 6d060edc0d..d74fac7d30 100644
+--- a/tools/libxl/libxl_types.idl
++++ b/tools/libxl/libxl_types.idl
+@@ -595,6 +595,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
+ ("pvh", Struct(None, [("pvshim", libxl_defbool),
+ ("pvshim_path", string),
+ ("pvshim_cmdline", string),
++ ("pvshim_extra", string), # eg
"loglvl=all guest_loglvl=all apic_verbosity=debug e820-verbose"
+ ])),
+ ("invalid", None),
+ ], keyvar_init_val = "LIBXL_DOMAIN_TYPE_INVALID")),
+--
+2.14.3
+
+
+From 0e2d64ae8f4af4dbd49127107ae6237e7f748c04 Mon Sep 17 00:00:00 2001
+From: Ian Jackson <ian.jackson(a)eu.citrix.com>
+Date: Fri, 22 Dec 2017 16:12:23 +0000
+Subject: [PATCH 72/77] xl: pvshim: Provide and document xl config
+
+Signed-off-by: Ian Jackson <Ian.Jackson(a)eu.citrix.com>
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+v2: pvshim, not pvhshim
+ works with type "pvh", not type "pv"
+ pvshim_etc. options in config are not erroneously ignored
+---
+ docs/man/xl.cfg.pod.5.in | 35 +++++++++++++++++++++++++++++++++++
+ tools/xl/xl_parse.c | 14 ++++++++++++++
+ 2 files changed, 49 insertions(+)
+
+diff --git a/docs/man/xl.cfg.pod.5.in b/docs/man/xl.cfg.pod.5.in
+index b7b91d8627..bf6c266de1 100644
+--- a/docs/man/xl.cfg.pod.5.in
++++ b/docs/man/xl.cfg.pod.5.in
+@@ -508,6 +508,41 @@ Load the specified file as firmware for the guest.
+ Currently there's no firmware available for PVH guests, they should be
+ booted using the B<Direct Kernel Boot> method or the B<bootloader> option.
+
++=over 4
++
++=item B<pvshim=BOOLEAN>
++
++Whether to boot this guest as a PV guest within a PVH container.
++Ie, the guest will experience a PV environment,
++but
++processor hardware extensions are used to
++separate its address space
++to mitigate the Meltdown attack (CVE-2017-5754).
++
++Default is false.
++
++=item B<pvshim_path="PATH">
++
++The PV shim is a specially-built firmware-like executable
++constructed from the hypervisor source tree.
++This option specifies to use a non-default shim.
++Ignored if pvhsim is false.
++
++=item B<pvshim_cmdline="STRING">
++
++Command line for the shim.
++Default is "pv-shim console=xen,pv sched=null".
++Ignored if pvhsim is false.
++
++=item B<pvshim_extra="STRING">
++
++Extra command line arguments for the shim.
++If supplied, appended to the value for pvshim_cmdline.
++Default is empty.
++Ignored if pvhsim is false.
++
++=back
++
+ =head3 Other Options
+
+ =over 4
+diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
+index 9a692d5ae6..fdfe693de1 100644
+--- a/tools/xl/xl_parse.c
++++ b/tools/xl/xl_parse.c
+@@ -964,6 +964,20 @@ void parse_config_data(const char *config_source,
+ xlu_cfg_replace_string(config, "pool", &c_info->pool_name, 0);
+
+ libxl_domain_build_info_init_type(b_info, c_info->type);
++
++ if (b_info->type == LIBXL_DOMAIN_TYPE_PVH) {
++ xlu_cfg_get_defbool(config, "pvshim", &b_info->u.pvh.pvshim,
0);
++ if (!xlu_cfg_get_string(config, "pvshim_path", &buf, 0))
++ xlu_cfg_replace_string(config, "pvshim_path",
++ &b_info->u.pvh.pvshim_path, 0);
++ if (!xlu_cfg_get_string(config, "pvshim_cmdline", &buf, 0))
++ xlu_cfg_replace_string(config, "pvshim_cmdline",
++ &b_info->u.pvh.pvshim_cmdline, 0);
++ if (!xlu_cfg_get_string(config, "pvshim_extra", &buf, 0))
++ xlu_cfg_replace_string(config, "pvshim_extra",
++ &b_info->u.pvh.pvshim_extra, 0);
++ }
++
+ if (blkdev_start)
+ b_info->blkdev_start = strdup(blkdev_start);
+
+--
+2.14.3
+
+
+From 0a515eeb966add7c63d764cabffec3b2f560a588 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Tue, 16 Jan 2018 14:48:53 +0000
+Subject: [PATCH 73/77] xen/pvshim: map vcpu_info earlier for APs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Or else init_percpu_time is going to dereference a NULL pointer when
+trying to access vcpu_info.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Tested-by: George Dunlap <george.dunlap(a)citrix.com>
+---
+ xen/arch/x86/smpboot.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 5c7863035e..5ed82b16a8 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -346,6 +346,9 @@ void start_secondary(void *unused)
+ else
+ microcode_resume_cpu(cpu);
+
++ if ( xen_guest )
++ hypervisor_ap_setup();
++
+ smp_callin();
+
+ init_percpu_time();
+@@ -374,9 +377,6 @@ void start_secondary(void *unused)
+ cpumask_set_cpu(cpu, &cpu_online_map);
+ unlock_vector_lock();
+
+- if ( xen_guest )
+- hypervisor_ap_setup();
+-
+ /* We can take interrupts now: we're officially "up". */
+ local_irq_enable();
+ mtrr_ap_init();
+--
+2.14.3
+
+
+From 6f1979c8e4184f1f2b24b860e30d3b037b2e7f05 Mon Sep 17 00:00:00 2001
+From: Michael Young <m.a.young(a)durham.ac.uk>
+Date: Mon, 15 Jan 2018 21:23:20 +0000
+Subject: [PATCH 74/77] -xen-attach is needed for pvh boot with qemu-xen
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Currently the boot of a pvh guest using the qemu-xen device model fails
+with the error
+xen emulation not implemented (yet)
+in the qemu-dm log file. This patch adds the missing -xen-attach
+argument.
+
+V2: Use b_info->type != LIBXL_DOMAIN_TYPE_HVM instead of
+ (b_info->type == LIBXL_DOMAIN_TYPE_PV) ||
+ (b_info->type == LIBXL_DOMAIN_TYPE_PVH)
+as recommended by Roger Pau Monné.
+
+Signed-off-by: Michael Young <m.a.young(a)durham.ac.uk>
+Reviewed-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Acked-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ tools/libxl/libxl_dm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
+index a2ea95a9be..a3cddce8b7 100644
+--- a/tools/libxl/libxl_dm.c
++++ b/tools/libxl/libxl_dm.c
+@@ -1021,7 +1021,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+ */
+ flexarray_append(dm_args, "-no-user-config");
+
+- if (b_info->type == LIBXL_DOMAIN_TYPE_PV) {
++ if (b_info->type != LIBXL_DOMAIN_TYPE_HVM) {
+ flexarray_append(dm_args, "-xen-attach");
+ }
+
+--
+2.14.3
+
+
+From 69f4d872e524932d392acd80989c5b776baa4522 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Wed, 17 Jan 2018 10:57:02 +0000
+Subject: [PATCH 75/77] x86/guest: use the vcpu_info area from shared_info
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+If using less than 32 vCPUs (XEN_LEGACY_MAX_VCPUS).
+
+This is a workaround that should allow to boot the shim on hypervisors
+without commit "x86/upcall: inject a spurious event after setting
+upcall vector" as long as less than 32 vCPUs are assigned to the
+shim.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Acked-by: Jan Beulich <JBeulich(a)suse.com>
+---
+ xen/arch/x86/guest/xen.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/guest/xen.c b/xen/arch/x86/guest/xen.c
+index 2a5554ab26..ed8b8c8c7b 100644
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -257,7 +257,8 @@ void __init hypervisor_setup(void)
+ map_shared_info();
+
+ set_vcpu_id();
+- vcpu_info = xzalloc_array(struct vcpu_info, nr_cpu_ids);
++ if ( nr_cpu_ids > XEN_LEGACY_MAX_VCPUS )
++ vcpu_info = xzalloc_array(struct vcpu_info, nr_cpu_ids);
+ if ( map_vcpuinfo() )
+ {
+ xfree(vcpu_info);
+--
+2.14.3
+
+
+From 79f797c3f41c15a74d627a8eabc373ec7b202933 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Wed, 17 Jan 2018 09:48:14 +0000
+Subject: [PATCH 76/77] firmware/shim: fix build process to use POSIX find
+ options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The -printf find option is not POSIX compatible, so replace it with
+another rune.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Acked-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ tools/firmware/xen-dir/Makefile | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tools/firmware/xen-dir/Makefile b/tools/firmware/xen-dir/Makefile
+index adf6c31e8d..de754c752e 100644
+--- a/tools/firmware/xen-dir/Makefile
++++ b/tools/firmware/xen-dir/Makefile
+@@ -21,7 +21,8 @@ linkfarm.stamp: $(DEP_DIRS) $(DEP_FILES) FORCE
+ $(foreach d, $(LINK_DIRS), \
+ (mkdir -p $(D)/$(d); \
+ cd $(D)/$(d); \
+- find $(XEN_ROOT)/$(d)/ -type d -printf "./%P\n" | xargs mkdir -p);)
++ find $(XEN_ROOT)/$(d)/ -type d -exec sh -c \
++ "echo {} | sed 's,^$(XEN_ROOT)/$(d)/,,g' | xargs mkdir -p"
\;);)
+ $(foreach d, $(LINK_DIRS), \
+ (cd $(XEN_ROOT); \
+ find $(d) ! -type l -type f \
+--
+2.14.3
+
+
+From fa23f2aaa24c603f748b49b32378b738d18cc68f Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau(a)citrix.com>
+Date: Wed, 17 Jan 2018 12:00:41 +0000
+Subject: [PATCH 77/77] xen/pvh: place the trampoline at page 0x1
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Since PVH guest jump straight into trampoline_setup trampoline_phys is
+not initialized, thus the trampoline is relocated to address 0.
+
+This works, but has the undesirable effect of having VA 0 mapped to
+MFN 0, which means NULL pointed dereferences no longer trigger a page
+fault.
+
+In order to solve this, place the trampoline at page 0x1 and reserve
+the memory used by it.
+
+Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
+Reviewed-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ xen/arch/x86/boot/head.S | 3 +++
+ xen/arch/x86/mm.c | 9 +++++++--
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/boot/head.S b/xen/arch/x86/boot/head.S
+index 14caca6798..c527910478 100644
+--- a/xen/arch/x86/boot/head.S
++++ b/xen/arch/x86/boot/head.S
+@@ -411,6 +411,9 @@ __pvh_start:
+ /* Skip bootloader setup and bios setup, go straight to trampoline */
+ movb $1, sym_esi(pvh_boot)
+ movb $1, sym_esi(skip_realmode)
++
++ /* Set trampoline_phys to use mfn 1 to avoid having a mapping at VA 0 */
++ movw $0x1000, sym_esi(trampoline_phys)
+ jmp trampoline_setup
+
+ #endif /* CONFIG_PVH_GUEST */
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 371c764027..a8b59617d3 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -292,9 +292,14 @@ void __init arch_init_memory(void)
+ /*
+ * First 1MB of RAM is historically marked as I/O. If we booted PVH,
+ * reclaim the space. Irrespective, leave MFN 0 as special for the sake
+- * of 0 being a very common default value.
++ * of 0 being a very common default value. Also reserve page 0x1 which is
++ * used by the trampoline code on PVH.
+ */
+- for ( i = 0; i < (pvh_boot ? 1 : 0x100); i++ )
++ BUG_ON(pvh_boot && trampoline_phys != 0x1000);
++ for ( i = 0;
++ i < (pvh_boot ? (1 + PFN_UP(trampoline_end - trampoline_start))
++ : 0x100);
++ i++ )
+ share_xen_page_with_guest(mfn_to_page(_mfn(i)),
+ dom_io, XENSHARE_writable);
+
+--
+2.14.3
+
diff --git a/xen.comet.fixes.patch b/xen.comet.fixes.patch
new file mode 100644
index 0000000..2cc0465
--- /dev/null
+++ b/xen.comet.fixes.patch
@@ -0,0 +1,150 @@
+From db3ae8becc2b4f9f544eafa06a7c858c7cc9f029 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Wed, 17 Jan 2018 09:50:27 +0000
+Subject: [PATCH] tools: fix arm build after bdf693ee61b48
+
+The ramdisk fields were removed. We should use modules[0] instead.
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Acked-by: Ian Jackson <ian.jackson(a)eu.citrix.com>
+---
+ tools/libxc/xc_dom_arm.c | 10 +++++-----
+ tools/libxl/libxl_arm.c | 6 +++---
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
+index fce151d821..5b9eca6087 100644
+--- a/tools/libxc/xc_dom_arm.c
++++ b/tools/libxc/xc_dom_arm.c
+@@ -390,8 +390,8 @@ static int meminit(struct xc_dom_image *dom)
+ const uint64_t kernsize = kernend - kernbase;
+ const uint64_t dtb_size = dom->devicetree_blob ?
+ ROUNDUP(dom->devicetree_size, XC_PAGE_SHIFT) : 0;
+- const uint64_t ramdisk_size = dom->ramdisk_blob ?
+- ROUNDUP(dom->ramdisk_size, XC_PAGE_SHIFT) : 0;
++ const uint64_t ramdisk_size = dom->modules[0].blob ?
++ ROUNDUP(dom->modules[0].size, XC_PAGE_SHIFT) : 0;
+ const uint64_t modsize = dtb_size + ramdisk_size;
+ const uint64_t ram128mb = bankbase[0] + (128<<20);
+
+@@ -483,12 +483,12 @@ static int meminit(struct xc_dom_image *dom)
+ */
+ if ( ramdisk_size )
+ {
+- dom->ramdisk_seg.vstart = modbase;
+- dom->ramdisk_seg.vend = modbase + ramdisk_size;
++ dom->modules[0].seg.vstart = modbase;
++ dom->modules[0].seg.vend = modbase + ramdisk_size;
+
+ DOMPRINTF("%s: ramdisk: 0x%" PRIx64 " -> 0x%" PRIx64
"",
+ __FUNCTION__,
+- dom->ramdisk_seg.vstart, dom->ramdisk_seg.vend);
++ dom->modules[0].seg.vstart, dom->modules[0].seg.vend);
+
+ modbase += ramdisk_size;
+ }
+diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
+index de1840bece..3e46554301 100644
+--- a/tools/libxl/libxl_arm.c
++++ b/tools/libxl/libxl_arm.c
+@@ -923,7 +923,7 @@ next_resize:
+ FDT( fdt_begin_node(fdt, "") );
+
+ FDT( make_root_properties(gc, vers, fdt) );
+- FDT( make_chosen_node(gc, fdt, !!dom->ramdisk_blob, state, info) );
++ FDT( make_chosen_node(gc, fdt, !!dom->modules[0].blob, state, info) );
+ FDT( make_cpus_node(gc, fdt, info->max_vcpus, ainfo) );
+ FDT( make_psci_node(gc, fdt) );
+
+@@ -1053,8 +1053,8 @@ int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
+ int i;
+ const uint64_t bankbase[] = GUEST_RAM_BANK_BASES;
+
+- const struct xc_dom_seg *ramdisk = dom->ramdisk_blob ?
+- &dom->ramdisk_seg : NULL;
++ const struct xc_dom_seg *ramdisk = dom->modules[0].blob ?
++ &dom->modules[0].seg : NULL;
+
+ if (ramdisk) {
+ int chosen, res;
+--
+2.14.3
+
+From 81838c9067ab7f4b89d33f90a71225ffff9800ba Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Wed, 17 Jan 2018 16:43:54 +0000
+Subject: [PATCH] ocaml: fix arm build
+
+ARM doesn't have emulation_flags in the arch_domainconfig.
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+Reviewed-by: Julien Grall <julien.grall(a)linaro.org>
+---
+ tools/ocaml/libs/xc/xenctrl_stubs.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
+index 0b5a2361c0..dd6000caa3 100644
+--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
++++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
+@@ -176,10 +176,14 @@ CAMLprim value stub_xc_domain_create(value xch, value ssidref,
+ break;
+
+ case 1: /* X86 - emulation flags in the block */
++#if defined(__i386__) || defined(__x86_64__)
+ for (l = Field(Field(domconfig, 0), 0);
+ l != Val_none;
+ l = Field(l, 1))
+ config.emulation_flags |= 1u << Int_val(Field(l, 0));
++#else
++ caml_failwith("Unhandled: x86");
++#endif
+ break;
+
+ default:
+@@ -320,6 +324,7 @@ static value alloc_domaininfo(xc_domaininfo_t * info)
+
+ Store_field(result, 15, tmp);
+
++#if defined(__i386__) || defined(__x86_64__)
+ /* emulation_flags: x86_arch_emulation_flags list; */
+ tmp = emul_list = Val_emptylist;
+ for (i = 0; i < 10; i++) {
+@@ -341,6 +346,7 @@ static value alloc_domaininfo(xc_domaininfo_t * info)
+ Store_field(arch_config, 0, x86_arch_config);
+
+ Store_field(result, 16, arch_config);
++#endif
+
+ CAMLreturn(result);
+ }
+--
+2.14.3
+
+From 36c560e7f38130f12a36e8b66b0785fb655fe893 Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2(a)citrix.com>
+Date: Tue, 16 Jan 2018 18:56:45 +0000
+Subject: [PATCH] Don't build xen-shim for 32 bit build host
+
+Signed-off-by: Wei Liu <wei.liu2(a)citrix.com>
+---
+ tools/firmware/Makefile | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/firmware/Makefile b/tools/firmware/Makefile
+index 9387cc0878..b2f011df49 100644
+--- a/tools/firmware/Makefile
++++ b/tools/firmware/Makefile
+@@ -1,7 +1,9 @@
+ XEN_ROOT = $(CURDIR)/../..
+ include $(XEN_ROOT)/tools/Rules.mk
+
++ifneq ($(XEN_TARGET_ARCH),x86_32)
+ CONFIG_PV_SHIM := y
++endif
+
+ # hvmloader is a 32-bit protected mode binary.
+ TARGET := hvmloader/hvmloader
+--
+2.14.3
+
diff --git a/xen.fedora.efi.build.patch b/xen.fedora.efi.build.patch
index a531445..96cfb35 100644
--- a/xen.fedora.efi.build.patch
+++ b/xen.fedora.efi.build.patch
@@ -6,8 +6,8 @@
echo '$(TARGET).efi'; fi)
+LD_EFI ?= $(LD)
- ifneq ($(build_id_linker),)
- notes_phdrs = --notes
+ shim-$(CONFIG_PVH_GUEST) := $(TARGET)-shim
+
@@ -173,20 +174,20 @@
$(TARGET).efi: prelink-efi.o $(note_file) efi.lds efi/relocs-dummy.o
$(BASEDIR)/common/symbols-dummy.o efi/mkreloc
diff --git a/xen.hypervisor.config b/xen.hypervisor.config
index 58c9858..9101058 100644
--- a/xen.hypervisor.config
+++ b/xen.hypervisor.config
@@ -61,6 +61,7 @@ CONFIG_HAS_PCI=y
CONFIG_VIDEO=y
CONFIG_VGA=y
CONFIG_DEFCONFIG_LIST="$ARCH_DEFCONFIG"
+CONFIG_XEN_GUEST=n
#
# Debugging Options
diff --git a/xen.spec b/xen.spec
index 2c0a30b..3b1d309 100644
--- a/xen.spec
+++ b/xen.spec
@@ -1,4 +1,4 @@
-%{!?python_sitearch: %define python_sitearch %(%{__python} -c "from
distutils.sysconfig import get_python_lib; print get_python_lib(1)")}
+%{!?python_sitearch: %define python_sitearch %(/usr/bin/python2 -c "from
distutils.sysconfig import get_python_lib; print get_python_lib(1)")}
# Build ocaml bits unless rpmbuild was run with --without ocaml
# or ocamlopt is missing (the xen makefile doesn't build ocaml bits if it isn't
there)
%define with_ocaml %{?_without_ocaml: 0} %{?!_without_ocaml: 1}
@@ -60,7 +60,7 @@
Summary: Xen is a virtual machine monitor
Name: xen
Version: 4.10.0
-Release: 2%{?dist}
+Release: 3%{?dist}
Group: Development/Libraries
License: GPLv2+ and LGPLv2+ and BSD
URL:
http://xen.org/
@@ -117,6 +117,9 @@ Patch38: qemu.trad.CVE-2017-8309.patch
Patch39: qemu.trad.CVE-2017-9330.patch
Patch40: xen.ocaml.safe-strings.patch
Patch41: xsa253.patch
+Patch42: 4.10.0-shim-comet-3.patch
+Patch43: xen.comet.fixes.patch
+Patch44: xen.xsa254.pti.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
@@ -297,8 +300,6 @@ manage Xen virtual machines.
%prep
%setup -q
%patch1 -p1
-%patch2 -p1
-%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
@@ -327,7 +328,12 @@ manage Xen virtual machines.
%patch34 -p1
%patch37 -p1
%patch40 -p1
-%patch41 -p1
+#%patch41 -p1
+%patch42 -p1
+%patch43 -p1
+%patch44 -p1
+%patch2 -p1
+%patch3 -p1
# qemu-xen-traditional patches
pushd tools/qemu-xen-traditional
@@ -374,7 +380,7 @@ export EXTRA_CFLAGS_QEMU_XEN="$RPM_OPT_FLAGS"
%if %build_hyp
%if %build_crosshyp
%define efi_flags LD_EFI=false
-XEN_TARGET_ARCH=x86_64 make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen
CC="/usr/bin/x86_64-linux-gnu-gcc `echo $RPM_OPT_FLAGS | sed -e 's/-m32//g'
-e 's/-march=i686//g' -e 's/-mtune=atom//g' -e
's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g'`"
+XEN_TARGET_ARCH=x86_64 make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen
CC="/usr/bin/x86_64-linux-gnu-gcc `echo $RPM_OPT_FLAGS | sed -e 's/-m32//g'
-e 's/-march=i686//g' -e 's/-mtune=atom//g' -e
's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g' -e
's/-fstack-clash-protection//g'`"
%else
%ifarch armv7hl
make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="gcc `echo $RPM_OPT_FLAGS |
sed -e 's/-mfloat-abi=hard//g' -e 's/-march=armv7-a//g'`"
@@ -382,7 +388,7 @@ make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="gcc `echo
$RPM_OPT_FLAGS
%ifarch aarch64
make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="gcc $RPM_OPT_FLAGS"
%else
-make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="gcc `echo $RPM_OPT_FLAGSi |
sed -e 's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g'`"
+make %{?_smp_mflags} %{?efi_flags} prefix=/usr xen CC="gcc `echo $RPM_OPT_FLAGS |
sed -e 's/-specs=\/usr\/lib\/rpm\/redhat\/redhat-annobin-cc1//g'`"
%endif
%endif
%endif
@@ -736,6 +742,9 @@ rm -rf %{buildroot}
%ifarch %{ix86} x86_64
%dir /usr/lib/%{name}/boot
/usr/lib/xen/boot/hvmloader
+%ifnarch %{ix86}
+/usr/lib/xen/boot/xen-shim
+%endif
%if %build_stubdom
/usr/lib/xen/boot/ioemu-stubdom.gz
/usr/lib/xen/boot/xenstore-stubdom.gz
@@ -877,6 +886,18 @@ rm -rf %{buildroot}
%endif
%changelog
+* Sun Jan 14 2018 Michael Young <m.a.young(a)durham.ac.uk> - 4.10.0-3
+- fix typo in annobin build fix
+- add 4.10.0-shim-comet-3 shim mitigation for [XSA-254, CVE-2017-5753,
+ CVE-2017-5715, CVE-2017-5754] + build fixes
+ XSA-253 patch included in comet patches
+ CONFIG_XEN_GUEST line needed xen.hypervisor.config for comet
+ delay and adjust xen.use.fedora.ipxe.patch and xen.fedora.efi.build.patch
+ package /usr/lib/xen/boot/xen-shim
+- add Xen page-table isolation (XPTI) mitigation for XSA-254
+- -fstack-clash-protection isn't recognized in hypervisor build x86_64 on i686
+- __python macro is no longer set, replace by /usr/bin/python2
+
* Thu Jan 04 2018 Michael Young <m.a.young(a)durham.ac.uk> - 4.10.0-2
- x86: memory leak with MSR emulation [XSA-253, CVE-2018-5244] (#1531110)
diff --git a/xen.use.fedora.ipxe.patch b/xen.use.fedora.ipxe.patch
index 500bc20..8785393 100644
--- a/xen.use.fedora.ipxe.patch
+++ b/xen.use.fedora.ipxe.patch
@@ -28,6 +28,6 @@
SUBDIRS-$(CONFIG_ROMBIOS) += vgabios
-SUBDIRS-$(CONFIG_ROMBIOS) += etherboot
+#SUBDIRS-$(CONFIG_ROMBIOS) += etherboot
+ SUBDIRS-$(CONFIG_PV_SHIM) += xen-dir
SUBDIRS-y += hvmloader
- LD32BIT-$(CONFIG_FreeBSD) := LD32BIT_FLAG=-melf_i386_fbsd
diff --git a/xen.xsa254.pti.patch b/xen.xsa254.pti.patch
new file mode 100644
index 0000000..5295737
--- /dev/null
+++ b/xen.xsa254.pti.patch
@@ -0,0 +1,1377 @@
+From 910dd005da20f27f3415b7eccdf436874989506b Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 17 Jan 2018 16:54:44 +0100
+Subject: [PATCH 1/5] x86/entry: Remove support for partial cpu_user_regs
+ frames
+
+Save all GPRs on entry to Xen.
+
+The entry_int82() path is via a DPL1 gate, only usable by 32bit PV guests, so
+can get away with only saving the 32bit registers. All other entrypoints can
+be reached from 32 or 64bit contexts.
+
+This is part of XSA-254.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Wei Liu <wei.liu2(a)citrix.com>
+Acked-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: f9eb74789af77e985ae653193f3622263499f674
+master date: 2018-01-05 19:57:07 +0000
+---
+ tools/tests/x86_emulator/x86-emulate.c | 1 -
+ xen/arch/x86/pv/domain.c | 1 -
+ xen/arch/x86/pv/emul-priv-op.c | 2 -
+ xen/arch/x86/x86_64/compat/entry.S | 7 ++-
+ xen/arch/x86/x86_64/entry.S | 12 ++--
+ xen/arch/x86/x86_64/traps.c | 13 ++--
+ xen/arch/x86/x86_emulate.c | 1 -
+ xen/arch/x86/x86_emulate/x86_emulate.c | 8 +--
+ xen/common/wait.c | 1 -
+ xen/include/asm-x86/asm_defns.h | 105 +++------------------------------
+ 10 files changed, 26 insertions(+), 125 deletions(-)
+
+diff --git a/tools/tests/x86_emulator/x86-emulate.c
b/tools/tests/x86_emulator/x86-emulate.c
+index 975ddc7e53..9056610907 100644
+--- a/tools/tests/x86_emulator/x86-emulate.c
++++ b/tools/tests/x86_emulator/x86-emulate.c
+@@ -3,7 +3,6 @@
+ #include <sys/mman.h>
+
+ #define cpu_has_amd_erratum(nr) 0
+-#define mark_regs_dirty(r) ((void)(r))
+ #define cpu_has_mpx false
+ #define read_bndcfgu() 0
+ #define xstate_set_init(what)
+diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
+index 2234128bb3..74e9e667d2 100644
+--- a/xen/arch/x86/pv/domain.c
++++ b/xen/arch/x86/pv/domain.c
+@@ -20,7 +20,6 @@
+ static void noreturn continue_nonidle_domain(struct vcpu *v)
+ {
+ check_wakeup_from_wait();
+- mark_regs_dirty(guest_cpu_user_regs());
+ reset_stack_and_jump(ret_from_intr);
+ }
+
+diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
+index 2f9264548a..5f23c2cfbf 100644
+--- a/xen/arch/x86/pv/emul-priv-op.c
++++ b/xen/arch/x86/pv/emul-priv-op.c
+@@ -337,7 +337,6 @@ static int read_io(unsigned int port, unsigned int bytes,
+ io_emul_stub_t *io_emul =
+ io_emul_stub_setup(poc, ctxt->opcode, port, bytes);
+
+- mark_regs_dirty(ctxt->regs);
+ io_emul(ctxt->regs);
+ return X86EMUL_DONE;
+ }
+@@ -436,7 +435,6 @@ static int write_io(unsigned int port, unsigned int bytes,
+ io_emul_stub_t *io_emul =
+ io_emul_stub_setup(poc, ctxt->opcode, port, bytes);
+
+- mark_regs_dirty(ctxt->regs);
+ io_emul(ctxt->regs);
+ if ( (bytes == 1) && pv_post_outb_hook )
+ pv_post_outb_hook(port, val);
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index ba6e941837..3fea54ee9d 100644
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -16,7 +16,8 @@
+ ENTRY(entry_int82)
+ ASM_CLAC
+ pushq $0
+- SAVE_VOLATILE type=HYPERCALL_VECTOR compat=1
++ movl $HYPERCALL_VECTOR, 4(%rsp)
++ SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
+ CR4_PV32_RESTORE
+
+ GET_CURRENT(bx)
+@@ -60,7 +61,6 @@ compat_test_guest_events:
+ /* %rbx: struct vcpu */
+ compat_process_softirqs:
+ sti
+- andl $~TRAP_regs_partial,UREGS_entry_vector(%rsp)
+ call do_softirq
+ jmp compat_test_all_events
+
+@@ -197,7 +197,8 @@ ENTRY(cstar_enter)
+ pushq $FLAT_USER_CS32
+ pushq %rcx
+ pushq $0
+- SAVE_VOLATILE TRAP_syscall
++ movl $TRAP_syscall, 4(%rsp)
++ SAVE_ALL
+ GET_CURRENT(bx)
+ movq VCPU_domain(%rbx),%rcx
+ cmpb $0,DOMAIN_is_32bit_pv(%rcx)
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 6066ed8b18..1dd9ccf6a2 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -98,7 +98,8 @@ ENTRY(lstar_enter)
+ pushq $FLAT_KERNEL_CS64
+ pushq %rcx
+ pushq $0
+- SAVE_VOLATILE TRAP_syscall
++ movl $TRAP_syscall, 4(%rsp)
++ SAVE_ALL
+ GET_CURRENT(bx)
+ testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
+ jz switch_to_kernel
+@@ -140,7 +141,6 @@ test_guest_events:
+ /* %rbx: struct vcpu */
+ process_softirqs:
+ sti
+- SAVE_PRESERVED
+ call do_softirq
+ jmp test_all_events
+
+@@ -190,7 +190,8 @@ GLOBAL(sysenter_eflags_saved)
+ pushq $3 /* ring 3 null cs */
+ pushq $0 /* null rip */
+ pushq $0
+- SAVE_VOLATILE TRAP_syscall
++ movl $TRAP_syscall, 4(%rsp)
++ SAVE_ALL
+ GET_CURRENT(bx)
+ cmpb $0,VCPU_sysenter_disables_events(%rbx)
+ movq VCPU_sysenter_addr(%rbx),%rax
+@@ -207,7 +208,6 @@ UNLIKELY_END(sysenter_nt_set)
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ UNLIKELY_START(z, sysenter_gpf)
+ movq VCPU_trap_ctxt(%rbx),%rsi
+- SAVE_PRESERVED
+ movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ movl %eax,TRAPBOUNCE_error_code(%rdx)
+ movq TRAP_gp_fault * TRAPINFO_sizeof + TRAPINFO_eip(%rsi),%rax
+@@ -225,7 +225,8 @@ UNLIKELY_END(sysenter_gpf)
+ ENTRY(int80_direct_trap)
+ ASM_CLAC
+ pushq $0
+- SAVE_VOLATILE 0x80
++ movl $0x80, 4(%rsp)
++ SAVE_ALL
+
+ cmpb $0,untrusted_msi(%rip)
+ UNLIKELY_START(ne, msi_check)
+@@ -253,7 +254,6 @@ int80_slow_path:
+ * IDT entry with DPL==0.
+ */
+ movl $((0x80 << 3) | X86_XEC_IDT),UREGS_error_code(%rsp)
+- SAVE_PRESERVED
+ movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ /* A GPF wouldn't have incremented the instruction pointer. */
+ subq $2,UREGS_rip(%rsp)
+diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
+index 2a326be58e..3652f5ff21 100644
+--- a/xen/arch/x86/x86_64/traps.c
++++ b/xen/arch/x86/x86_64/traps.c
+@@ -80,15 +80,10 @@ static void _show_registers(
+ regs->rbp, regs->rsp, regs->r8);
+ printk("r9: %016lx r10: %016lx r11: %016lx\n",
+ regs->r9, regs->r10, regs->r11);
+- if ( !(regs->entry_vector & TRAP_regs_partial) )
+- {
+- printk("r12: %016lx r13: %016lx r14: %016lx\n",
+- regs->r12, regs->r13, regs->r14);
+- printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
+- regs->r15, crs[0], crs[4]);
+- }
+- else
+- printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]);
++ printk("r12: %016lx r13: %016lx r14: %016lx\n",
++ regs->r12, regs->r13, regs->r14);
++ printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
++ regs->r15, crs[0], crs[4]);
+ printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]);
+ printk("fsb: %016lx gsb: %016lx gss: %016lx\n",
+ crs[5], crs[6], crs[7]);
+diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
+index cc334ca8f9..c7ba221d11 100644
+--- a/xen/arch/x86/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate.c
+@@ -11,7 +11,6 @@
+
+ #include <xen/domain_page.h>
+ #include <asm/x86_emulate.h>
+-#include <asm/asm_defns.h> /* mark_regs_dirty() */
+ #include <asm/processor.h> /* current_cpu_info */
+ #include <asm/xstate.h>
+ #include <asm/amd.h> /* cpu_has_amd_erratum() */
+diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c
b/xen/arch/x86/x86_emulate/x86_emulate.c
+index 54a275664a..820495fb9c 100644
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -1956,10 +1956,10 @@ decode_register(
+ case 9: p = ®s->r9; break;
+ case 10: p = ®s->r10; break;
+ case 11: p = ®s->r11; break;
+- case 12: mark_regs_dirty(regs); p = ®s->r12; break;
+- case 13: mark_regs_dirty(regs); p = ®s->r13; break;
+- case 14: mark_regs_dirty(regs); p = ®s->r14; break;
+- case 15: mark_regs_dirty(regs); p = ®s->r15; break;
++ case 12: p = ®s->r12; break;
++ case 13: p = ®s->r13; break;
++ case 14: p = ®s->r14; break;
++ case 15: p = ®s->r15; break;
+ #endif
+ default: BUG(); p = NULL; break;
+ }
+diff --git a/xen/common/wait.c b/xen/common/wait.c
+index 9490a17dc2..c5fc094e2c 100644
+--- a/xen/common/wait.c
++++ b/xen/common/wait.c
+@@ -127,7 +127,6 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
+ unsigned long dummy;
+ u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;
+
+- cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
+ ASSERT(wqv->esp == 0);
+
+ /* Save current VCPU affinity; force wakeup on *this* CPU only. */
+diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
+index 388fc93b9d..98192eb4e6 100644
+--- a/xen/include/asm-x86/asm_defns.h
++++ b/xen/include/asm-x86/asm_defns.h
+@@ -17,15 +17,6 @@
+ void ret_from_intr(void);
+ #endif
+
+-#ifdef CONFIG_FRAME_POINTER
+-/* Indicate special exception stack frame by inverting the frame pointer. */
+-#define SETUP_EXCEPTION_FRAME_POINTER(offs) \
+- leaq offs(%rsp),%rbp; \
+- notq %rbp
+-#else
+-#define SETUP_EXCEPTION_FRAME_POINTER(offs)
+-#endif
+-
+ #ifndef NDEBUG
+ #define ASSERT_INTERRUPT_STATUS(x, msg) \
+ pushf; \
+@@ -42,31 +33,6 @@ void ret_from_intr(void);
+ #define ASSERT_INTERRUPTS_DISABLED \
+ ASSERT_INTERRUPT_STATUS(z, "INTERRUPTS DISABLED")
+
+-/*
+- * This flag is set in an exception frame when registers R12-R15 did not get
+- * saved.
+- */
+-#define _TRAP_regs_partial 16
+-#define TRAP_regs_partial (1 << _TRAP_regs_partial)
+-/*
+- * This flag gets set in an exception frame when registers R12-R15 possibly
+- * get modified from their originally saved values and hence need to be
+- * restored even if the normal call flow would restore register values.
+- *
+- * The flag being set implies _TRAP_regs_partial to be unset. Restoring
+- * R12-R15 thus is
+- * - required when this flag is set,
+- * - safe when _TRAP_regs_partial is unset.
+- */
+-#define _TRAP_regs_dirty 17
+-#define TRAP_regs_dirty (1 << _TRAP_regs_dirty)
+-
+-#define mark_regs_dirty(r) ({ \
+- struct cpu_user_regs *r__ = (r); \
+- ASSERT(!((r__)->entry_vector & TRAP_regs_partial)); \
+- r__->entry_vector |= TRAP_regs_dirty; \
+-})
+-
+ #ifdef __ASSEMBLY__
+ # define _ASM_EX(p) p-.
+ #else
+@@ -236,7 +202,7 @@ static always_inline void stac(void)
+ #endif
+
+ #ifdef __ASSEMBLY__
+-.macro SAVE_ALL op
++.macro SAVE_ALL op, compat=0
+ .ifeqs "\op", "CLAC"
+ ASM_CLAC
+ .else
+@@ -255,40 +221,6 @@ static always_inline void stac(void)
+ movq %rdx,UREGS_rdx(%rsp)
+ movq %rcx,UREGS_rcx(%rsp)
+ movq %rax,UREGS_rax(%rsp)
+- movq %r8,UREGS_r8(%rsp)
+- movq %r9,UREGS_r9(%rsp)
+- movq %r10,UREGS_r10(%rsp)
+- movq %r11,UREGS_r11(%rsp)
+- movq %rbx,UREGS_rbx(%rsp)
+- movq %rbp,UREGS_rbp(%rsp)
+- SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+- movq %r12,UREGS_r12(%rsp)
+- movq %r13,UREGS_r13(%rsp)
+- movq %r14,UREGS_r14(%rsp)
+- movq %r15,UREGS_r15(%rsp)
+-.endm
+-
+-/*
+- * Save all registers not preserved by C code or used in entry/exit code. Mark
+- * the frame as partial.
+- *
+- * @type: exception type
+- * @compat: R8-R15 don't need saving, and the frame nevertheless is complete
+- */
+-.macro SAVE_VOLATILE type compat=0
+-.if \compat
+- movl $\type,UREGS_entry_vector-UREGS_error_code(%rsp)
+-.else
+- movl $\type|TRAP_regs_partial,\
+- UREGS_entry_vector-UREGS_error_code(%rsp)
+-.endif
+- addq $-(UREGS_error_code-UREGS_r15),%rsp
+- cld
+- movq %rdi,UREGS_rdi(%rsp)
+- movq %rsi,UREGS_rsi(%rsp)
+- movq %rdx,UREGS_rdx(%rsp)
+- movq %rcx,UREGS_rcx(%rsp)
+- movq %rax,UREGS_rax(%rsp)
+ .if !\compat
+ movq %r8,UREGS_r8(%rsp)
+ movq %r9,UREGS_r9(%rsp)
+@@ -297,20 +229,17 @@ static always_inline void stac(void)
+ .endif
+ movq %rbx,UREGS_rbx(%rsp)
+ movq %rbp,UREGS_rbp(%rsp)
+- SETUP_EXCEPTION_FRAME_POINTER(UREGS_rbp)
+-.endm
+-
+-/*
+- * Complete a frame potentially only partially saved.
+- */
+-.macro SAVE_PRESERVED
+- btrl $_TRAP_regs_partial,UREGS_entry_vector(%rsp)
+- jnc 987f
++#ifdef CONFIG_FRAME_POINTER
++/* Indicate special exception stack frame by inverting the frame pointer. */
++ leaq UREGS_rbp(%rsp), %rbp
++ notq %rbp
++#endif
++.if !\compat
+ movq %r12,UREGS_r12(%rsp)
+ movq %r13,UREGS_r13(%rsp)
+ movq %r14,UREGS_r14(%rsp)
+ movq %r15,UREGS_r15(%rsp)
+-987:
++.endif
+ .endm
+
+ #define LOAD_ONE_REG(reg, compat) \
+@@ -330,7 +259,6 @@ static always_inline void stac(void)
+ */
+ .macro RESTORE_ALL adj=0 compat=0
+ .if !\compat
+- testl $TRAP_regs_dirty,UREGS_entry_vector(%rsp)
+ movq UREGS_r11(%rsp),%r11
+ movq UREGS_r10(%rsp),%r10
+ movq UREGS_r9(%rsp),%r9
+@@ -347,33 +275,16 @@ static always_inline void stac(void)
+ LOAD_ONE_REG(si, \compat)
+ LOAD_ONE_REG(di, \compat)
+ .if !\compat
+- jz 987f
+ movq UREGS_r15(%rsp),%r15
+ movq UREGS_r14(%rsp),%r14
+ movq UREGS_r13(%rsp),%r13
+ movq UREGS_r12(%rsp),%r12
+-#ifndef NDEBUG
+- .subsection 1
+-987: testl $TRAP_regs_partial,UREGS_entry_vector(%rsp)
+- jnz 987f
+- cmpq UREGS_r15(%rsp),%r15
+- jne 789f
+- cmpq UREGS_r14(%rsp),%r14
+- jne 789f
+- cmpq UREGS_r13(%rsp),%r13
+- jne 789f
+- cmpq UREGS_r12(%rsp),%r12
+- je 987f
+-789: BUG /* Corruption of partial register state. */
+- .subsection 0
+-#endif
+ .else
+ xor %r15, %r15
+ xor %r14, %r14
+ xor %r13, %r13
+ xor %r12, %r12
+ .endif
+-987:
+ LOAD_ONE_REG(bp, \compat)
+ LOAD_ONE_REG(bx, \compat)
+ subq $-(UREGS_error_code-UREGS_r15+\adj), %rsp
+--
+2.14.3
+
+
+From 57dc197cf0d36c56ba1d9d32c6a1454bb52605bb Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Date: Wed, 17 Jan 2018 16:56:03 +0100
+Subject: [PATCH 3/5] x86/mm: Always set _PAGE_ACCESSED on L4e updates
+
+Signed-off-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+Reviewed-by: Jan Beulich <jbeulich(a)suse.com>
+master commit: bd61fe94bee0556bc2f64999a4a8315b93f90f21
+master date: 2018-01-15 13:53:16 +0000
+---
+ xen/arch/x86/pv/mm.h | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/pv/mm.h b/xen/arch/x86/pv/mm.h
+index 7502d533c6..976209ba4c 100644
+--- a/xen/arch/x86/pv/mm.h
++++ b/xen/arch/x86/pv/mm.h
+@@ -144,9 +144,21 @@ static inline l3_pgentry_t unadjust_guest_l3e(l3_pgentry_t l3e,
+ static inline l4_pgentry_t adjust_guest_l4e(l4_pgentry_t l4e,
+ const struct domain *d)
+ {
+- if ( likely(l4e_get_flags(l4e) & _PAGE_PRESENT) &&
+- likely(!is_pv_32bit_domain(d)) )
+- l4e_add_flags(l4e, _PAGE_USER);
++ /*
++ * When shadowing an L4 behind the guests back (e.g. for per-pcpu
++ * purposes), we cannot efficiently sync access bit updates from hardware
++ * (on the shadow tables) back into the guest view.
++ *
++ * We therefore unconditionally set _PAGE_ACCESSED even in the guests
++ * view. This will appear to the guest as a CPU which proactively pulls
++ * all valid L4e's into its TLB, which is compatible with the x86 ABI.
++ *
++ * At the time of writing, all PV guests set the access bit anyway, so
++ * this is no actual change in their behaviour.
++ */
++ if ( likely(l4e_get_flags(l4e) & _PAGE_PRESENT) )
++ l4e_add_flags(l4e, (_PAGE_ACCESSED |
++ (is_pv_32bit_domain(d) ? 0 : _PAGE_USER)));
+
+ return l4e;
+ }
+--
+2.14.3
+
+
+From 234f481337ea1a93db968d614649a6bdfdc8418a Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich(a)suse.com>
+Date: Wed, 17 Jan 2018 16:56:57 +0100
+Subject: [PATCH 4/5] x86: Meltdown band-aid against malicious 64-bit PV guests
+
+This is a very simplistic change limiting the amount of memory a running
+64-bit PV guest has mapped (and hence available for attacking): Only the
+mappings of stack, IDT, and TSS are being cloned from the direct map
+into per-CPU page tables. Guest controlled parts of the page tables are
+being copied into those per-CPU page tables upon entry into the guest.
+Cross-vCPU synchronization of top level page table entry changes is
+being effected by forcing other active vCPU-s of the guest into the
+hypervisor.
+
+The change to context_switch() isn't strictly necessary, but there's no
+reason to keep switching page tables once a PV guest is being scheduled
+out.
+
+This isn't providing full isolation yet, but it should be covering all
+pieces of information exposure of which would otherwise require an XSA.
+
+There is certainly much room for improvement, especially of performance,
+here - first and foremost suppressing all the negative effects on AMD
+systems. But in the interest of backportability (including to really old
+hypervisors, which may not even have alternative patching) any such is
+being left out here.
+
+Signed-off-by: Jan Beulich <jbeulich(a)suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+master commit: 5784de3e2067ed73efc2fe42e62831e8ae7f46c4
+master date: 2018-01-16 17:49:03 +0100
+---
+ xen/arch/x86/domain.c | 5 +
+ xen/arch/x86/mm.c | 21 ++++
+ xen/arch/x86/smpboot.c | 198 +++++++++++++++++++++++++++++++++++++
+ xen/arch/x86/x86_64/asm-offsets.c | 2 +
+ xen/arch/x86/x86_64/compat/entry.S | 11 +++
+ xen/arch/x86/x86_64/entry.S | 149 +++++++++++++++++++++++++++-
+ xen/include/asm-x86/asm_defns.h | 30 ++++++
+ xen/include/asm-x86/current.h | 12 +++
+ xen/include/asm-x86/processor.h | 1 +
+ xen/include/asm-x86/x86_64/page.h | 5 +-
+ 10 files changed, 428 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index b44c95b493..f4a3d7445b 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1507,6 +1507,9 @@ void paravirt_ctxt_switch_to(struct vcpu *v)
+ {
+ unsigned long cr4;
+
++ this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] =
++ l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
++
+ cr4 = pv_guest_cr4_to_real_cr4(v);
+ if ( unlikely(cr4 != read_cr4()) )
+ write_cr4(cr4);
+@@ -1676,6 +1679,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
+
+ ASSERT(local_irq_is_enabled());
+
++ get_cpu_info()->xen_cr3 = 0;
++
+ cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask);
+ /* Allow at most one CPU at a time to be dirty. */
+ ASSERT(cpumask_weight(&dirty_mask) <= 1);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index a7a76a71db..6c7d12034b 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -3509,6 +3509,7 @@ long do_mmu_update(
+ struct vcpu *curr = current, *v = curr;
+ struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+ mfn_t map_mfn = INVALID_MFN;
++ bool sync_guest = false;
+ uint32_t xsm_needed = 0;
+ uint32_t xsm_checked = 0;
+ int rc = put_old_guest_table(curr);
+@@ -3663,6 +3664,8 @@ long do_mmu_update(
+ case PGT_l4_page_table:
+ rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++ if ( !rc )
++ sync_guest = true;
+ break;
+ case PGT_writable_page:
+ perfc_incr(writable_mmu_updates);
+@@ -3765,6 +3768,24 @@ long do_mmu_update(
+ if ( va )
+ unmap_domain_page(va);
+
++ if ( sync_guest )
++ {
++ /*
++ * Force other vCPU-s of the affected guest to pick up L4 entry
++ * changes (if any). Issue a flush IPI with empty operation mask to
++ * facilitate this (including ourselves waiting for the IPI to
++ * actually have arrived). Utilize the fact that FLUSH_VA_VALID is
++ * meaningless without FLUSH_CACHE, but will allow to pass the no-op
++ * check in flush_area_mask().
++ */
++ unsigned int cpu = smp_processor_id();
++ cpumask_t *mask = per_cpu(scratch_cpumask, cpu);
++
++ cpumask_andnot(mask, pt_owner->domain_dirty_cpumask, cpumask_of(cpu));
++ if ( !cpumask_empty(mask) )
++ flush_area_mask(mask, ZERO_BLOCK_PTR, FLUSH_VA_VALID);
++ }
++
+ perfc_add(num_page_updates, i);
+
+ out:
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 1609b627ae..b1fbb57a81 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -327,6 +327,9 @@ void start_secondary(void *unused)
+ */
+ spin_debug_disable();
+
++ get_cpu_info()->xen_cr3 = 0;
++ get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
++
+ load_system_tables();
+
+ /* Full exception support from here on in. */
+@@ -635,6 +638,187 @@ void cpu_exit_clear(unsigned int cpu)
+ set_cpu_state(CPU_STATE_DEAD);
+ }
+
++static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
++{
++ unsigned long linear = (unsigned long)ptr, pfn;
++ unsigned int flags;
++ l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) +
++ l3_table_offset(linear);
++ l2_pgentry_t *pl2e;
++ l1_pgentry_t *pl1e;
++
++ if ( linear < DIRECTMAP_VIRT_START )
++ return 0;
++
++ flags = l3e_get_flags(*pl3e);
++ ASSERT(flags & _PAGE_PRESENT);
++ if ( flags & _PAGE_PSE )
++ {
++ pfn = (l3e_get_pfn(*pl3e) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1)) |
++ (PFN_DOWN(linear) & ((1UL << (2 * PAGETABLE_ORDER)) - 1));
++ flags &= ~_PAGE_PSE;
++ }
++ else
++ {
++ pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(linear);
++ flags = l2e_get_flags(*pl2e);
++ ASSERT(flags & _PAGE_PRESENT);
++ if ( flags & _PAGE_PSE )
++ {
++ pfn = (l2e_get_pfn(*pl2e) & ~((1UL << PAGETABLE_ORDER) - 1)) |
++ (PFN_DOWN(linear) & ((1UL << PAGETABLE_ORDER) - 1));
++ flags &= ~_PAGE_PSE;
++ }
++ else
++ {
++ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear);
++ flags = l1e_get_flags(*pl1e);
++ if ( !(flags & _PAGE_PRESENT) )
++ return 0;
++ pfn = l1e_get_pfn(*pl1e);
++ }
++ }
++
++ if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) )
++ {
++ pl3e = alloc_xen_pagetable();
++ if ( !pl3e )
++ return -ENOMEM;
++ clear_page(pl3e);
++ l4e_write(&rpt[root_table_offset(linear)],
++ l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
++ }
++ else
++ pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]);
++
++ pl3e += l3_table_offset(linear);
++
++ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
++ {
++ pl2e = alloc_xen_pagetable();
++ if ( !pl2e )
++ return -ENOMEM;
++ clear_page(pl2e);
++ l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
++ }
++ else
++ {
++ ASSERT(!(l3e_get_flags(*pl3e) & _PAGE_PSE));
++ pl2e = l3e_to_l2e(*pl3e);
++ }
++
++ pl2e += l2_table_offset(linear);
++
++ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
++ {
++ pl1e = alloc_xen_pagetable();
++ if ( !pl1e )
++ return -ENOMEM;
++ clear_page(pl1e);
++ l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR));
++ }
++ else
++ {
++ ASSERT(!(l2e_get_flags(*pl2e) & _PAGE_PSE));
++ pl1e = l2e_to_l1e(*pl2e);
++ }
++
++ pl1e += l1_table_offset(linear);
++
++ if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
++ {
++ ASSERT(l1e_get_pfn(*pl1e) == pfn);
++ ASSERT(l1e_get_flags(*pl1e) == flags);
++ }
++ else
++ l1e_write(pl1e, l1e_from_pfn(pfn, flags));
++
++ return 0;
++}
++
++DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
++
++static int setup_cpu_root_pgt(unsigned int cpu)
++{
++ root_pgentry_t *rpt = alloc_xen_pagetable();
++ unsigned int off;
++ int rc;
++
++ if ( !rpt )
++ return -ENOMEM;
++
++ clear_page(rpt);
++ per_cpu(root_pgt, cpu) = rpt;
++
++ rpt[root_table_offset(RO_MPT_VIRT_START)] =
++ idle_pg_table[root_table_offset(RO_MPT_VIRT_START)];
++ /* SH_LINEAR_PT inserted together with guest mappings. */
++ /* PERDOMAIN inserted during context switch. */
++ rpt[root_table_offset(XEN_VIRT_START)] =
++ idle_pg_table[root_table_offset(XEN_VIRT_START)];
++
++ /* Install direct map page table entries for stack, IDT, and TSS. */
++ for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE )
++ rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt);
++
++ if ( !rc )
++ rc = clone_mapping(idt_tables[cpu], rpt);
++ if ( !rc )
++ rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
++
++ return rc;
++}
++
++static void cleanup_cpu_root_pgt(unsigned int cpu)
++{
++ root_pgentry_t *rpt = per_cpu(root_pgt, cpu);
++ unsigned int r;
++
++ if ( !rpt )
++ return;
++
++ per_cpu(root_pgt, cpu) = NULL;
++
++ for ( r = root_table_offset(DIRECTMAP_VIRT_START);
++ r < root_table_offset(HYPERVISOR_VIRT_END); ++r )
++ {
++ l3_pgentry_t *l3t;
++ unsigned int i3;
++
++ if ( !(root_get_flags(rpt[r]) & _PAGE_PRESENT) )
++ continue;
++
++ l3t = l4e_to_l3e(rpt[r]);
++
++ for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; ++i3 )
++ {
++ l2_pgentry_t *l2t;
++ unsigned int i2;
++
++ if ( !(l3e_get_flags(l3t[i3]) & _PAGE_PRESENT) )
++ continue;
++
++ ASSERT(!(l3e_get_flags(l3t[i3]) & _PAGE_PSE));
++ l2t = l3e_to_l2e(l3t[i3]);
++
++ for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; ++i2 )
++ {
++ if ( !(l2e_get_flags(l2t[i2]) & _PAGE_PRESENT) )
++ continue;
++
++ ASSERT(!(l2e_get_flags(l2t[i2]) & _PAGE_PSE));
++ free_xen_pagetable(l2e_to_l1e(l2t[i2]));
++ }
++
++ free_xen_pagetable(l2t);
++ }
++
++ free_xen_pagetable(l3t);
++ }
++
++ free_xen_pagetable(rpt);
++}
++
+ static void cpu_smpboot_free(unsigned int cpu)
+ {
+ unsigned int order, socket = cpu_to_socket(cpu);
+@@ -673,6 +857,8 @@ static void cpu_smpboot_free(unsigned int cpu)
+ free_domheap_page(mfn_to_page(mfn));
+ }
+
++ cleanup_cpu_root_pgt(cpu);
++
+ order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+ free_xenheap_pages(per_cpu(gdt_table, cpu), order);
+
+@@ -728,6 +914,9 @@ static int cpu_smpboot_alloc(unsigned int cpu)
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
+
++ if ( setup_cpu_root_pgt(cpu) )
++ goto oom;
++
+ for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
+ i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
+ if ( cpu_online(i) && cpu_to_node(i) == node )
+@@ -783,6 +972,8 @@ static struct notifier_block cpu_smpboot_nfb = {
+
+ void __init smp_prepare_cpus(unsigned int max_cpus)
+ {
++ int rc;
++
+ register_cpu_notifier(&cpu_smpboot_nfb);
+
+ mtrr_aps_sync_begin();
+@@ -796,6 +987,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+
+ stack_base[0] = stack_start;
+
++ rc = setup_cpu_root_pgt(0);
++ if ( rc )
++ panic("Error %d setting up PV root page table\n", rc);
++ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
++
+ set_nr_sockets();
+
+ socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
+@@ -864,6 +1060,8 @@ void __init smp_prepare_boot_cpu(void)
+ #if NR_CPUS > 2 * BITS_PER_LONG
+ per_cpu(scratch_cpumask, cpu) = &scratch_cpu0mask;
+ #endif
++
++ get_cpu_info()->xen_cr3 = 0;
+ }
+
+ static void
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index e136af6b99..b1a4310974 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -137,6 +137,8 @@ void __dummy__(void)
+ OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
+ OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+ OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
++ OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
++ OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
+ DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+ BLANK();
+
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index 3fea54ee9d..e668f00c36 100644
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -199,6 +199,17 @@ ENTRY(cstar_enter)
+ pushq $0
+ movl $TRAP_syscall, 4(%rsp)
+ SAVE_ALL
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Lcstar_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lcstar_cr3_okay:
++
+ GET_CURRENT(bx)
+ movq VCPU_domain(%rbx),%rcx
+ cmpb $0,DOMAIN_is_32bit_pv(%rcx)
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 1dd9ccf6a2..fc38874b1f 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -37,6 +37,32 @@ ENTRY(switch_to_kernel)
+ /* %rbx: struct vcpu, interrupts disabled */
+ restore_all_guest:
+ ASSERT_INTERRUPTS_DISABLED
++
++ /* Copy guest mappings and switch to per-CPU root page table. */
++ mov %cr3, %r9
++ GET_STACK_END(dx)
++ mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
++ movabs $PADDR_MASK & PAGE_MASK, %rsi
++ movabs $DIRECTMAP_VIRT_START, %rcx
++ mov %rdi, %rax
++ and %rsi, %rdi
++ and %r9, %rsi
++ add %rcx, %rdi
++ add %rcx, %rsi
++ mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
++ mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
++ mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)
++ rep movsq
++ mov $ROOT_PAGETABLE_ENTRIES - \
++ ROOT_PAGETABLE_LAST_XEN_SLOT - 1, %ecx
++ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
++ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rsi
++ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
++ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
++ rep movsq
++ mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
++ write_cr3 rax, rdi, rsi
++
+ RESTORE_ALL
+ testw $TRAP_syscall,4(%rsp)
+ jz iret_exit_to_guest
+@@ -71,6 +97,22 @@ iret_exit_to_guest:
+ ALIGN
+ /* No special register assumptions. */
+ restore_all_xen:
++ /*
++ * Check whether we need to switch to the per-CPU page tables, in
++ * case we return to late PV exit code (from an NMI or #MC).
++ */
++ GET_STACK_END(ax)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx
++ mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax
++ test %rdx, %rdx
++ /*
++ * Ideally the condition would be "nsz", but such doesn't exist,
++ * so "g" will have to do.
++ */
++UNLIKELY_START(g, exit_cr3)
++ write_cr3 rax, rdi, rsi
++UNLIKELY_END(exit_cr3)
++
+ RESTORE_ALL adj=8
+ iretq
+
+@@ -100,7 +142,18 @@ ENTRY(lstar_enter)
+ pushq $0
+ movl $TRAP_syscall, 4(%rsp)
+ SAVE_ALL
+- GET_CURRENT(bx)
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Llstar_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Llstar_cr3_okay:
++
++ __GET_CURRENT(bx)
+ testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
+ jz switch_to_kernel
+
+@@ -192,7 +245,18 @@ GLOBAL(sysenter_eflags_saved)
+ pushq $0
+ movl $TRAP_syscall, 4(%rsp)
+ SAVE_ALL
+- GET_CURRENT(bx)
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Lsyse_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lsyse_cr3_okay:
++
++ __GET_CURRENT(bx)
+ cmpb $0,VCPU_sysenter_disables_events(%rbx)
+ movq VCPU_sysenter_addr(%rbx),%rax
+ setne %cl
+@@ -228,13 +292,23 @@ ENTRY(int80_direct_trap)
+ movl $0x80, 4(%rsp)
+ SAVE_ALL
+
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Lint80_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lint80_cr3_okay:
++
+ cmpb $0,untrusted_msi(%rip)
+ UNLIKELY_START(ne, msi_check)
+ movl $0x80,%edi
+ call check_for_unexpected_msi
+ UNLIKELY_END(msi_check)
+
+- GET_CURRENT(bx)
++ __GET_CURRENT(bx)
+
+ /* Check that the callback is non-null. */
+ leaq VCPU_int80_bounce(%rbx),%rdx
+@@ -391,9 +465,27 @@ ENTRY(dom_crash_sync_extable)
+
+ ENTRY(common_interrupt)
+ SAVE_ALL CLAC
++
++ GET_STACK_END(14)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++ mov %rcx, %r15
++ neg %rcx
++ jz .Lintr_cr3_okay
++ jns .Lintr_cr3_load
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ neg %rcx
++.Lintr_cr3_load:
++ write_cr3 rcx, rdi, rsi
++ xor %ecx, %ecx
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ testb $3, UREGS_cs(%rsp)
++ cmovnz %rcx, %r15
++.Lintr_cr3_okay:
++
+ CR4_PV32_RESTORE
+ movq %rsp,%rdi
+ callq do_IRQ
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ jmp ret_from_intr
+
+ /* No special register assumptions. */
+@@ -411,6 +503,23 @@ ENTRY(page_fault)
+ /* No special register assumptions. */
+ GLOBAL(handle_exception)
+ SAVE_ALL CLAC
++
++ GET_STACK_END(14)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++ mov %rcx, %r15
++ neg %rcx
++ jz .Lxcpt_cr3_okay
++ jns .Lxcpt_cr3_load
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ neg %rcx
++.Lxcpt_cr3_load:
++ write_cr3 rcx, rdi, rsi
++ xor %ecx, %ecx
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ testb $3, UREGS_cs(%rsp)
++ cmovnz %rcx, %r15
++.Lxcpt_cr3_okay:
++
+ handle_exception_saved:
+ GET_CURRENT(bx)
+ testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
+@@ -475,6 +584,7 @@ handle_exception_saved:
+ leaq exception_table(%rip),%rdx
+ PERFC_INCR(exceptions, %rax, %rbx)
+ callq *(%rdx,%rax,8)
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ testb $3,UREGS_cs(%rsp)
+ jz restore_all_xen
+ leaq VCPU_trap_bounce(%rbx),%rdx
+@@ -507,6 +617,7 @@ exception_with_ints_disabled:
+ rep; movsq # make room for ec/ev
+ 1: movq UREGS_error_code(%rsp),%rax # ec/ev
+ movq %rax,UREGS_kernel_sizeof(%rsp)
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ jmp restore_all_xen # return to fixup code
+
+ /* No special register assumptions. */
+@@ -585,6 +696,17 @@ ENTRY(double_fault)
+ movl $TRAP_double_fault,4(%rsp)
+ /* Set AC to reduce chance of further SMAP faults */
+ SAVE_ALL STAC
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx
++ test %rbx, %rbx
++ jz .Ldblf_cr3_okay
++ jns .Ldblf_cr3_load
++ neg %rbx
++.Ldblf_cr3_load:
++ write_cr3 rbx, rdi, rsi
++.Ldblf_cr3_okay:
++
+ movq %rsp,%rdi
+ call do_double_fault
+ BUG /* do_double_fault() shouldn't return. */
+@@ -603,10 +725,28 @@ ENTRY(nmi)
+ movl $TRAP_nmi,4(%rsp)
+ handle_ist_exception:
+ SAVE_ALL CLAC
++
++ GET_STACK_END(14)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++ mov %rcx, %r15
++ neg %rcx
++ jz .List_cr3_okay
++ jns .List_cr3_load
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ neg %rcx
++.List_cr3_load:
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++.List_cr3_okay:
++
+ CR4_PV32_RESTORE
+ testb $3,UREGS_cs(%rsp)
+ jz 1f
+- /* Interrupted guest context. Copy the context to stack bottom. */
++ /*
++ * Interrupted guest context. Clear the restore value for xen_cr3
++ * and copy the context to stack bottom.
++ */
++ xor %r15, %r15
+ GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
+ movq %rsp,%rsi
+ movl $UREGS_kernel_sizeof/8,%ecx
+@@ -616,6 +756,7 @@ handle_ist_exception:
+ movzbl UREGS_entry_vector(%rsp),%eax
+ leaq exception_table(%rip),%rdx
+ callq *(%rdx,%rax,8)
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
+ jne ret_from_intr
+
+diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
+index 98192eb4e6..fb0fee9286 100644
+--- a/xen/include/asm-x86/asm_defns.h
++++ b/xen/include/asm-x86/asm_defns.h
+@@ -93,9 +93,30 @@ void ret_from_intr(void);
+ UNLIKELY_DONE(mp, tag); \
+ __UNLIKELY_END(tag)
+
++ .equ .Lrax, 0
++ .equ .Lrcx, 1
++ .equ .Lrdx, 2
++ .equ .Lrbx, 3
++ .equ .Lrsp, 4
++ .equ .Lrbp, 5
++ .equ .Lrsi, 6
++ .equ .Lrdi, 7
++ .equ .Lr8, 8
++ .equ .Lr9, 9
++ .equ .Lr10, 10
++ .equ .Lr11, 11
++ .equ .Lr12, 12
++ .equ .Lr13, 13
++ .equ .Lr14, 14
++ .equ .Lr15, 15
++
+ #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
+ #define GET_STACK_END(reg) \
++ .if .Lr##reg > 8; \
++ movq $STACK_SIZE-1, %r##reg; \
++ .else; \
+ movl $STACK_SIZE-1, %e##reg; \
++ .endif; \
+ orq %rsp, %r##reg
+
+ #define GET_CPUINFO_FIELD(field, reg) \
+@@ -177,6 +198,15 @@ void ret_from_intr(void);
+ #define ASM_STAC ASM_AC(STAC)
+ #define ASM_CLAC ASM_AC(CLAC)
+
++.macro write_cr3 val:req, tmp1:req, tmp2:req
++ mov %cr4, %\tmp1
++ mov %\tmp1, %\tmp2
++ and $~X86_CR4_PGE, %\tmp1
++ mov %\tmp1, %cr4
++ mov %\val, %cr3
++ mov %\tmp2, %cr4
++.endm
++
+ #define CR4_PV32_RESTORE \
+ 667: ASM_NOP5; \
+ .pushsection .altinstr_replacement, "ax"; \
+diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
+index 89849929eb..b929c48c85 100644
+--- a/xen/include/asm-x86/current.h
++++ b/xen/include/asm-x86/current.h
+@@ -41,6 +41,18 @@ struct cpu_info {
+ struct vcpu *current_vcpu;
+ unsigned long per_cpu_offset;
+ unsigned long cr4;
++ /*
++ * Of the two following fields the latter is being set to the CR3 value
++ * to be used on the given pCPU for loading whenever 64-bit PV guest
++ * context is being entered. The value never changes once set.
++ * The former is the value to restore when re-entering Xen, if any. IOW
++ * its value being zero means there's nothing to restore. However, its
++ * value can also be negative, indicating to the exit-to-Xen code that
++ * restoring is not necessary, but allowing any nested entry code paths
++ * to still know the value to put back into CR3.
++ */
++ unsigned long xen_cr3;
++ unsigned long pv_cr3;
+ /* get_stack_bottom() must be 16-byte aligned */
+ };
+
+diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
+index 41a8d8c32f..2962e83464 100644
+--- a/xen/include/asm-x86/processor.h
++++ b/xen/include/asm-x86/processor.h
+@@ -462,6 +462,7 @@ extern idt_entry_t idt_table[];
+ extern idt_entry_t *idt_tables[];
+
+ DECLARE_PER_CPU(struct tss_struct, init_tss);
++DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
+
+ extern void init_int80_direct_trap(struct vcpu *v);
+
+diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
+index 6fb7cd5553..05a0334893 100644
+--- a/xen/include/asm-x86/x86_64/page.h
++++ b/xen/include/asm-x86/x86_64/page.h
+@@ -24,8 +24,8 @@
+ /* These are architectural limits. Current CPUs support only 40-bit phys. */
+ #define PADDR_BITS 52
+ #define VADDR_BITS 48
+-#define PADDR_MASK ((1UL << PADDR_BITS)-1)
+-#define VADDR_MASK ((1UL << VADDR_BITS)-1)
++#define PADDR_MASK ((_AC(1,UL) << PADDR_BITS) - 1)
++#define VADDR_MASK ((_AC(1,UL) << VADDR_BITS) - 1)
+
+ #define VADDR_TOP_BIT (1UL << (VADDR_BITS - 1))
+ #define CANONICAL_MASK (~0UL & ~VADDR_MASK)
+@@ -107,6 +107,7 @@ typedef l4_pgentry_t root_pgentry_t;
+ : (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
+ ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT)))
+
++#define root_table_offset l4_table_offset
+ #define root_get_pfn l4e_get_pfn
+ #define root_get_flags l4e_get_flags
+ #define root_get_intpte l4e_get_intpte
+--
+2.14.3
+
+
+From 7cccd6f748ec724cf9408cec6b3ec8e54a8a2c1f Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich(a)suse.com>
+Date: Wed, 17 Jan 2018 16:57:33 +0100
+Subject: [PATCH 5/5] x86: allow Meltdown band-aid to be disabled
+
+First of all we don't need it on AMD systems. Additionally allow its use
+to be controlled by command line option. For best backportability, this
+intentionally doesn't use alternative instruction patching to achieve
+the intended effect - while we likely want it, this will be later
+follow-up.
+
+Signed-off-by: Jan Beulich <jbeulich(a)suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3(a)citrix.com>
+master commit: e871e80c38547d9faefc6604532ba3e985e65873
+master date: 2018-01-16 17:50:59 +0100
+---
+ docs/misc/xen-command-line.markdown | 12 ++++++++++++
+ xen/arch/x86/domain.c | 7 +++++--
+ xen/arch/x86/mm.c | 2 +-
+ xen/arch/x86/smpboot.c | 17 ++++++++++++++---
+ xen/arch/x86/x86_64/entry.S | 2 ++
+ 5 files changed, 34 insertions(+), 6 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index 781110d4b2..49539b4d1c 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -1849,6 +1849,18 @@ In the case that x2apic is in use, this option switches between
physical and
+ clustered mode. The default, given no hint from the **FADT**, is cluster
+ mode.
+
++### xpti
++> `= <boolean>`
++
++> Default: `false` on AMD hardware
++> Default: `true` everywhere else
++
++Override default selection of whether to isolate 64-bit PV guest page
++tables.
++
++** WARNING: Not yet a complete isolation implementation, but better than
++nothing. **
++
+ ### xsave
+ > `= <boolean>`
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index f4a3d7445b..b357b60f73 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1505,10 +1505,13 @@ void paravirt_ctxt_switch_from(struct vcpu *v)
+
+ void paravirt_ctxt_switch_to(struct vcpu *v)
+ {
++ root_pgentry_t *root_pgt = this_cpu(root_pgt);
+ unsigned long cr4;
+
+- this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] =
+- l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
++ if ( root_pgt )
++ root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
++ l4e_from_page(v->domain->arch.perdomain_l3_pg,
++ __PAGE_HYPERVISOR_RW);
+
+ cr4 = pv_guest_cr4_to_real_cr4(v);
+ if ( unlikely(cr4 != read_cr4()) )
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 6c7d12034b..53295f85b7 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -3665,7 +3665,7 @@ long do_mmu_update(
+ rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+ if ( !rc )
+- sync_guest = true;
++ sync_guest = this_cpu(root_pgt);
+ break;
+ case PGT_writable_page:
+ perfc_incr(writable_mmu_updates);
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index b1fbb57a81..edf607f5a2 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -328,7 +328,7 @@ void start_secondary(void *unused)
+ spin_debug_disable();
+
+ get_cpu_info()->xen_cr3 = 0;
+- get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
++ get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 0;
+
+ load_system_tables();
+
+@@ -736,14 +736,20 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
+ return 0;
+ }
+
++static __read_mostly int8_t opt_xpti = -1;
++boolean_param("xpti", opt_xpti);
+ DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
+
+ static int setup_cpu_root_pgt(unsigned int cpu)
+ {
+- root_pgentry_t *rpt = alloc_xen_pagetable();
++ root_pgentry_t *rpt;
+ unsigned int off;
+ int rc;
+
++ if ( !opt_xpti )
++ return 0;
++
++ rpt = alloc_xen_pagetable();
+ if ( !rpt )
+ return -ENOMEM;
+
+@@ -987,10 +993,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+
+ stack_base[0] = stack_start;
+
++ if ( opt_xpti < 0 )
++ opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
++
+ rc = setup_cpu_root_pgt(0);
+ if ( rc )
+ panic("Error %d setting up PV root page table\n", rc);
+- get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
++ if ( per_cpu(root_pgt, 0) )
++ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
+
+ set_nr_sockets();
+
+@@ -1062,6 +1072,7 @@ void __init smp_prepare_boot_cpu(void)
+ #endif
+
+ get_cpu_info()->xen_cr3 = 0;
++ get_cpu_info()->pv_cr3 = 0;
+ }
+
+ static void
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index fc38874b1f..a8825c89df 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -46,6 +46,7 @@ restore_all_guest:
+ movabs $DIRECTMAP_VIRT_START, %rcx
+ mov %rdi, %rax
+ and %rsi, %rdi
++ jz .Lrag_keep_cr3
+ and %r9, %rsi
+ add %rcx, %rdi
+ add %rcx, %rsi
+@@ -62,6 +63,7 @@ restore_all_guest:
+ rep movsq
+ mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
+ write_cr3 rax, rdi, rsi
++.Lrag_keep_cr3:
+
+ RESTORE_ALL
+ testw $TRAP_syscall,4(%rsp)
+--
+2.14.3
+