The package rpms/xen.git has added or updated architecture-specific content in its spec file (ExclusiveArch/ExcludeArch or %ifarch/%ifnarch) in commit(s): https://src.fedoraproject.org/cgit/rpms/xen.git/commit/?id=ed166e2fa92d49f41....
Change: +%ifarch %{ix86} x86_64
Thanks.
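As background for the first fix listed below (XSA-296): the patch hardens a variadic argument-marshalling pattern in the hypercall-continuation code, where an unrecognised format character used to hit BUG() and crash the host. The following is a minimal, self-contained C sketch of that pattern -- simplified types and hypothetical names, not Xen's actual code -- showing the kind of recoverable error path the fix introduces:

#include <stdarg.h>
#include <stdio.h>

/*
 * Minimal model of the argument-marshalling pattern hardened by
 * xsa296.patch.  All names and types here are simplified stand-ins,
 * not Xen's own code.  'i' = unsigned int, 'l' = unsigned long,
 * 'h' = handle (pointer); anything else is a caller bug.  The pre-fix
 * code hit BUG() on a bad character (crashing the host); the fix turns
 * it into a recoverable error path (modelled here as returning -1).
 */
static int marshal_args(const char *fmt, unsigned long *out, unsigned int max, ...)
{
    va_list args;
    unsigned int i = 0;
    int rc = 0;

    va_start(args, max);
    for ( ; *fmt != '\0' && i < max; fmt++, i++ )
    {
        switch ( *fmt )
        {
        case 'i': out[i] = va_arg(args, unsigned int); break;
        case 'l': out[i] = va_arg(args, unsigned long); break;
        case 'h': out[i] = (unsigned long)va_arg(args, void *); break;
        default:  rc = -1; goto out;   /* bad format: fail, don't BUG() */
        }
    }
 out:
    va_end(args);
    return rc;
}

int main(void)
{
    unsigned long regs[3];

    /* "iih" mirrors the corrected vcpu_op continuation; "iuh" would fail. */
    if ( marshal_args("iih", regs, 3, 1u, 2u, (void *)0) )
        return 1;
    printf("%lu %lu %lu\n", regs[0], regs[1], regs[2]);
    return 0;
}

The real xsa296.patch (included in the full change below) additionally corrects the "iuh" continuation format strings to "iih", since 'u' was never a recognised specifier.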
Full change:
============

commit ed166e2fa92d49f414aa1b960117dbad443d7d7c
Author: Michael Young <m.a.young@durham.ac.uk>
Date:   Thu Oct 31 20:47:20 2019 +0000
6 security fixes
VCPUOP_initialise DoS [XSA-296, CVE-2019-18420]
missing descriptor table limit checking in x86 PV emulation [XSA-298, CVE-2019-18425]
Issues with restartable PV type change operations [XSA-299, CVE-2019-18421]
add-to-physmap can be abused to DoS Arm hosts [XSA-301, CVE-2019-18423]
passed through PCI devices may corrupt host memory after deassignment [XSA-302, CVE-2019-18424]
ARM: Interrupts are unconditionally unmasked in exception handlers [XSA-303, CVE-2019-18422]
diff --git a/xen.spec b/xen.spec
index d4fba3f..cafbc00 100644
--- a/xen.spec
+++ b/xen.spec
@@ -58,7 +58,7 @@
 Summary: Xen is a virtual machine monitor
 Name: xen
 Version: 4.12.1
-Release: 3%{?dist}
+Release: 4%{?dist}
 License: GPLv2+ and LGPLv2+ and BSD
 URL: http://xen.org/
 Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}...
@@ -114,6 +114,18 @@
 Patch41: xen.python.env.patch
 Patch42: xen.gcc9.fixes.patch
 Patch43: xen.python3.patch
 Patch54: xen.python38.patch
+Patch55: xsa296.patch
+Patch56: xsa298.patch
+Patch57: xsa299-4.12-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch
+Patch58: xsa301-master-1.patch
+Patch59: xsa301-master-2.patch
+Patch60: xsa301-master-3.patch
+Patch61: xsa302-4.12-0001-IOMMU-add-missing-HVM-check.patch
+Patch62: xsa302-4.12-0002-passthrough-quarantine-PCI-devices.patch
+Patch63: xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch
+Patch64: xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch
+Patch65: xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
+Patch66: xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
 %if %build_qemutrad
@@ -312,6 +324,20 @@ manage Xen virtual machines.
 %patch42 -p1
 %patch43 -p1
 %patch54 -p1
+%patch55 -p1
+%patch56 -p1
+%patch57 -p1
+%patch58 -p1
+%patch59 -p1
+%patch60 -p1
+%ifarch %{ix86} x86_64
+%patch61 -p1
+%patch62 -p1
+%endif
+%patch63 -p1
+%patch64 -p1
+%patch65 -p1
+%patch66 -p1
 # qemu-xen-traditional patches
 pushd tools/qemu-xen-traditional
@@ -875,6 +901,17 @@ fi
 %endif
 %changelog
+* Thu Oct 31 2019 Michael Young <m.a.young@durham.ac.uk> - 4.12.1-4
+- VCPUOP_initialise DoS [XSA-296, CVE-2019-18420]
+- missing descriptor table limit checking in x86 PV emulation [XSA-298,
+  CVE-2019-18425]
+- Issues with restartable PV type change operations [XSA-299, CVE-2019-18421]
+- add-to-physmap can be abused to DoS Arm hosts [XSA-301, CVE-2019-18423]
+- passed through PCI devices may corrupt host memory after deassignment
+  [XSA-302, CVE-2019-18424]
+- ARM: Interrupts are unconditionally unmasked in exception handlers
+  [XSA-303, CVE-2019-18422]
+
 * Thu Oct 03 2019 Miro Hrončok <mhroncok@redhat.com> - 4.12.1-3
 - Rebuilt for Python 3.8.0rc1 (#1748018)
diff --git a/xsa296.patch b/xsa296.patch new file mode 100644 index 0000000..e71ea7f --- /dev/null +++ b/xsa296.patch @@ -0,0 +1,195 @@ +From: Andrew Cooper andrew.cooper3@citrix.com +Subject: xen/hypercall: Don't use BUG() for parameter checking in hypercall_create_continuation() + +Since c/s 1d429034 "hypercall: update vcpu_op to take an unsigned vcpuid", +which incorrectly swapped 'i' for 'u' in the parameter type list, guests have +been able to hit the BUG() in next_args()'s default case. + +Correct these back to 'i'. + +In addition, make adjustments to prevent this class of issue from occurring in +the future - crashing Xen is not an appropriate form of parameter checking. + +Capitalise NEXT_ARG() to catch all uses, to highlight that it is a macro doing +non-function-like things behind the scenes, and undef it when appropriate. +Implement a bad_fmt: block which prints an error, asserts unreachable, and +crashes the guest. + +On the ARM side, drop all parameter checking of p. It is asymmetric with the +x86 side, and akin to expecting memcpy() or sprintf() to check their src/fmt +parameter before use. A caller passing "" or something other than a string +literal will be obvious during code review. + +This is XSA-296. + +Signed-off-by: Andrew Cooper andrew.cooper3@citrix.com +Acked-by: Julien Grall julien.grall@arm.com + +diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c +index 941bbff4fe..a3da8e9c08 100644 +--- a/xen/arch/arm/domain.c ++++ b/xen/arch/arm/domain.c +@@ -383,14 +383,15 @@ void sync_vcpu_execstate(struct vcpu *v) + /* Nothing to do -- no lazy switching */ + } + +-#define next_arg(fmt, args) ({ \ ++#define NEXT_ARG(fmt, args) \ ++({ \ + unsigned long __arg; \ + switch ( *(fmt)++ ) \ + { \ + case 'i': __arg = (unsigned long)va_arg(args, unsigned int); break; \ + case 'l': __arg = (unsigned long)va_arg(args, unsigned long); break; \ + case 'h': __arg = (unsigned long)va_arg(args, void *); break; \ +- default: __arg = 0; BUG(); \ ++ default: goto bad_fmt; \ + } \ + __arg; \ + }) +@@ -405,9 +406,6 @@ unsigned long hypercall_create_continuation( + unsigned int i; + va_list args; + +- /* All hypercalls take at least one argument */ +- BUG_ON( !p || *p == '\0' ); +- + current->hcall_preempted = true; + + va_start(args, format); +@@ -415,7 +413,7 @@ unsigned long hypercall_create_continuation( + if ( mcs->flags & MCSF_in_multicall ) + { + for ( i = 0; *p != '\0'; i++ ) +- mcs->call.args[i] = next_arg(p, args); ++ mcs->call.args[i] = NEXT_ARG(p, args); + + /* Return value gets written back to mcs->call.result */ + rc = mcs->call.result; +@@ -431,7 +429,7 @@ unsigned long hypercall_create_continuation( + + for ( i = 0; *p != '\0'; i++ ) + { +- arg = next_arg(p, args); ++ arg = NEXT_ARG(p, args); + + switch ( i ) + { +@@ -454,7 +452,7 @@ unsigned long hypercall_create_continuation( + + for ( i = 0; *p != '\0'; i++ ) + { +- arg = next_arg(p, args); ++ arg = NEXT_ARG(p, args); + + switch ( i ) + { +@@ -475,8 +473,16 @@ unsigned long hypercall_create_continuation( + va_end(args); + + return rc; ++ ++ bad_fmt: ++ gprintk(XENLOG_ERR, "Bad hypercall continuation format '%c'\n", *p); ++ ASSERT_UNREACHABLE(); ++ domain_crash(current->domain); ++ return 0; + } + ++#undef NEXT_ARG ++ + void startup_cpu_idle_loop(void) + { + struct vcpu *v = current; +diff --git a/xen/arch/x86/hypercall.c b/xen/arch/x86/hypercall.c +index d483dbaa6b..4643e5eb43 100644 +--- a/xen/arch/x86/hypercall.c ++++ b/xen/arch/x86/hypercall.c +@@ -80,14 +80,15 @@ const hypercall_args_t 
hypercall_args_table[NR_hypercalls] = + #undef COMP + #undef ARGS + +-#define next_arg(fmt, args) ({ \ ++#define NEXT_ARG(fmt, args) \ ++({ \ + unsigned long __arg; \ + switch ( *(fmt)++ ) \ + { \ + case 'i': __arg = (unsigned long)va_arg(args, unsigned int); break; \ + case 'l': __arg = (unsigned long)va_arg(args, unsigned long); break; \ + case 'h': __arg = (unsigned long)va_arg(args, void *); break; \ +- default: __arg = 0; BUG(); \ ++ default: goto bad_fmt; \ + } \ + __arg; \ + }) +@@ -109,7 +110,7 @@ unsigned long hypercall_create_continuation( + if ( mcs->flags & MCSF_in_multicall ) + { + for ( i = 0; *p != '\0'; i++ ) +- mcs->call.args[i] = next_arg(p, args); ++ mcs->call.args[i] = NEXT_ARG(p, args); + } + else + { +@@ -121,7 +122,7 @@ unsigned long hypercall_create_continuation( + { + for ( i = 0; *p != '\0'; i++ ) + { +- arg = next_arg(p, args); ++ arg = NEXT_ARG(p, args); + switch ( i ) + { + case 0: regs->rdi = arg; break; +@@ -137,7 +138,7 @@ unsigned long hypercall_create_continuation( + { + for ( i = 0; *p != '\0'; i++ ) + { +- arg = next_arg(p, args); ++ arg = NEXT_ARG(p, args); + switch ( i ) + { + case 0: regs->rbx = arg; break; +@@ -154,8 +155,16 @@ unsigned long hypercall_create_continuation( + va_end(args); + + return op; ++ ++ bad_fmt: ++ gprintk(XENLOG_ERR, "Bad hypercall continuation format '%c'\n", *p); ++ ASSERT_UNREACHABLE(); ++ domain_crash(curr->domain); ++ return 0; + } + ++#undef NEXT_ARG ++ + int hypercall_xlat_continuation(unsigned int *id, unsigned int nr, + unsigned int mask, ...) + { +diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c +index 39877b3ab2..2531fa7421 100644 +--- a/xen/common/compat/domain.c ++++ b/xen/common/compat/domain.c +@@ -81,7 +81,7 @@ int compat_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) ar + } + + if ( rc == -ERESTART ) +- rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh", ++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", + cmd, vcpuid, arg); + + break; +diff --git a/xen/common/domain.c b/xen/common/domain.c +index 2308588052..65bcd85e34 100644 +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -1411,7 +1411,7 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg) + + rc = arch_initialise_vcpu(v, arg); + if ( rc == -ERESTART ) +- rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh", ++ rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih", + cmd, vcpuid, arg); + + break; diff --git a/xsa298.patch b/xsa298.patch new file mode 100644 index 0000000..aa39042 --- /dev/null +++ b/xsa298.patch @@ -0,0 +1,89 @@ +From: Jan Beulich jbeulich@suse.com +Subject: x86/PV: check GDT/LDT limits during emulation + +Accesses beyond the LDT limit originating from emulation would trigger +the ASSERT() in pv_map_ldt_shadow_page(). On production builds such +accesses would cause an attempt to promote the touched page (offset from +the present LDT base address) to a segment descriptor one. If this +happens to succeed, guest user mode would be able to elevate its +privileges to that of the guest kernel. This is particularly easy when +there's no LDT at all, in which case the LDT base stored internally to +Xen is simply zero. + +Also adjust the ASSERT() that was triggering: It was off by one to +begin with, and for production builds we also better use +ASSERT_UNREACHABLE() instead with suitable recovery code afterwards. + +This is XSA-298. 
+ +Reported-by: Andrew Cooper andrew.cooper3@citrix.com +Signed-off-by: Jan Beulich jbeulich@suse.com +Reviewed-by: Andrew Cooper andrew.cooper3@citrix.com +--- +v2: Correct 64-bit-only limit check (by folding into the common one). + +--- a/xen/arch/x86/pv/emul-gate-op.c ++++ b/xen/arch/x86/pv/emul-gate-op.c +@@ -51,7 +51,13 @@ static int read_gate_descriptor(unsigned + const seg_desc_t *pdesc = gdt_ldt_desc_ptr(gate_sel); + + if ( (gate_sel < 4) || +- ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) || ++ /* ++ * We're interested in call gates only, which occupy a single ++ * seg_desc_t for 32-bit and a consecutive pair of them for 64-bit. ++ */ ++ ((gate_sel >> 3) + !is_pv_32bit_vcpu(v) >= ++ (gate_sel & 4 ? v->arch.pv.ldt_ents ++ : v->arch.pv.gdt_ents)) || + __get_user(desc, pdesc) ) + return 0; + +@@ -70,7 +76,7 @@ static int read_gate_descriptor(unsigned + if ( !is_pv_32bit_vcpu(v) ) + { + if ( (*ar & 0x1f00) != 0x0c00 || +- (gate_sel >= FIRST_RESERVED_GDT_BYTE - 8 && !(gate_sel & 4)) || ++ /* Limit check done above already. */ + __get_user(desc, pdesc + 1) || + (desc.b & 0x1f00) ) + return 0; +--- a/xen/arch/x86/pv/emulate.c ++++ b/xen/arch/x86/pv/emulate.c +@@ -31,7 +31,14 @@ int pv_emul_read_descriptor(unsigned int + { + seg_desc_t desc; + +- if ( sel < 4) ++ if ( sel < 4 || ++ /* ++ * Don't apply the GDT limit here, as the selector may be a Xen ++ * provided one. __get_user() will fail (without taking further ++ * action) for ones falling in the gap between guest populated ++ * and Xen ones. ++ */ ++ ((sel & 4) && (sel >> 3) >= v->arch.pv.ldt_ents) ) + desc.b = desc.a = 0; + else if ( __get_user(desc, gdt_ldt_desc_ptr(sel)) ) + return 0; +--- a/xen/arch/x86/pv/mm.c ++++ b/xen/arch/x86/pv/mm.c +@@ -92,12 +92,16 @@ bool pv_map_ldt_shadow_page(unsigned int + BUG_ON(unlikely(in_irq())); + + /* +- * Hardware limit checking should guarantee this property. NB. This is ++ * Prior limit checking should guarantee this property. NB. This is + * safe as updates to the LDT can only be made by MMUEXT_SET_LDT to the + * current vcpu, and vcpu_reset() will block until this vcpu has been + * descheduled before continuing. + */ +- ASSERT((offset >> 3) <= curr->arch.pv.ldt_ents); ++ if ( unlikely((offset >> 3) >= curr->arch.pv.ldt_ents) ) ++ { ++ ASSERT_UNREACHABLE(); ++ return false; ++ } + + if ( is_pv_32bit_domain(currd) ) + linear = (uint32_t)linear; diff --git a/xsa299-4.12-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch b/xsa299-4.12-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch new file mode 100644 index 0000000..fbb9d80 --- /dev/null +++ b/xsa299-4.12-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch @@ -0,0 +1,94 @@ +From 33d051917d5ef38f678b507a3c832afde48b9b49 Mon Sep 17 00:00:00 2001 +From: George Dunlap george.dunlap@citrix.com +Date: Thu, 10 Oct 2019 17:57:49 +0100 +Subject: [PATCH 01/11] x86/mm: L1TF checks don't leave a partial entry + +On detection of a potential L1TF issue, most validation code returns +-ERESTART to allow the switch to shadow mode to happen and cause the +original operation to be restarted. + +However, in the validation code, the return value -ERESTART has been +repurposed to indicate 1) the function has partially completed +something which needs to be undone, and 2) calling put_page_type() +should cleanly undo it. This causes problems in several places. + +For L1 tables, on receiving an -ERESTART return from alloc_l1_table(), +alloc_page_type() will set PGT_partial on the page. 
If for some +reason the original operation never restarts, then on domain +destruction, relinquish_memory() will call free_page_type() on the +page. + +Unfortunately, alloc_ and free_l1_table() aren't set up to deal with +PGT_partial. When returning a failure, alloc_l1_table() always +de-validates whatever it's validated so far, and free_l1_table() +always devalidates the whole page. This means that if +relinquish_memory() calls free_page_type() on an L1 that didn't +complete due to an L1TF, it will call put_page_from_l1e() on "page +entries" that have never been validated. + +For L2+ tables, setting rc to ERESTART causes the rest of the +alloc_lN_table() function to *think* that the entry in question will +have PGT_partial set. This will cause it to set partial_pte = 1. If +relinquish_memory() then calls free_page_type() on one of those pages, +then free_lN_table() will call put_page_from_lNe() on the entry when +it shouldn't. + +Rather than indicating -ERESTART, indicate -EINTR. This is the code +to indicate that nothing has changed from when you started the call +(which is effectively how alloc_l1_table() handles errors). + +mod_lN_entry() shouldn't have any of these types of problems, so leave +potential changes there for a clean-up patch later. + +This is part of XSA-299. + +Reported-by: George Dunlap george.dunlap@citrix.com +Signed-off-by: George Dunlap george.dunlap@citrix.com +Reviewed-by: Jan Beulich jbeulich@suse.com +--- + xen/arch/x86/mm.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 3557cd1178..a1b55c10ff 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -1409,7 +1409,7 @@ static int alloc_l1_table(struct page_info *page) + { + if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) ) + { +- ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -ERESTART : 0; ++ ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -EINTR : 0; + if ( ret ) + goto out; + } +@@ -1517,7 +1517,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) + { + if ( !pv_l1tf_check_l2e(d, l2e) ) + continue; +- rc = -ERESTART; ++ rc = -EINTR; + } + else + rc = get_page_from_l2e(l2e, pfn, d, partial); +@@ -1603,7 +1603,7 @@ static int alloc_l3_table(struct page_info *page) + { + if ( !pv_l1tf_check_l3e(d, l3e) ) + continue; +- rc = -ERESTART; ++ rc = -EINTR; + } + else + rc = get_page_from_l3e(l3e, pfn, d, partial); +@@ -1783,7 +1783,7 @@ static int alloc_l4_table(struct page_info *page) + { + if ( !pv_l1tf_check_l4e(d, l4e) ) + continue; +- rc = -ERESTART; ++ rc = -EINTR; + } + else + rc = get_page_from_l4e(l4e, pfn, d, partial); +-- +2.23.0 + diff --git a/xsa301-master-1.patch b/xsa301-master-1.patch new file mode 100644 index 0000000..54cce2c --- /dev/null +++ b/xsa301-master-1.patch @@ -0,0 +1,80 @@ +From 19d6330f142cb941b6340a88592e8a294de0ff8c Mon Sep 17 00:00:00 2001 +From: Julien Grall julien.grall@arm.com +Date: Tue, 15 Oct 2019 17:10:40 +0100 +Subject: [PATCH 1/3] xen/arm: p2m: Avoid aliasing guest physical frame + +The P2M helpers implementation is quite lax and will end up ignoring +the unused top bits of a guest physical frame. + +This effectively means that p2m_set_entry() will create a mapping for a +different frame (it is always equal to gfn & (mask unused bits)). Yet +p2m->max_mapped_gfn will be updated using the original frame. + +At the moment, p2m_get_entry() and p2m_resolve_translation_fault() +assume that p2m_get_root_pointer() will always return a non-NULL pointer +when the GFN is smaller than p2m->max_mapped_gfn.
+ +Unfortunately, because of the aliasing described above, it would be +possible to set p2m->max_mapped_gfn high enough so it covers a frame that +would lead p2m_get_root_pointer() to return NULL. + +As we don't sanity check the guest physical frame provided by a guest, a +malicious guest could craft a series of hypercalls that will hit the +BUG_ON() and therefore DoS Xen. + +To prevent aliasing, the function p2m_get_root_pointer() is now reworked +to return NULL if any of the unused top bits are not zero. The caller +can then decide what's the appropriate action to do. Since the two paths +(i.e. P2M_ROOT_PAGES == 1 and P2M_ROOT_PAGES != 1) are now very +similar, take the opportunity to consolidate them, making the code a +bit simpler. + +With this change, p2m_get_entry() will not try to insert a mapping as +the root pointer is invalid. + +Note that root_table is now switched to unsigned long as unsigned int is +not enough to hold part of a GFN. + +This is part of XSA-301. + +Reported-by: Julien Grall Julien.Grall@arm.com +Signed-off-by: Julien Grall julien.grall@arm.com +Reviewed-by: Stefano Stabellini sstabellini@kernel.org +--- + xen/arch/arm/p2m.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index a2749d9b6f..d0045a8b28 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -229,21 +229,14 @@ void p2m_tlb_flush_sync(struct p2m_domain *p2m) + static lpae_t *p2m_get_root_pointer(struct p2m_domain *p2m, + gfn_t gfn) + { +- unsigned int root_table; +- +- if ( P2M_ROOT_PAGES == 1 ) +- return __map_domain_page(p2m->root); ++ unsigned long root_table; + + /* +- * Concatenated root-level tables. The table number will be the +- * offset at the previous level. It is not possible to +- * concatenate a level-0 root. ++ * While the root table index is the offset from the previous level, ++ * we can't use (P2M_ROOT_LEVEL - 1) because the root level might be ++ * 0. Yet we still want to check if all the unused bits are zeroed. + */ +- ASSERT(P2M_ROOT_LEVEL > 0); +- +- root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL - 1]); +- root_table &= LPAE_ENTRY_MASK; +- ++ root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL] + LPAE_SHIFT); + if ( root_table >= P2M_ROOT_PAGES ) + return NULL; + +-- +2.23.0 + diff --git a/xsa301-master-2.patch b/xsa301-master-2.patch new file mode 100644 index 0000000..baedc9c --- /dev/null +++ b/xsa301-master-2.patch @@ -0,0 +1,92 @@ +From 3b896936f7505e929dd869d14afcb185d0ee75f8 Mon Sep 17 00:00:00 2001 +From: Julien Grall julien.grall@arm.com +Date: Tue, 15 Oct 2019 17:10:41 +0100 +Subject: [PATCH 2/3] xen/arm: p2m: Avoid off-by-one check on + p2m->max_mapped_gfn + +The code base uses the field p2m->max_mapped_gfn inconsistently. +Some of the users expect that p2m->max_mapped_gfn contains the highest +mapped GFN while others expect highest + 1. + +p2m->max_mapped_gfn is set to highest + 1; because of that, the sanity +check on the GFN in p2m_resolve_translation_fault() and +p2m_get_entry() can be bypassed when GFN == p2m->max_mapped_gfn. + +p2m_get_root_pointer(p2m->max_mapped_gfn) may return NULL if it is +outside of the supported address range and therefore the BUG_ON() could be +hit. + +The current value held in p2m->max_mapped_gfn is inconsistent with the +expectation of the common code (see domain_get_maximum_gpfn()) and also +the documentation of the field.
Rather than changing the check in p2m_resolve_translation_fault() and +p2m_get_entry(), p2m->max_mapped_gfn now contains the highest +mapped GFN and the callers assuming "highest + 1" are now adjusted. + +Take the opportunity to use 1UL rather than 1 as page_order could +theoretically be big enough to overflow a 32-bit integer. + +Lastly, the documentation of the field max_mapped_gfn is updated to reflect how it +is computed. + +This is part of XSA-301. + +Reported-by: Julien Grall Julien.Grall@arm.com +Signed-off-by: Julien Grall julien.grall@arm.com +Reviewed-by: Stefano Stabellini sstabellini@kernel.org +--- + xen/arch/arm/p2m.c | 6 +++--- + xen/include/asm-arm/p2m.h | 5 +---- + 2 files changed, 4 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index d0045a8b28..8d20d27961 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -1041,7 +1041,7 @@ static int __p2m_set_entry(struct p2m_domain *p2m, + p2m_write_pte(entry, pte, p2m->clean_pte); + + p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn, +- gfn_add(sgfn, 1 << page_order)); ++ gfn_add(sgfn, (1UL << page_order) - 1)); + p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn); + } + +@@ -1572,7 +1572,7 @@ int relinquish_p2m_mapping(struct domain *d) + p2m_write_lock(p2m); + + start = p2m->lowest_mapped_gfn; +- end = p2m->max_mapped_gfn; ++ end = gfn_add(p2m->max_mapped_gfn, 1); + + for ( ; gfn_x(start) < gfn_x(end); + start = gfn_next_boundary(start, order) ) +@@ -1641,7 +1641,7 @@ int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end) + p2m_read_lock(p2m); + + start = gfn_max(start, p2m->lowest_mapped_gfn); +- end = gfn_min(end, p2m->max_mapped_gfn); ++ end = gfn_min(end, gfn_add(p2m->max_mapped_gfn, 1)); + + next_block_gfn = start; + +diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h +index 89f82df380..5fdb6e8183 100644 +--- a/xen/include/asm-arm/p2m.h ++++ b/xen/include/asm-arm/p2m.h +@@ -36,10 +36,7 @@ struct p2m_domain { + /* Current Translation Table Base Register for the p2m */ + uint64_t vttbr; + +- /* +- * Highest guest frame that's ever been mapped in the p2m +- * Only takes into account ram and foreign mapping +- */ ++ /* Highest guest frame that's ever been mapped in the p2m */ + gfn_t max_mapped_gfn; + + /* +-- +2.23.0 + diff --git a/xsa301-master-3.patch b/xsa301-master-3.patch new file mode 100644 index 0000000..9f137b8 --- /dev/null +++ b/xsa301-master-3.patch @@ -0,0 +1,67 @@ +From 060c2dd3b7c2674a019d94afb2b4ebf3663f6c6e Mon Sep 17 00:00:00 2001 +From: Julien Grall julien.grall@arm.com +Date: Tue, 15 Oct 2019 17:10:42 +0100 +Subject: [PATCH 3/3] xen/arm: p2m: Don't check the return of + p2m_get_root_pointer() with BUG_ON() + +It turns out that the BUG_ON() was actually reachable with well-crafted +hypercalls. The BUG_ON() is here to catch logical errors, so +crashing Xen is a bit over the top. + +While all the holes should now be fixed, it would be better to downgrade +the BUG_ON() to something less fatal to prevent any more DoS. + +The BUG_ON() in p2m_get_entry() is now replaced by ASSERT_UNREACHABLE() +to catch mistakes in debug builds and return INVALID_MFN for production +builds. The interface also requires page_order to be set to give an idea of +the size of "hole". So 'level' is now set so we report a hole the size of +an entry of the root page-table. This stays in line with what happens +when the GFN is higher than p2m->max_mapped_gfn.
+ +The BUG_ON() in p2m_resolve_translation_fault() is now replaced by +ASSERT_UNREACHABLE() to catch mistakes in debug builds and just report a +fault for production builds. + +This is part of XSA-301. + +Reported-by: Julien Grall Julien.Grall@arm.com +Signed-off-by: Julien Grall julien.grall@arm.com +Reviewed-by: Stefano Stabellini sstabellini@kernel.org +--- + xen/arch/arm/p2m.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c +index 8d20d27961..ce59f2b503 100644 +--- a/xen/arch/arm/p2m.c ++++ b/xen/arch/arm/p2m.c +@@ -395,7 +395,12 @@ mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn, + * the table should always be non-NULL because the gfn is below + * p2m->max_mapped_gfn and the root table pages are always present. + */ +- BUG_ON(table == NULL); ++ if ( !table ) ++ { ++ ASSERT_UNREACHABLE(); ++ level = P2M_ROOT_LEVEL; ++ goto out; ++ } + + for ( level = P2M_ROOT_LEVEL; level < 3; level++ ) + { +@@ -1196,7 +1201,11 @@ bool p2m_resolve_translation_fault(struct domain *d, gfn_t gfn) + * The table should always be non-NULL because the gfn is below + * p2m->max_mapped_gfn and the root table pages are always present. + */ +- BUG_ON(table == NULL); ++ if ( !table ) ++ { ++ ASSERT_UNREACHABLE(); ++ goto out; ++ } + + /* + * Go down the page-tables until an entry has the valid bit unset or +-- +2.23.0 + diff --git a/xsa302-4.12-0001-IOMMU-add-missing-HVM-check.patch b/xsa302-4.12-0001-IOMMU-add-missing-HVM-check.patch new file mode 100644 index 0000000..5d52163 --- /dev/null +++ b/xsa302-4.12-0001-IOMMU-add-missing-HVM-check.patch @@ -0,0 +1,37 @@ +From 0c9c0fbb356e3210cb77b3d738be50981b26058a Mon Sep 17 00:00:00 2001 +From: Jan Beulich jbeulich@suse.com +Date: Wed, 2 Oct 2019 13:36:59 +0200 +Subject: [PATCH 1/2] IOMMU: add missing HVM check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix an unguarded d->arch.hvm access in assign_device().
+ +Signed-off-by: Jan Beulich jbeulich@suse.com +Reviewed-by: Roger Pau Monné roger.pau@citrix.com +Acked-by: Andrew Cooper andrew.cooper3@citrix.com + +(cherry picked from commit 41fd1009cd7416b73d745a77c24b4e8d1a296fe6) +Signed-off-by: Ian Jackson ian.jackson@eu.citrix.com +--- + xen/drivers/passthrough/pci.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c +index 8108ed5f9a..d7420bd8bf 100644 +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -1452,7 +1452,8 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) + + /* Prevent device assign if mem paging or mem sharing have been + * enabled for this domain */ +- if ( unlikely(d->arch.hvm.mem_sharing_enabled || ++ if ( unlikely((is_hvm_domain(d) && ++ d->arch.hvm.mem_sharing_enabled) || + vm_event_check_ring(d->vm_event_paging) || + p2m_get_hostp2m(d)->global_logdirty) ) + return -EXDEV; +-- +2.11.0 + diff --git a/xsa302-4.12-0002-passthrough-quarantine-PCI-devices.patch b/xsa302-4.12-0002-passthrough-quarantine-PCI-devices.patch new file mode 100644 index 0000000..181ece3 --- /dev/null +++ b/xsa302-4.12-0002-passthrough-quarantine-PCI-devices.patch @@ -0,0 +1,499 @@ +From 278d8e585a9f110a1af0bd92a9fc43733c9c7227 Mon Sep 17 00:00:00 2001 +From: Paul Durrant paul.durrant@citrix.com +Date: Mon, 14 Oct 2019 17:52:59 +0100 +Subject: [PATCH 2/2] passthrough: quarantine PCI devices + +When a PCI device is assigned to an untrusted domain, it is possible for +that domain to program the device to DMA to an arbitrary address. The +IOMMU is used to protect the host from malicious DMA by making sure that +the device addresses can only target memory assigned to the guest. However, +when the guest domain is torn down the device is assigned back to dom0, +thus allowing any in-flight DMA to potentially target critical host data. + +This patch introduces a 'quarantine' for PCI devices using dom_io. When +the toolstack makes a device assignable (by binding it to pciback), it +will now also assign it to DOMID_IO and the device will only be assigned +back to dom0 when the device is made unassignable again. Whilst the device is +assignable it will only ever transfer between dom_io and guest domains. +dom_io is actually only used as a sentinel domain for quarantining purposes; +it is not configured with any IOMMU mappings. Assignment to dom_io simply +means that the device's initiator (requestor) identifier is not present in +the IOMMU's device table and thus any DMA transactions issued will be +terminated with a fault condition. + +In addition, a fix to assignment handling is made for VT-d. Failure +during the assignment step should not lead to a device still being +associated with its prior owner. Hand the device to DomIO temporarily, +until the assignment step has completed successfully. Remove the PI +hooks from the source domain earlier as well. + +Failure of the recovery reassign_device_ownership() may not go silent: +There e.g. may still be left over RMRR mappings in the domain, assignment +to which has failed, and hence we can't allow that domain to continue +executing. + +NOTE: This patch also includes one printk() cleanup; the + "XEN_DOMCTL_assign_device: " tag is dropped in iommu_do_pci_domctl(), + since similar printk()-s elsewhere also don't log such a tag. + +This is XSA-302.
+ +Signed-off-by: Paul Durrant paul.durrant@citrix.com +Signed-off-by: Jan Beulich jbeulich@suse.com +Signed-off-by: Ian Jackson ian.jackson@eu.citrix.com +(cherry picked from commit ec99857f59f7f06236f11ca8b0b2303e5e745cc4) +--- + tools/libxl/libxl_pci.c | 25 +++++++++++- + xen/arch/x86/mm.c | 2 + + xen/common/domctl.c | 14 ++++++- + xen/drivers/passthrough/amd/pci_amd_iommu.c | 10 ++++- + xen/drivers/passthrough/iommu.c | 9 +++++ + xen/drivers/passthrough/pci.c | 59 ++++++++++++++++++++++------- + xen/drivers/passthrough/vtd/iommu.c | 40 ++++++++++++++++--- + xen/include/xen/pci.h | 3 ++ + 8 files changed, 138 insertions(+), 24 deletions(-) + +diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c +index 88c324ea23..d6a23fb5f8 100644 +--- a/tools/libxl/libxl_pci.c ++++ b/tools/libxl/libxl_pci.c +@@ -754,6 +754,7 @@ static int libxl__device_pci_assignable_add(libxl__gc *gc, + libxl_device_pci *pcidev, + int rebind) + { ++ libxl_ctx *ctx = libxl__gc_owner(gc); + unsigned dom, bus, dev, func; + char *spath, *driver_path = NULL; + int rc; +@@ -779,7 +780,7 @@ static int libxl__device_pci_assignable_add(libxl__gc *gc, + } + if ( rc ) { + LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func); +- return 0; ++ goto quarantine; + } + + /* Check to see if there's already a driver that we need to unbind from */ +@@ -810,6 +811,19 @@ static int libxl__device_pci_assignable_add(libxl__gc *gc, + return ERROR_FAIL; + } + ++quarantine: ++ /* ++ * DOMID_IO is just a sentinel domain, without any actual mappings, ++ * so always pass XEN_DOMCTL_DEV_RDM_RELAXED to avoid assignment being ++ * unnecessarily denied. ++ */ ++ rc = xc_assign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev), ++ XEN_DOMCTL_DEV_RDM_RELAXED); ++ if ( rc < 0 ) { ++ LOG(ERROR, "failed to quarantine "PCI_BDF, dom, bus, dev, func); ++ return ERROR_FAIL; ++ } ++ + return 0; + } + +@@ -817,9 +831,18 @@ static int libxl__device_pci_assignable_remove(libxl__gc *gc, + libxl_device_pci *pcidev, + int rebind) + { ++ libxl_ctx *ctx = libxl__gc_owner(gc); + int rc; + char *driver_path; + ++ /* De-quarantine */ ++ rc = xc_deassign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev)); ++ if ( rc < 0 ) { ++ LOG(ERROR, "failed to de-quarantine "PCI_BDF, pcidev->domain, pcidev->bus, ++ pcidev->dev, pcidev->func); ++ return ERROR_FAIL; ++ } ++ + /* Unbind from pciback */ + if ( (rc=pciback_dev_is_assigned(gc, pcidev)) < 0 ) { + return ERROR_FAIL; +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 3557cd1178..11d753d8d2 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -295,9 +295,11 @@ void __init arch_init_memory(void) + * Initialise our DOMID_IO domain. + * This domain owns I/O pages that are within the range of the page_info + * array. Mappings occur at the priv of the caller. ++ * Quarantined PCI devices will be associated with this domain. + */ + dom_io = domain_create(DOMID_IO, NULL, false); + BUG_ON(IS_ERR(dom_io)); ++ INIT_LIST_HEAD(&dom_io->arch.pdev_list); + + /* + * Initialise our COW domain. 
+diff --git a/xen/common/domctl.c b/xen/common/domctl.c +index d08b6274e2..e3c4be2b48 100644 +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -391,6 +391,16 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + + switch ( op->cmd ) + { ++ case XEN_DOMCTL_assign_device: ++ case XEN_DOMCTL_deassign_device: ++ if ( op->domain == DOMID_IO ) ++ { ++ d = dom_io; ++ break; ++ } ++ else if ( op->domain == DOMID_INVALID ) ++ return -ESRCH; ++ /* fall through */ + case XEN_DOMCTL_test_assign_device: + if ( op->domain == DOMID_INVALID ) + { +@@ -412,7 +422,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + + if ( !domctl_lock_acquire() ) + { +- if ( d ) ++ if ( d && d != dom_io ) + rcu_unlock_domain(d); + return hypercall_create_continuation( + __HYPERVISOR_domctl, "h", u_domctl); +@@ -1074,7 +1084,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + domctl_lock_release(); + + domctl_out_unlock_domonly: +- if ( d ) ++ if ( d && d != dom_io ) + rcu_unlock_domain(d); + + if ( copyback && __copy_to_guest(u_domctl, op, 1) ) +diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c +index 33a3798f36..15c13e1163 100644 +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -120,6 +120,10 @@ static void amd_iommu_setup_domain_device( + u8 bus = pdev->bus; + const struct domain_iommu *hd = dom_iommu(domain); + ++ /* dom_io is used as a sentinel for quarantined devices */ ++ if ( domain == dom_io ) ++ return; ++ + BUG_ON( !hd->arch.root_table || !hd->arch.paging_mode || + !iommu->dev_table.buffer ); + +@@ -277,6 +281,10 @@ void amd_iommu_disable_domain_device(struct domain *domain, + int req_id; + u8 bus = pdev->bus; + ++ /* dom_io is used as a sentinel for quarantined devices */ ++ if ( domain == dom_io ) ++ return; ++ + BUG_ON ( iommu->dev_table.buffer == NULL ); + req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn)); + dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); +@@ -363,7 +371,7 @@ static int amd_iommu_assign_device(struct domain *d, u8 devfn, + ivrs_mappings[req_id].read_permission); + } + +- return reassign_device(hardware_domain, d, devfn, pdev); ++ return reassign_device(pdev->domain, d, devfn, pdev); + } + + static void deallocate_next_page_table(struct page_info *pg, int level) +diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c +index a6697d58fb..2762e1342f 100644 +--- a/xen/drivers/passthrough/iommu.c ++++ b/xen/drivers/passthrough/iommu.c +@@ -232,6 +232,9 @@ void iommu_teardown(struct domain *d) + { + struct domain_iommu *hd = dom_iommu(d); + ++ if ( d == dom_io ) ++ return; ++ + hd->status = IOMMU_STATUS_disabled; + hd->platform_ops->teardown(d); + tasklet_schedule(&iommu_pt_cleanup_tasklet); +@@ -241,6 +244,9 @@ int iommu_construct(struct domain *d) + { + struct domain_iommu *hd = dom_iommu(d); + ++ if ( d == dom_io ) ++ return 0; ++ + if ( hd->status == IOMMU_STATUS_initialized ) + return 0; + +@@ -521,6 +527,9 @@ int __init iommu_setup(void) + printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis"); + if ( iommu_enabled ) + { ++ if ( iommu_domain_init(dom_io) ) ++ panic("Could not set up quarantine\n"); ++ + printk(" - Dom0 mode: %s\n", + iommu_hwdom_passthrough ? "Passthrough" : + iommu_hwdom_strict ? 
"Strict" : "Relaxed"); +diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c +index d7420bd8bf..d66a8a1daf 100644 +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -1426,19 +1426,29 @@ static int iommu_remove_device(struct pci_dev *pdev) + return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev)); + } + +-/* +- * If the device isn't owned by the hardware domain, it means it already +- * has been assigned to other domain, or it doesn't exist. +- */ + static int device_assigned(u16 seg, u8 bus, u8 devfn) + { + struct pci_dev *pdev; ++ int rc = 0; + + pcidevs_lock(); +- pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn); ++ ++ pdev = pci_get_pdev(seg, bus, devfn); ++ ++ if ( !pdev ) ++ rc = -ENODEV; ++ /* ++ * If the device exists and it is not owned by either the hardware ++ * domain or dom_io then it must be assigned to a guest, or be ++ * hidden (owned by dom_xen). ++ */ ++ else if ( pdev->domain != hardware_domain && ++ pdev->domain != dom_io ) ++ rc = -EBUSY; ++ + pcidevs_unlock(); + +- return pdev ? 0 : -EBUSY; ++ return rc; + } + + static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) +@@ -1452,7 +1462,8 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) + + /* Prevent device assign if mem paging or mem sharing have been + * enabled for this domain */ +- if ( unlikely((is_hvm_domain(d) && ++ if ( d != dom_io && ++ unlikely((is_hvm_domain(d) && + d->arch.hvm.mem_sharing_enabled) || + vm_event_check_ring(d->vm_event_paging) || + p2m_get_hostp2m(d)->global_logdirty) ) +@@ -1468,12 +1479,20 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) + return rc; + } + +- pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn); ++ pdev = pci_get_pdev(seg, bus, devfn); ++ ++ rc = -ENODEV; + if ( !pdev ) +- { +- rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV; + goto done; +- } ++ ++ rc = 0; ++ if ( d == pdev->domain ) ++ goto done; ++ ++ rc = -EBUSY; ++ if ( pdev->domain != hardware_domain && ++ pdev->domain != dom_io ) ++ goto done; + + if ( pdev->msix ) + msixtbl_init(d); +@@ -1496,6 +1515,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) + } + + done: ++ /* The device is assigned to dom_io so mark it as quarantined */ ++ if ( !rc && d == dom_io ) ++ pdev->quarantine = true; ++ + if ( !has_arch_pdevs(d) && has_iommu_pt(d) ) + iommu_teardown(d); + pcidevs_unlock(); +@@ -1508,6 +1531,7 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) + { + const struct domain_iommu *hd = dom_iommu(d); + struct pci_dev *pdev = NULL; ++ struct domain *target; + int ret = 0; + + if ( !iommu_enabled || !hd->platform_ops ) +@@ -1518,12 +1542,16 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) + if ( !pdev ) + return -ENODEV; + ++ /* De-assignment from dom_io should de-quarantine the device */ ++ target = (pdev->quarantine && pdev->domain != dom_io) ? 
++ dom_io : hardware_domain; ++ + while ( pdev->phantom_stride ) + { + devfn += pdev->phantom_stride; + if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) + break; +- ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn, ++ ret = hd->platform_ops->reassign_device(d, target, devfn, + pci_to_dev(pdev)); + if ( !ret ) + continue; +@@ -1534,7 +1562,7 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) + } + + devfn = pdev->devfn; +- ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn, ++ ret = hd->platform_ops->reassign_device(d, target, devfn, + pci_to_dev(pdev)); + if ( ret ) + { +@@ -1544,6 +1572,9 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn) + return ret; + } + ++ if ( pdev->domain == hardware_domain ) ++ pdev->quarantine = false; ++ + pdev->fault.count = 0; + + if ( !has_arch_pdevs(d) && has_iommu_pt(d) ) +@@ -1722,7 +1753,7 @@ int iommu_do_pci_domctl( + ret = hypercall_create_continuation(__HYPERVISOR_domctl, + "h", u_domctl); + else if ( ret ) +- printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: " ++ printk(XENLOG_G_ERR + "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n", + seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + d->domain_id, ret); +diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c +index 1db1cd9f2d..a8d1baa064 100644 +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -1338,6 +1338,10 @@ int domain_context_mapping_one( + int agaw, rc, ret; + bool_t flush_dev_iotlb; + ++ /* dom_io is used as a sentinel for quarantined devices */ ++ if ( domain == dom_io ) ++ return 0; ++ + ASSERT(pcidevs_locked()); + spin_lock(&iommu->lock); + maddr = bus_to_context_maddr(iommu, bus); +@@ -1573,6 +1577,10 @@ int domain_context_unmap_one( + int iommu_domid, rc, ret; + bool_t flush_dev_iotlb; + ++ /* dom_io is used as a sentinel for quarantined devices */ ++ if ( domain == dom_io ) ++ return 0; ++ + ASSERT(pcidevs_locked()); + spin_lock(&iommu->lock); + +@@ -1705,6 +1713,10 @@ static int domain_context_unmap(struct domain *domain, u8 devfn, + goto out; + } + ++ /* dom_io is used as a sentinel for quarantined devices */ ++ if ( domain == dom_io ) ++ goto out; ++ + /* + * if no other devices under the same iommu owned by this domain, + * clear iommu in iommu_bitmap and clear domain_id in domid_bitmp +@@ -2441,6 +2453,15 @@ static int reassign_device_ownership( + if ( ret ) + return ret; + ++ if ( devfn == pdev->devfn ) ++ { ++ list_move(&pdev->domain_list, &dom_io->arch.pdev_list); ++ pdev->domain = dom_io; ++ } ++ ++ if ( !has_arch_pdevs(source) ) ++ vmx_pi_hooks_deassign(source); ++ + if ( !has_arch_pdevs(target) ) + vmx_pi_hooks_assign(target); + +@@ -2459,15 +2480,13 @@ static int reassign_device_ownership( + pdev->domain = target; + } + +- if ( !has_arch_pdevs(source) ) +- vmx_pi_hooks_deassign(source); +- + return ret; + } + + static int intel_iommu_assign_device( + struct domain *d, u8 devfn, struct pci_dev *pdev, u32 flag) + { ++ struct domain *s = pdev->domain; + struct acpi_rmrr_unit *rmrr; + int ret = 0, i; + u16 bdf, seg; +@@ -2510,8 +2529,8 @@ static int intel_iommu_assign_device( + } + } + +- ret = reassign_device_ownership(hardware_domain, d, devfn, pdev); +- if ( ret ) ++ ret = reassign_device_ownership(s, d, devfn, pdev); ++ if ( ret || d == dom_io ) + return ret; + + /* Setup rmrr identity mapping */ +@@ -2524,11 +2543,20 @@ static int intel_iommu_assign_device( + ret = rmrr_identity_mapping(d, 1, rmrr, flag); + if ( ret ) + { +- 
reassign_device_ownership(d, hardware_domain, devfn, pdev); ++ int rc; ++ ++ rc = reassign_device_ownership(d, s, devfn, pdev); + printk(XENLOG_G_ERR VTDPREFIX + " cannot map reserved region (%"PRIx64",%"PRIx64"] for Dom%d (%d)\n", + rmrr->base_address, rmrr->end_address, + d->domain_id, ret); ++ if ( rc ) ++ { ++ printk(XENLOG_ERR VTDPREFIX ++ " failed to reclaim %04x:%02x:%02x.%u from %pd (%d)\n", ++ seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), d, rc); ++ domain_crash(d); ++ } + break; + } + } +diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h +index 8b21e8dc84..a031fd6020 100644 +--- a/xen/include/xen/pci.h ++++ b/xen/include/xen/pci.h +@@ -88,6 +88,9 @@ struct pci_dev { + + nodeid_t node; /* NUMA node */ + ++ /* Device to be quarantined, don't automatically re-assign to dom0 */ ++ bool quarantine; ++ + /* Device with errata, ignore the BARs. */ + bool ignore_bars; + +-- +2.11.0 + diff --git a/xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch b/xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch new file mode 100644 index 0000000..afb1096 --- /dev/null +++ b/xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch @@ -0,0 +1,74 @@ +From c8cb33fa64c9ccbfa2a494a9dad2e0a763c09176 Mon Sep 17 00:00:00 2001 +From: Julien Grall julien.grall@arm.com +Date: Tue, 1 Oct 2019 13:07:53 +0100 +Subject: [PATCH 1/4] xen/arm32: entry: Split __DEFINE_ENTRY_TRAP in two + +The preprocessing macro __DEFINE_ENTRY_TRAP is used to generate trap +entry functions. While the macro is fairly small today, follow-up patches +will increase the size significantly. + +In general, assembly macros are more readable as they allow you to name +parameters and avoid '\'. So the actual implementation of the trap is +now switched to an assembly macro. + +This is part of XSA-303. + +Reported-by: Julien Grall Julien.Grall@arm.com +Signed-off-by: Julien Grall julien.grall@arm.com +Reviewed-by: Stefano Stabellini sstabellini@kernel.org +Reviewed-by: Andre Przywara andre.przywara@arm.com +--- + xen/arch/arm/arm32/entry.S | 34 +++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 15 deletions(-) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 0b4cd19abd..4a762e04f1 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -126,24 +126,28 @@ abort_guest_exit_end: + skip_check: + mov pc, lr + +-/* +- * Macro to define trap entry. The iflags corresponds to the list of +- * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask. +- */ ++ /* ++ * Macro to define trap entry. The iflags corresponds to the list of ++ * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask. ++ */ ++ .macro vector trap, iflags ++ SAVE_ALL ++ cpsie \iflags ++ adr lr, return_from_trap ++ mov r0, sp ++ /* ++ * Save the stack pointer in r11. It will be restored after the ++ * trap has been handled (see return_from_trap). ++ */ ++ mov r11, sp ++ bic sp, #7 /* Align the stack pointer (noop on guest trap) */ ++ b do_trap_\trap ++ .endm ++ + #define __DEFINE_TRAP_ENTRY(trap, iflags) \ + ALIGN; \ + trap_##trap: \ +- SAVE_ALL; \ +- cpsie iflags; \ +- adr lr, return_from_trap; \ +- mov r0, sp; \ +- /* \ +- * Save the stack pointer in r11. It will be restored after the \ +- * trap has been handled (see return_from_trap).
\ +- mov r11, sp; \ +- bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ +- b do_trap_##trap ++ vector trap, iflags + + /* Trap handler which unmask IRQ/Abort, keep FIQ masked */ + #define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai) +-- +2.11.0 + diff --git a/xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch b/xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch new file mode 100644 index 0000000..35f9c04 --- /dev/null +++ b/xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch @@ -0,0 +1,97 @@ +From be7379207c83fa74f8a6c22a8ea213f02714776f Mon Sep 17 00:00:00 2001 +From: Julien Grall julien.grall@arm.com +Date: Tue, 1 Oct 2019 13:15:48 +0100 +Subject: [PATCH 2/4] xen/arm32: entry: Fold the macro SAVE_ALL in the macro + vector + +Follow-up rework will require the macro vector to distinguish between +a trap from a guest and a trap from within the hypervisor. + +The macro SAVE_ALL already has code to distinguish between the two and +it is only called by the vector macro. So fold the former into the +latter. This will help to avoid duplicating the check. + +This is part of XSA-303. + +Reported-by: Julien Grall Julien.Grall@arm.com +Signed-off-by: Julien Grall julien.grall@arm.com +Reviewed-by: Stefano Stabellini sstabellini@kernel.org +Reviewed-by: Andre Przywara andre.przywara@arm.com +--- + xen/arch/arm/arm32/entry.S | 46 +++++++++++++++++++++++----------------------- + 1 file changed, 23 insertions(+), 23 deletions(-) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 4a762e04f1..150cbc0b4b 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -13,27 +13,6 @@ + #define RESTORE_BANKED(mode) \ + RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode) + +-#define SAVE_ALL \ +- sub sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */ \ +- push {r0-r12}; /* Save R0-R12 */ \ +- \ +- mrs r11, ELR_hyp; /* ELR_hyp is return address. */\ +- str r11, [sp, #UREGS_pc]; \ +- \ +- str lr, [sp, #UREGS_lr]; \ +- \ +- add r11, sp, #UREGS_kernel_sizeof+4; \ +- str r11, [sp, #UREGS_sp]; \ +- \ +- mrc CP32(r11, HSR); /* Save exception syndrome */ \ +- str r11, [sp, #UREGS_hsr]; \ +- \ +- mrs r11, SPSR_hyp; \ +- str r11, [sp, #UREGS_cpsr]; \ +- and r11, #PSR_MODE_MASK; \ +- cmp r11, #PSR_MODE_HYP; \ +- blne save_guest_regs +- + save_guest_regs: +#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR + /* +@@ -52,7 +31,7 @@ save_guest_regs: + ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */ + str r11, [sp, #UREGS_sp] + SAVE_ONE_BANKED(SP_usr) +- /* LR_usr is the same physical register as lr and is saved in SAVE_ALL */ ++ /* LR_usr is the same physical register as lr and is saved by the caller */ + SAVE_BANKED(svc) + SAVE_BANKED(abt) + SAVE_BANKED(und) +@@ -131,7 +110,28 @@ skip_check: + * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask.
+ */ + .macro vector trap, iflags +- SAVE_ALL ++ /* Save registers in the stack */ ++ sub sp, #(UREGS_SP_usr - UREGS_sp) /* SP, LR, SPSR, PC */ ++ push {r0-r12} /* Save R0-R12 */ ++ mrs r11, ELR_hyp /* ELR_hyp is return address */ ++ str r11, [sp, #UREGS_pc] ++ ++ str lr, [sp, #UREGS_lr] ++ ++ add r11, sp, #(UREGS_kernel_sizeof + 4) ++ ++ str r11, [sp, #UREGS_sp] ++ ++ mrc CP32(r11, HSR) /* Save exception syndrome */ ++ str r11, [sp, #UREGS_hsr] ++ ++ mrs r11, SPSR_hyp ++ str r11, [sp, #UREGS_cpsr] ++ and r11, #PSR_MODE_MASK ++ cmp r11, #PSR_MODE_HYP ++ blne save_guest_regs ++ ++ /* We are ready to handle the trap, setup the registers and jump. */ + cpsie \iflags + adr lr, return_from_trap + mov r0, sp +-- +2.11.0 + diff --git a/xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch b/xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch new file mode 100644 index 0000000..5168452 --- /dev/null +++ b/xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch @@ -0,0 +1,226 @@ +From 098fe877967870ffda2dfd9629a5fd272f6aacdc Mon Sep 17 00:00:00 2001 +From: Julien Grall julien.grall@arm.com +Date: Fri, 11 Oct 2019 17:49:28 +0100 +Subject: [PATCH 3/4] xen/arm32: Don't blindly unmask interrupts on trap + without a change of level + +Exception vectors will unmask interrupts regardless of their state in +the interrupted context. + +One of the consequences is that IRQ will be unmasked when receiving an +undefined instruction exception (used by WARN*) from the hypervisor. +This could result in unexpected behavior such as deadlock (if a lock was +shared with interrupts). + +In a nutshell, interrupts should only be unmasked when it is safe to do so. +Xen only unmasks IRQ and Abort interrupts, so the logic can stay simple. + +As exception vectors may be shared between guest and hypervisor, we now +need to have a different policy for the interrupts. + +On exception from hypervisor, each vector will select the list of +interrupts to inherit from the interrupted context. Any interrupts not +listed will be kept masked. + +On exception from the guest, the Abort and IRQ will be unmasked +depending on the exact vector. + +The interrupts will be kept unmasked when the vector cannot be used by +either guest or hypervisor. + +Note that each vector is no longer preceded by ALIGN. This is fine +because the alignment is already bigger than what we need.
+ +Reported-by: Julien Grall Julien.Grall@arm.com +Signed-off-by: Julien Grall julien.grall@arm.com +Reviewed-by: Stefano Stabellini sstabellini@kernel.org +Reviewed-by: Andre Przywara andre.przywara@arm.com +--- + xen/arch/arm/arm32/entry.S | 138 +++++++++++++++++++++++++++++++++++---------- + 1 file changed, 109 insertions(+), 29 deletions(-) + +diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S +index 150cbc0b4b..ec90cca093 100644 +--- a/xen/arch/arm/arm32/entry.S ++++ b/xen/arch/arm/arm32/entry.S +@@ -4,6 +4,17 @@ + #include <asm/alternative.h> + #include <public/xen.h> + ++/* ++ * Short-hands to defined the interrupts (A, I, F) ++ * ++ * _ means the interrupt state will not change ++ * X means the state of interrupt X will change ++ * ++ * To be used with msr cpsr_* only ++ */ ++#define IFLAGS_AIF PSR_ABT_MASK | PSR_IRQ_MASK | PSR_FIQ_MASK ++#define IFLAGS_A_F PSR_ABT_MASK | PSR_FIQ_MASK ++ + #define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg] + #define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11 + +@@ -106,10 +117,18 @@ skip_check: + mov pc, lr + + /* +- * Macro to define trap entry. The iflags corresponds to the list of +- * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask. ++ * Macro to define a trap entry. ++ * ++ * @guest_iflags: Optional list of interrupts to unmask when ++ * entering from guest context. As this is used with cpsie, ++ * the letter (a, i, f) should be used. ++ * ++ * @hyp_iflags: Optional list of interrupts to inherit when ++ * entering from hypervisor context. Any interrupts not ++ * listed will be kept unchanged. As this is used with cpsr_*, ++ * IFLAGS_* short-hands should be used. + */ +- .macro vector trap, iflags ++ .macro vector trap, guest_iflags=n, hyp_iflags=0 + /* Save registers in the stack */ + sub sp, #(UREGS_SP_usr - UREGS_sp) /* SP, LR, SPSR, PC */ + push {r0-r12} /* Save R0-R12 */ +@@ -127,12 +146,39 @@ skip_check: + + mrs r11, SPSR_hyp + str r11, [sp, #UREGS_cpsr] +- and r11, #PSR_MODE_MASK +- cmp r11, #PSR_MODE_HYP +- blne save_guest_regs + ++ /* ++ * We need to distinguish whether we came from guest or ++ * hypervisor context. ++ */ ++ and r0, r11, #PSR_MODE_MASK ++ cmp r0, #PSR_MODE_HYP ++ ++ bne 1f ++ /* ++ * Trap from the hypervisor ++ * ++ * Inherit the state of the interrupts from the hypervisor ++ * context. For that we need to use SPSR (stored in r11) and ++ * modify CPSR accordingly. ++ * ++ * CPSR = (CPSR & ~hyp_iflags) | (SPSR & hyp_iflags) ++ */ ++ mrs r10, cpsr ++ bic r10, r10, #\hyp_iflags ++ and r11, r11, #\hyp_iflags ++ orr r10, r10, r11 ++ msr cpsr_cx, r10 ++ b 2f ++ ++1: ++ /* Trap from the guest */ ++ bl save_guest_regs ++ .if \guest_iflags != n ++ cpsie \guest_iflags ++ .endif ++2: + /* We are ready to handle the trap, setup the registers and jump. 
*/ +- cpsie \iflags + adr lr, return_from_trap + mov r0, sp + /* +@@ -144,20 +190,6 @@ skip_check: + b do_trap_\trap + .endm + +-#define __DEFINE_TRAP_ENTRY(trap, iflags) \ +- ALIGN; \ +-trap_##trap: \ +- vector trap, iflags +- +-/* Trap handler which unmask IRQ/Abort, keep FIQ masked */ +-#define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai) +- +-/* Trap handler which unmask Abort, keep IRQ/FIQ masked */ +-#define DEFINE_TRAP_ENTRY_NOIRQ(trap) __DEFINE_TRAP_ENTRY(trap, a) +- +-/* Trap handler which unmask IRQ, keep Abort/FIQ masked */ +-#define DEFINE_TRAP_ENTRY_NOABORT(trap) __DEFINE_TRAP_ENTRY(trap, i) +- + .align 5 + GLOBAL(hyp_traps_vector) + b trap_reset /* 0x00 - Reset */ +@@ -228,14 +260,62 @@ decode_vectors: + + #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ + +-DEFINE_TRAP_ENTRY(reset) +-DEFINE_TRAP_ENTRY(undefined_instruction) +-DEFINE_TRAP_ENTRY(hypervisor_call) +-DEFINE_TRAP_ENTRY(prefetch_abort) +-DEFINE_TRAP_ENTRY(guest_sync) +-DEFINE_TRAP_ENTRY_NOIRQ(irq) +-DEFINE_TRAP_ENTRY_NOIRQ(fiq) +-DEFINE_TRAP_ENTRY_NOABORT(data_abort) ++/* Vector not used by the Hypervisor. */ ++trap_reset: ++ vector reset ++ ++/* ++ * Vector only used by the Hypervisor. ++ * ++ * While the exception can be executed with all the interrupts (e.g. ++ * IRQ) unmasked, the interrupted context may have purposefully masked ++ * some of them. So we want to inherit the state from the interrupted ++ * context. ++ */ ++trap_undefined_instruction: ++ vector undefined_instruction, hyp_iflags=IFLAGS_AIF ++ ++/* We should never reach this trap */ ++trap_hypervisor_call: ++ vector hypervisor_call ++ ++/* ++ * Vector only used by the hypervisor. ++ * ++ * While the exception can be executed with all the interrupts (e.g. ++ * IRQ) unmasked, the interrupted context may have purposefully masked ++ * some of them. So we want to inherit the state from the interrupted ++ * context. ++ */ ++trap_prefetch_abort: ++ vector prefetch_abort, hyp_iflags=IFLAGS_AIF ++ ++/* ++ * Vector only used by the hypervisor. ++ * ++ * Data Abort should be rare and most likely fatal. It is best to not ++ * unmask any interrupts to limit the amount of code that can run before ++ * the Data Abort is treated. ++ */ ++trap_data_abort: ++ vector data_abort ++ ++/* Vector only used by the guest. We can unmask Abort/IRQ. */ ++trap_guest_sync: ++ vector guest_sync, guest_iflags=ai ++ ++ ++/* Vector used by the hypervisor and the guest. */ ++trap_irq: ++ vector irq, guest_iflags=a, hyp_iflags=IFLAGS_A_F ++ ++/* ++ * Vector used by the hypervisor and the guest. ++ * ++ * FIQ are not meant to happen, so we don't unmask any interrupts. ++ */ ++trap_fiq: ++ vector fiq + + return_from_trap: + /* +-- +2.11.0 + diff --git a/xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch b/xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch new file mode 100644 index 0000000..106cbf9 --- /dev/null +++ b/xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch @@ -0,0 +1,114 @@ +From c6d290ce157a044dec417fdda8db71e41a37d744 Mon Sep 17 00:00:00 2001 +From: Julien Grall julien.grall@arm.com +Date: Mon, 7 Oct 2019 18:10:56 +0100 +Subject: [PATCH 4/4] xen/arm64: Don't blindly unmask interrupts on trap + without a change of level + +Some of the traps without a change of the level (i.e. hypervisor -> +hypervisor) will unmask interrupts regardless of their state in the +interrupted context. + +One of the consequences is that IRQ will be unmasked when receiving a +synchronous exception (used by WARN*()).
This could result in unexpected +behavior such as deadlock (if a lock was shared with interrupts). + +In a nutshell, interrupts should only be unmasked when it is safe to +do so. Xen only unmasks IRQ and Abort interrupts, so the logic can stay +simple: + - hyp_error: All the interrupts are now kept masked. SError should + be pretty rare and if ever happen then we most likely want to + avoid any other interrupts to be generated. The potential main + "caller" is during virtual SError synchronization on the exit + path from the guest (see check_pending_vserror). + + - hyp_sync: The interrupts state is inherited from the interrupted + context. + + - hyp_irq: All the interrupts but IRQ state are inherited from the + interrupted context. IRQ is kept masked. + +This is part of XSA-303. + +Reported-by: Julien Grall Julien.Grall@arm.com +Signed-off-by: Julien Grall julien.grall@arm.com +Reviewed-by: Stefano Stabellini sstabellini@kernel.org +Reviewed-by: Andre Przywara andre.przywara@arm.com +--- + xen/arch/arm/arm64/entry.S | 47 ++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 43 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S +index 2d9a2713a1..3e41ba65b6 100644 +--- a/xen/arch/arm/arm64/entry.S ++++ b/xen/arch/arm/arm64/entry.S +@@ -188,24 +188,63 @@ hyp_error_invalid: + entry hyp=1 + invalid BAD_ERROR + ++/* ++ * SError received while running in the hypervisor mode. ++ * ++ * Technically, we could unmask the IRQ if it were unmasked in the ++ * interrupted context. However, this require to check the PSTATE. For ++ * simplicity, as SError should be rare and potentially fatal, ++ * all interrupts are kept masked. ++ */ + hyp_error: + entry hyp=1 +- msr daifclr, #2 + mov x0, sp + bl do_trap_hyp_serror + exit hyp=1 + +-/* Traps taken in Current EL with SP_ELx */ ++/* ++ * Synchronous exception received while running in the hypervisor mode. ++ * ++ * While the exception could be executed with all the interrupts (e.g. ++ * IRQ) unmasked, the interrupted context may have purposefully masked ++ * some of them. So we want to inherit the state from the interrupted ++ * context. ++ */ + hyp_sync: + entry hyp=1 +- msr daifclr, #6 ++ ++ /* Inherit interrupts */ ++ mrs x0, SPSR_el2 ++ and x0, x0, #(PSR_DBG_MASK | PSR_ABT_MASK | PSR_IRQ_MASK | PSR_FIQ_MASK) ++ msr daif, x0 ++ + mov x0, sp + bl do_trap_hyp_sync + exit hyp=1 + ++/* ++ * IRQ received while running in the hypervisor mode. ++ * ++ * While the exception could be executed with all the interrupts but IRQ ++ * unmasked, the interrupted context may have purposefully masked some ++ * of them. So we want to inherit the state from the interrupt context ++ * and keep IRQ masked. ++ * ++ * XXX: We may want to consider an ordering between interrupts (e.g. if ++ * SError are masked, then IRQ should be masked too). However, this ++ * would require some rework in some paths (e.g. panic, livepatch) to ++ * ensure the ordering is enforced everywhere. ++ */ + hyp_irq: + entry hyp=1 +- msr daifclr, #4 ++ ++ /* Inherit D, A, F interrupts and keep I masked */ ++ mrs x0, SPSR_el2 ++ mov x1, #(PSR_DBG_MASK | PSR_ABT_MASK | PSR_FIQ_MASK) ++ and x0, x0, x1 ++ orr x0, x0, #PSR_IRQ_MASK ++ msr daif, x0 ++ + mov x0, sp + bl do_trap_irq + exit hyp=1 +-- +2.11.0 +
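A closing note on the four XSA-303 patches above: the common thread is that exception entry now inherits the interrupt-mask bits of the interrupted context instead of blindly clearing them. A minimal C model of that bit manipulation -- the mask values below are hypothetical stand-ins, not Xen's PSR_*_MASK definitions:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the arm PSR_*_MASK bits. */
#define PSR_ABT_MASK (1u << 8)
#define PSR_IRQ_MASK (1u << 7)
#define PSR_FIQ_MASK (1u << 6)

/*
 * Model of the fix: instead of blindly clearing mask bits (the old
 * "cpsie" / "msr daifclr" behaviour), compute
 *     new = (current & ~inherit) | (saved & inherit)
 * so that any interrupt the interrupted context had masked stays masked.
 */
static uint32_t inherit_flags(uint32_t current, uint32_t saved, uint32_t inherit)
{
    return (current & ~inherit) | (saved & inherit);
}

int main(void)
{
    uint32_t cpsr = PSR_ABT_MASK | PSR_IRQ_MASK | PSR_FIQ_MASK; /* all masked on entry */
    uint32_t spsr = PSR_IRQ_MASK;   /* interrupted context had only IRQ masked */

    /* hyp_irq-style entry: inherit A and F, keep IRQ masked unconditionally. */
    uint32_t next = inherit_flags(cpsr, spsr, PSR_ABT_MASK | PSR_FIQ_MASK) | PSR_IRQ_MASK;

    /* Prints A=0 I=1 F=0: Abort/FIQ follow the saved state, IRQ stays masked. */
    printf("A=%d I=%d F=%d\n",
           !!(next & PSR_ABT_MASK), !!(next & PSR_IRQ_MASK), !!(next & PSR_FIQ_MASK));
    return 0;
}

The arm32 version computes exactly this (CPSR & ~hyp_iflags) | (SPSR & hyp_iflags) expression in assembly; the arm64 hyp_irq path likewise inherits D/A/F but forces the IRQ bit to remain masked, as modelled above.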
arch-excludes@lists.fedoraproject.org