With ARMv8.2-LPA architecture extension availability, arm64 hardware
which supports this extension can support up to 52-bit physical
addresses.
Since, at the moment, we enable support for this extension in the
kernel via a CONFIG flag (CONFIG_ARM64_PA_BITS_52), there is no
clear mechanism in user-space to determine this CONFIG
flag value and use it to determine the PA address range values.
'makedumpfile' can instead use the 'MAX_PHYSMEM_BITS' value to
determine the maximum physical address supported by the underlying kernel.
I have sent a kernel patch upstream to add 'MAX_PHYSMEM_BITS' to
vmcoreinfo for arm64.
This patch is in accordance with ARMv8 Architecture Reference Manual
version D.a and also works well for the existing
lower PA address values (e.g. 48-bit PA addresses).
[0] http://lists.infradead.org/pipermail/kexec/2019-February/022411.html
Signed-off-by: Bhupesh Sharma <bhsharma(a)redhat.com>
---
arch/arm64.c | 332 +++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 267 insertions(+), 65 deletions(-)
diff --git a/arch/arm64.c b/arch/arm64.c
index 053519359cbc..a1db7dc63107 100644
--- a/arch/arm64.c
+++ b/arch/arm64.c
@@ -39,72 +39,276 @@ typedef struct {
unsigned long pte;
} pte_t;
+#define __pte(x) ((pte_t) { (x) } )
+#define __pmd(x) ((pmd_t) { (x) } )
+#define __pud(x) ((pud_t) { (x) } )
+#define __pgd(x) ((pgd_t) { (x) } )
+
+static int lpa_52_bit_support_available;
static int pgtable_level;
static int va_bits;
static unsigned long kimage_voffset;
-#define SZ_4K (4 * 1024)
-#define SZ_16K (16 * 1024)
-#define SZ_64K (64 * 1024)
-#define SZ_128M (128 * 1024 * 1024)
+#define SZ_4K 4096
+#define SZ_16K 16384
+#define SZ_64K 65536
-#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36)
-#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39)
-#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42)
-#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47)
-#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48)
+#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36)
+#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39)
+#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42)
+#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47)
+#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48)
+#define PAGE_OFFSET_52 ((0xffffffffffffffffUL) << 52)
#define pgd_val(x) ((x).pgd)
#define pud_val(x) (pgd_val((x).pgd))
#define pmd_val(x) (pud_val((x).pud))
#define pte_val(x) ((x).pte)
-#define PAGE_MASK (~(PAGESIZE() - 1))
-#define PGDIR_SHIFT ((PAGESHIFT() - 3) * pgtable_level + 3)
-#define PTRS_PER_PGD (1 << (va_bits - PGDIR_SHIFT))
-#define PUD_SHIFT get_pud_shift_arm64()
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE - 1))
-#define PTRS_PER_PTE (1 << (PAGESHIFT() - 3))
-#define PTRS_PER_PUD PTRS_PER_PTE
-#define PMD_SHIFT ((PAGESHIFT() - 3) * 2 + 3)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE - 1))
+/* See 'include/uapi/linux/const.h' for definitions below */
+#define __AC(X,Y) (X##Y)
+#define _AC(X,Y) __AC(X,Y)
+#define _AT(T,X) ((T)(X))
+
+/* See 'include/asm/pgtable-types.h' for definitions below */
+typedef unsigned long pteval_t;
+typedef unsigned long pmdval_t;
+typedef unsigned long pudval_t;
+typedef unsigned long pgdval_t;
+
+#define PAGE_SIZE get_pagesize_arm64()
+#define PAGE_SHIFT PAGESHIFT()
+
+/* See 'arch/arm64/include/asm/pgtable-hwdef.h' for definitions below */
+
+/*
+ * Number of page-table levels required to address 'va_bits' wide
+ * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
+ * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ *
+ * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ *
+ * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
+ *
+ * We cannot include linux/kernel.h which defines DIV_ROUND_UP here
+ * due to build issues. So we open code DIV_ROUND_UP here:
+ *
+ * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ *
+ * which gets simplified as :
+ */
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+
+/*
+ * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * in the final page. The maximum number of translation levels supported by
+ * the architecture is 4. Hence, starting at at level n, we have further
+ * ((4 - n) - 1) levels of translation excluding the offset within the page.
+ * So, the total number of bits mapped by an entry at level n is :
+ *
+ * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ *
+ * Rearranging it a bit we get :
+ * (4 - n) * (PAGE_SHIFT - 3) + 3
+ */
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+
+#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
+
+/*
+ * PMD_SHIFT determines the size a level 2 page table entry can map.
+ */
+#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
+#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
#define PTRS_PER_PMD PTRS_PER_PTE
-#define PAGE_PRESENT (1 << 0)
+/*
+ * PUD_SHIFT determines the size a level 1 page table entry can map.
+ */
+#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PTRS_PER_PUD PTRS_PER_PTE
+
+static inline int
+get_page_table_level_arm64(void)
+{
+ return pgtable_level;
+}
+
+/*
+ * PGDIR_SHIFT determines the size a top-level page table entry can map
+ * (depending on the configuration, this level can be 0, 1 or 2).
+ */
+#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (get_page_table_level_arm64()))
+#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD (1 << ((va_bits) - PGDIR_SHIFT))
+
+/*
+ * Section address mask and size definitions.
+ */
+#define SECTION_SHIFT PMD_SHIFT
+#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
+#define SECTION_MASK (~(SECTION_SIZE-1))
+
#define SECTIONS_SIZE_BITS 30
-/* Highest possible physical address supported */
-#define PHYS_MASK_SHIFT 48
-#define PHYS_MASK ((1UL << PHYS_MASK_SHIFT) - 1)
+
+/*
+ * Hardware page table definitions.
+ *
+ * Level 1 descriptor (PUD).
+ */
+#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
+#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1)
+#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
+#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
+
+/*
+ * Level 2 descriptor (PMD).
+ */
+#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
+#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
+#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
+#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
+#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
+
+/*
+ * Section
+ */
+#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
+#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
+#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
+#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
+#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
+#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52)
+#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
+#define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PMD_ATTRINDX(t) (_AT(pmdval_t, (t)) << 2)
+#define PMD_ATTRINDX_MASK (_AT(pmdval_t, 7) << 2)
+
/*
- * Remove the highest order bits that are not a part of the
- * physical address in a section
+ * Level 3 descriptor (PTE).
*/
-#define PMD_SECTION_MASK ((1UL << 40) - 1)
+#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
+#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
+#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
+#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
+#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
+#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
+#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
+#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
+#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
+#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
+#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
+#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
+#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */
+
+#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
+#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
+
+static inline unsigned long
+get_pte_addr_mask_arm64(void)
+{
+ if (lpa_52_bit_support_available)
+ return (PTE_ADDR_LOW | PTE_ADDR_HIGH);
+ else
+ return PTE_ADDR_LOW;
+}
+
+#define PTE_ADDR_MASK get_pte_addr_mask_arm64()
+
+#define PAGE_MASK (~(PAGESIZE() - 1))
+#define PAGE_PRESENT (1 << 0)
+
+/* Highest possible physical address supported */
+static inline int
+get_phy_mask_shift_arm64(void)
+{
+ int pa_bits = 48 /* default: 48-bit PA */;
+
+ if (NUMBER(MAX_PHYSMEM_BITS) != NOT_FOUND_NUMBER)
+ pa_bits = NUMBER(MAX_PHYSMEM_BITS);
+
+ return pa_bits;
+}
-#define PMD_TYPE_MASK 3
-#define PMD_TYPE_SECT 1
-#define PMD_TYPE_TABLE 3
+#define PHYS_MASK_SHIFT get_phy_mask_shift_arm64()
-#define PUD_TYPE_MASK 3
-#define PUD_TYPE_SECT 1
-#define PUD_TYPE_TABLE 3
+/* Helper API to convert between a physical address and its placement
+ * in a page table entry, taking care of 52-bit addresses.
+ */
+static inline unsigned long
+__pte_to_phys(pte_t pte)
+{
+ if (lpa_52_bit_support_available)
+ return ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36));
+ else
+ return (pte_val(pte) & PTE_ADDR_MASK);
+}
+/* Find an entry in a page-table-directory */
#define pgd_index(vaddr) (((vaddr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-#define pgd_offset(pgdir, vaddr) ((pgd_t *)(pgdir) + pgd_index(vaddr))
-#define pte_index(vaddr) (((vaddr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
-#define pmd_page_paddr(pmd) (pmd_val(pmd) & PHYS_MASK & (int32_t)PAGE_MASK)
-#define pte_offset(dir, vaddr) ((pte_t*)pmd_page_paddr((*dir)) + pte_index(vaddr))
+static inline pte_t
+pgd_pte(pgd_t pgd)
+{
+ return __pte(pgd_val(pgd));
+}
+
+#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd))
+#define pgd_offset(pgd, vaddr) ((pgd_t *)(pgd) + pgd_index(vaddr))
-#define pmd_index(vaddr) (((vaddr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-#define pud_page_paddr(pud) (pud_val(pud) & PHYS_MASK & (int32_t)PAGE_MASK)
-#define pmd_offset_pgtbl_lvl_2(pud, vaddr) ((pmd_t *)pud)
-#define pmd_offset_pgtbl_lvl_3(pud, vaddr) ((pmd_t *)pud_page_paddr((*pud)) + pmd_index(vaddr))
+static inline pte_t pud_pte(pud_t pud)
+{
+ return __pte(pud_val(pud));
+}
+static inline unsigned long
+pgd_page_paddr(pgd_t pgd)
+{
+ return __pgd_to_phys(pgd);
+}
+
+/* Find an entry in the first-level page table. */
#define pud_index(vaddr) (((vaddr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-#define pgd_page_paddr(pgd) (pgd_val(pgd) & PHYS_MASK & (int32_t)PAGE_MASK)
+#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
+
+static inline unsigned long
+pud_page_paddr(pud_t pud)
+{
+ return __pud_to_phys(pud);
+}
+
+/* Find an entry in the second-level page table. */
+#define pmd_index(vaddr) (((vaddr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pmd_offset_pgtbl_lvl_2(dir, vaddr) ((pmd_t *)dir)
+#define pmd_offset_pgtbl_lvl_3(dir, vaddr) (pud_page_paddr((*(dir))) + pmd_index(vaddr) * sizeof(pmd_t))
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
+
+#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
+
+static inline unsigned long
+pmd_page_paddr(pmd_t pmd)
+{
+ return __pmd_to_phys(pmd);
+}
+
+/* Find an entry in the third-level page table. */
+#define pte_index(vaddr) (((vaddr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, vaddr) (pmd_page_paddr((*dir)) + pte_index(vaddr) * sizeof(pte_t))
static unsigned long long
__pa(unsigned long vaddr)
@@ -116,30 +320,20 @@ __pa(unsigned long vaddr)
return (vaddr - kimage_voffset);
}
-static int
-get_pud_shift_arm64(void)
-{
- if (pgtable_level == 4)
- return ((PAGESHIFT() - 3) * 3 + 3);
- else
- return PGDIR_SHIFT;
-}
-
static pmd_t *
pmd_offset(pud_t *puda, pud_t *pudv, unsigned long vaddr)
{
- if (pgtable_level == 2) {
- return pmd_offset_pgtbl_lvl_2(puda, vaddr);
- } else {
- return pmd_offset_pgtbl_lvl_3(pudv, vaddr);
- }
+ if (pgtable_level > 2)
+ return (pmd_t *)pmd_offset_pgtbl_lvl_3(pudv, vaddr);
+ else
+ return (pmd_t *)pmd_offset_pgtbl_lvl_2(puda, vaddr);
}
static pud_t *
pud_offset(pgd_t *pgda, pgd_t *pgdv, unsigned long vaddr)
{
- if (pgtable_level == 4)
- return ((pud_t *)pgd_page_paddr((*pgdv)) + pud_index(vaddr));
+ if (pgtable_level > 3)
+ return (pud_t *)(pgd_page_paddr((*pgdv)) + pud_index(vaddr) * sizeof(pud_t));
else
return (pud_t *)(pgda);
}
@@ -287,6 +481,15 @@ get_stext_symbol(void)
int
get_machdep_info_arm64(void)
{
+ int pa_bits;
+
+ /* Determine if the PA address range is 52-bits: ARMv8.2-LPA */
+ pa_bits = get_phy_mask_shift_arm64();
+ DEBUG_MSG("pa_bits : %d\n", pa_bits);
+
+ if (pa_bits == 52)
+ lpa_52_bit_support_available = 1;
+
/* Check if va_bits is still not initialized. If still 0, call
* get_versiondep_info() to initialize the same.
*/
@@ -303,8 +506,8 @@ get_machdep_info_arm64(void)
info->section_size_bits = SECTIONS_SIZE_BITS;
DEBUG_MSG("kimage_voffset : %lx\n", kimage_voffset);
- DEBUG_MSG("max_physmem_bits : %lx\n", info->max_physmem_bits);
- DEBUG_MSG("section_size_bits: %lx\n", info->section_size_bits);
+ DEBUG_MSG("max_physmem_bits : %ld\n", info->max_physmem_bits);
+ DEBUG_MSG("section_size_bits: %ld\n", info->section_size_bits);
return TRUE;
}
@@ -401,7 +604,7 @@ vaddr_to_paddr_arm64(unsigned long vaddr)
if ((pud_val(pudv) & PUD_TYPE_MASK) == PUD_TYPE_SECT) {
/* 1GB section for Page Table level = 4 and Page Size = 4KB */
- paddr = (pud_val(pudv) & (PUD_MASK & PMD_SECTION_MASK))
+ paddr = (pud_val(pudv) & (PUD_MASK & SECTION_MASK))
+ (vaddr & (PUD_SIZE - 1));
return paddr;
}
@@ -414,7 +617,7 @@ vaddr_to_paddr_arm64(unsigned long vaddr)
switch (pmd_val(pmdv) & PMD_TYPE_MASK) {
case PMD_TYPE_TABLE:
- ptea = pte_offset(&pmdv, vaddr);
+ ptea = (pte_t *)pte_offset(&pmdv, vaddr);
/* 64k page */
if (!readmem(PADDR, (unsigned long long)ptea, &ptev, sizeof(ptev))) {
ERRMSG("Can't read pte\n");
@@ -425,14 +628,13 @@ vaddr_to_paddr_arm64(unsigned long vaddr)
ERRMSG("Can't get a valid pte.\n");
return NOT_PADDR;
} else {
-
- paddr = (PAGEBASE(pte_val(ptev)) & PHYS_MASK)
+ paddr = (PAGEBASE(pte_val(ptev)) & PTE_ADDR_MASK)
+ (vaddr & (PAGESIZE() - 1));
}
break;
case PMD_TYPE_SECT:
/* 512MB section for Page Table level = 3 and Page Size = 64KB*/
- paddr = (pmd_val(pmdv) & (PMD_MASK & PMD_SECTION_MASK))
+ paddr = (pmd_val(pmdv) & (PMD_MASK & SECTION_MASK))
+ (vaddr & (PMD_SIZE - 1));
break;
}
--
2.7.4