From: Mitchel Humpherys
Date: Tue, 14 Jul 2015 23:41:29 +0000 (-0700)
Subject: iommu/io-pgtable-arm: Don't leave dangling table entries
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=d1f111a6f98c2e86471b4cc8c987f0affde1f5ae;p=sagit-ice-cold%2Fkernel_xiaomi_msm8998.git

iommu/io-pgtable-arm: Don't leave dangling table entries

Currently, when all of the 4K PTEs beneath a 2M table entry are
unmapped, that 2M table entry is left intact, even though it doesn't
point to any valid 4K mappings anymore. This results in a warning if a
subsequent block mapping lands on top of the dangling table entry,
since we require empty page table entries when we map. It also causes
the page at which the stomped-on table entry was pointing to be leaked.

Fix this by keeping track of how many entries are currently mapped
beneath a table. When the map count goes to zero (in unmap), free up
the page the table is pointing at and zero out the table entry.

Change-Id: I470e6ffb2206a09fe7c24253e3fd64a744337a7f
Signed-off-by: Mitchel Humpherys
---
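[Editor's note, not part of the patch: the counting scheme described in the
comment added below can be exercised on its own. This is a minimal userspace
sketch, assuming nothing from the kernel tree; the constants and the
iopte_val()/iopte_tblcnt()/iopte_tblcnt_set() names mirror the helpers the
patch adds, while the main() harness and its sample PTE value are made up.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t arm_lpae_iopte;

#define BOTTOM_IGNORED_MASK     0x3ffULL
#define BOTTOM_IGNORED_SHIFT    2
#define BOTTOM_IGNORED_NUM_BITS 10
#define TOP_IGNORED_MASK        0x7fULL
#define TOP_IGNORED_SHIFT       52
#define IOPTE_RESERVED_MASK     ((BOTTOM_IGNORED_MASK << BOTTOM_IGNORED_SHIFT) | \
                                 (TOP_IGNORED_MASK << TOP_IGNORED_SHIFT))

/* Strip the software-owned bits, leaving only the architectural PTE. */
static arm_lpae_iopte iopte_val(arm_lpae_iopte pte)
{
        return pte & ~IOPTE_RESERVED_MASK;
}

/* Reassemble the count: low 10 bits from [11..2], high 7 from [58..52]. */
static int iopte_tblcnt(arm_lpae_iopte pte)
{
        int bottom = (pte >> BOTTOM_IGNORED_SHIFT) & BOTTOM_IGNORED_MASK;
        int top = (pte >> TOP_IGNORED_SHIFT) & TOP_IGNORED_MASK;

        return bottom | (top << BOTTOM_IGNORED_NUM_BITS);
}

/* Store a count without disturbing the architectural bits. */
static void iopte_tblcnt_set(arm_lpae_iopte *ptep, int val)
{
        arm_lpae_iopte pte = iopte_val(*ptep);

        pte |= ((arm_lpae_iopte)val & BOTTOM_IGNORED_MASK)
                        << BOTTOM_IGNORED_SHIFT;
        pte |= (((arm_lpae_iopte)val >> BOTTOM_IGNORED_NUM_BITS)
                        & TOP_IGNORED_MASK) << TOP_IGNORED_SHIFT;
        *ptep = pte;
}

int main(void)
{
        /* made-up table entry with the ignored bits initially clear */
        arm_lpae_iopte pte = 0x40000000dead5003ULL & ~IOPTE_RESERVED_MASK;
        arm_lpae_iopte hw = pte;

        iopte_tblcnt_set(&pte, 8192);   /* the armv8 worst case from the comment */
        assert(iopte_tblcnt(pte) == 8192);
        assert(iopte_val(pte) == hw);   /* hardware-visible bits untouched */
        printf("count=%d pte=%#llx\n", iopte_tblcnt(pte),
               (unsigned long long)pte);
        return 0;
}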
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 56c31715683a..3c48d95b959f 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -163,8 +163,8 @@
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV      2
 
 /* IOPTE accessors */
-#define iopte_deref(pte,d) \
-        (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \
+#define iopte_deref(pte, d) \
+        (__va(iopte_val(pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \
         & ~((1ULL << (d)->pg_shift) - 1)))
 
 #define iopte_type(pte,l) \
@@ -196,12 +196,82 @@ struct arm_lpae_io_pgtable {
 
 typedef u64 arm_lpae_iopte;
 
+/*
+ * We'll use some ignored bits in table entries to keep track of the number
+ * of page mappings beneath the table.  The maximum number of entries
+ * beneath any table mapping in armv8 is 8192 (which is possible at the
+ * 2nd- and 3rd-level when using a 64K granule size).  The bits at our
+ * disposal are:
+ *
+ *     4k granule: [58..52], [11..2]
+ *    64k granule: [58..52], [15..2]
+ *
+ * [58..52], [11..2] is enough bits for tracking table mappings at any
+ * level for any granule, so we'll use those.
+ */
+#define BOTTOM_IGNORED_MASK 0x3ff
+#define BOTTOM_IGNORED_SHIFT 2
+#define BOTTOM_IGNORED_NUM_BITS 10
+#define TOP_IGNORED_MASK 0x7fULL
+#define TOP_IGNORED_SHIFT 52
+#define IOPTE_RESERVED_MASK ((BOTTOM_IGNORED_MASK << BOTTOM_IGNORED_SHIFT) | \
+                             (TOP_IGNORED_MASK << TOP_IGNORED_SHIFT))
+
+static arm_lpae_iopte iopte_val(arm_lpae_iopte table_pte)
+{
+        return table_pte & ~IOPTE_RESERVED_MASK;
+}
+
+static arm_lpae_iopte _iopte_bottom_ignored_val(arm_lpae_iopte table_pte)
+{
+        return (table_pte & (BOTTOM_IGNORED_MASK << BOTTOM_IGNORED_SHIFT))
+                >> BOTTOM_IGNORED_SHIFT;
+}
+
+static arm_lpae_iopte _iopte_top_ignored_val(arm_lpae_iopte table_pte)
+{
+        return (table_pte & (TOP_IGNORED_MASK << TOP_IGNORED_SHIFT))
+                >> TOP_IGNORED_SHIFT;
+}
+
+static int iopte_tblcnt(arm_lpae_iopte table_pte)
+{
+        return (_iopte_bottom_ignored_val(table_pte) |
+                (_iopte_top_ignored_val(table_pte) << BOTTOM_IGNORED_NUM_BITS));
+}
+
+static void iopte_tblcnt_set(arm_lpae_iopte *table_pte, int val)
+{
+        arm_lpae_iopte pte = iopte_val(*table_pte);
+
+        pte |= ((val & BOTTOM_IGNORED_MASK) << BOTTOM_IGNORED_SHIFT) |
+               (((val & (TOP_IGNORED_MASK << BOTTOM_IGNORED_NUM_BITS))
+                 >> BOTTOM_IGNORED_NUM_BITS) << TOP_IGNORED_SHIFT);
+        *table_pte = pte;
+}
+
+static void iopte_tblcnt_sub(arm_lpae_iopte *table_ptep, int cnt)
+{
+        arm_lpae_iopte current_cnt = iopte_tblcnt(*table_ptep);
+
+        current_cnt -= cnt;
+        iopte_tblcnt_set(table_ptep, current_cnt);
+}
+
+static void iopte_tblcnt_add(arm_lpae_iopte *table_ptep, int cnt)
+{
+        arm_lpae_iopte current_cnt = iopte_tblcnt(*table_ptep);
+
+        current_cnt += cnt;
+        iopte_tblcnt_set(table_ptep, current_cnt);
+}
+
 static bool suppress_map_failures;
 
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                              unsigned long iova, phys_addr_t paddr,
                              arm_lpae_iopte prot, int lvl,
-                             arm_lpae_iopte *ptep)
+                             arm_lpae_iopte *ptep, arm_lpae_iopte *prev_ptep)
 {
         arm_lpae_iopte pte = prot;
 
@@ -224,12 +294,17 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
         *ptep = pte;
         data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie);
+
+        if (prev_ptep)
+                iopte_tblcnt_add(prev_ptep, 1);
+
         return 0;
 }
 
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                           phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
-                          int lvl, arm_lpae_iopte *ptep)
+                          int lvl, arm_lpae_iopte *ptep,
+                          arm_lpae_iopte *prev_ptep)
 {
         arm_lpae_iopte *cptep, pte;
         void *cookie = data->iop.cookie;
@@ -240,7 +315,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 
         /* If we can install a leaf entry at this level, then do so */
         if (size == block_size && (size & data->iop.cfg.pgsize_bitmap))
-                return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
+                return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep,
+                                         prev_ptep);
 
         /* We can't allocate tables at the final level */
         if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@@ -267,7 +343,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
         }
 
         /* Rinse, repeat */
-        return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep);
+        return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep,
+                              ptep);
 }
 
 static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
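[Editor's note, not part of the patch: the reason arm_lpae_init_pte() and
__arm_lpae_map() both grow a prev_ptep argument is visible in the recursion
above: each level hands its own table entry down, so whoever finally installs
the leaf can credit the parent table. The sketch below is a toy userspace
model of that threading; toy_map(), toy_init_pte() and the counts[] array are
inventions for illustration, not kernel interfaces.]

#include <assert.h>
#include <stdio.h>

#define LEVELS 3

static int counts[LEVELS];      /* toy stand-in for per-table-entry map counts */

/* Leaf install: credit the parent, as arm_lpae_init_pte() now does. */
static int toy_init_pte(int lvl, int *prev_cnt)
{
        if (prev_cnt)
                (*prev_cnt)++;  /* iopte_tblcnt_add(prev_ptep, 1) */
        printf("leaf installed at level %d\n", lvl);
        return 0;
}

/* Walk one level; on descent, this level's entry becomes the child's parent. */
static int toy_map(int lvl, int *cnt, int *prev_cnt)
{
        if (lvl == LEVELS - 1)
                return toy_init_pte(lvl, prev_cnt);

        /* "Rinse, repeat" */
        return toy_map(lvl + 1, &counts[lvl + 1], cnt);
}

int main(void)
{
        /* the top-level call passes NULL, exactly like arm_lpae_map() */
        toy_map(0, &counts[0], NULL);
        assert(counts[LEVELS - 2] == 1);        /* leaf's parent got credited */
        return 0;
}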
@@ -323,7 +400,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
                 return 0;
 
         prot = arm_lpae_prot_to_pte(data, iommu_prot);
-        return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
+        return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, NULL);
 }
 
 static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
@@ -365,7 +442,7 @@ static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
                         size_t pgsize = iommu_pgsize(
                                 data->iop.cfg.pgsize_bitmap, iova | phys, size);
                         ret = __arm_lpae_map(data, iova, phys, pgsize, prot,
-                                             lvl, ptep);
+                                             lvl, ptep, NULL);
                         if (ret)
                                 goto out_err;
 
@@ -428,7 +505,8 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                                     unsigned long iova, size_t size,
                                     arm_lpae_iopte prot, int lvl,
-                                    arm_lpae_iopte *ptep, size_t blk_size)
+                                    arm_lpae_iopte *ptep,
+                                    arm_lpae_iopte *prev_ptep, size_t blk_size)
 {
         unsigned long blk_start, blk_end;
         phys_addr_t blk_paddr;
@@ -450,7 +528,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                 /* __arm_lpae_map expects a pointer to the start of the table */
                 tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data);
                 if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl,
-                                   tablep) < 0) {
+                                   tablep, prev_ptep) < 0) {
                         if (table) {
                                 /* Free the table we allocated */
                                 tablep = iopte_deref(table, data);
@@ -467,7 +545,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 
 static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                             unsigned long iova, size_t size, int lvl,
-                            arm_lpae_iopte *ptep)
+                            arm_lpae_iopte *ptep, arm_lpae_iopte *prev_ptep)
 {
         arm_lpae_iopte pte;
         const struct iommu_gather_ops *tlb = data->iop.cfg.tlb;
@@ -495,6 +573,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                 return size;
         } else if ((lvl == ARM_LPAE_MAX_LEVELS - 2) && !iopte_leaf(pte, lvl)) {
                 arm_lpae_iopte *table = iopte_deref(pte, data);
+                arm_lpae_iopte *table_base = table;
                 int tl_offset = ARM_LPAE_LVL_IDX(iova, lvl + 1, data);
                 int entry_size = (1 << data->pg_shift);
                 int max_entries = ARM_LPAE_BLOCK_SIZE(lvl, data) / entry_size;
@@ -514,6 +593,15 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                 memset(table, 0, table_len);
                 tlb->flush_pgtable(table, table_len, cookie);
 
+                iopte_tblcnt_sub(ptep, entries);
+                if (!iopte_tblcnt(*ptep)) {
+                        /* no valid mappings left under this table. free it. */
+                        *ptep = 0;
+                        tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+                        io_pgtable_free_pages_exact(
+                                table_base, max_entries * sizeof(*table_base));
+                }
+
                 return entries * entry_size;
         } else if (iopte_leaf(pte, lvl)) {
                 /*
@@ -522,12 +610,14 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                  */
                 return arm_lpae_split_blk_unmap(data, iova, size,
                                                 iopte_prot(pte), lvl, ptep,
+                                                prev_ptep,
                                                 blk_size);
         }
 
         /* Keep on walkin' */
+        prev_ptep = ptep;
         ptep = iopte_deref(pte, data);
-        return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
+        return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep, prev_ptep);
 }
 
 static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
@@ -546,7 +636,8 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
                 size_to_unmap = remaining < SZ_2M ? remaining :
                                 iommu_pgsize(data->iop.cfg.pgsize_bitmap,
                                              iova, size);
-                ret = __arm_lpae_unmap(data, iova, size_to_unmap, lvl, ptep);
+                ret = __arm_lpae_unmap(data, iova, size_to_unmap, lvl, ptep,
+                                       NULL);
                 if (ret == 0)
                         break;
                 unmapped += ret;
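[Editor's note, not part of the patch: putting the unmap side together, the
page-level hunk above debits the parent entry's count and frees the
next-level table once the count reaches zero. Below is a compact userspace
model of that contract; struct toy_tbl_entry, toy_map_page() and
toy_unmap_page() are invented names, and the real code operates on encoded
iopte bits rather than a C struct.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 512

struct toy_tbl_entry {
        uint64_t *table;        /* next-level table this entry points at */
        int mapcnt;             /* live mappings beneath it (iopte_tblcnt) */
};

static void toy_map_page(struct toy_tbl_entry *ptep, unsigned int idx)
{
        ptep->table[idx] = 1;   /* toy "valid" leaf PTE */
        ptep->mapcnt++;         /* iopte_tblcnt_add(prev_ptep, 1) */
}

static void toy_unmap_page(struct toy_tbl_entry *ptep, unsigned int idx)
{
        ptep->table[idx] = 0;
        ptep->mapcnt--;         /* iopte_tblcnt_sub(ptep, entries) */
        if (!ptep->mapcnt) {
                /* no valid mappings left under this table. free it. */
                free(ptep->table);
                ptep->table = NULL;     /* "*ptep = 0" in the patch */
        }
}

int main(void)
{
        struct toy_tbl_entry ent = { calloc(ENTRIES, sizeof(uint64_t)), 0 };

        toy_map_page(&ent, 0);
        toy_map_page(&ent, 1);
        toy_unmap_page(&ent, 0);
        assert(ent.table != NULL);      /* one mapping left: keep the table */
        toy_unmap_page(&ent, 1);
        assert(ent.table == NULL);      /* count hit zero: no dangling entry */
        printf("table freed once its map count reached zero\n");
        return 0;
}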