// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/**
 * DOC: Base kernel MMU management.
 */

#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include <linux/migrate.h>
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_fault.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <tl/mali_kbase_tracepoints.h>
#include <backend/gpu/mali_kbase_instr_defs.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_debug.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hw.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <mali_kbase_mem.h>
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
#include <mmu/mali_kbase_mmu_internal.h>
#include <device/mali_kbase_device.h>
#include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
#if !MALI_USE_CSF
#include <mali_kbase_hwaccess_jm.h>
#endif
#include <linux/version_compat_defs.h>

#include <mali_kbase_trace_gpu_mem.h>
#include <backend/gpu/mali_kbase_pm_internal.h>

/* Threshold used to decide whether to flush full caches or just a physical range */
#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20
#define MGM_DEFAULT_PTE_GROUP (0)

/* Macro to convert the updated PGDs to flags indicating which levels to skip in the flush */
#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds)&0xF)
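/* For example, if only the bottom level (3) was updated then dirty_pgds = 0x8 and the
 * macro yields 0x7, i.e. levels 0 to 2 may be skipped during the flush.
 */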

/**
 * kmap_pgd() - Map a PGD page and return the address of it
 *
 * @p:           Pointer to the PGD page to be mapped.
 * @pgd:         The physical address of the PGD. May not be PAGE_SIZE aligned but shall be
 *               GPU_PAGE_SIZE aligned.
 *
 * Return: The mapped address of @pgd, adjusted by the offset of @pgd from the start of the page.
 */
static inline void *kmap_pgd(struct page *p, phys_addr_t pgd)
{
#if GPU_PAGES_PER_CPU_PAGE > 1
	return kbase_kmap(p) + (pgd & ~PAGE_MASK);
#else
	CSTD_UNUSED(pgd);
	return kbase_kmap(p);
#endif
}

/**
 * kmap_atomic_pgd() - Variant of kmap_pgd for atomic mapping
 *
 * @p:           Pointer to the PGD page to be mapped.
 * @pgd:         The physical address of the PGD. May not be PAGE_SIZE aligned but shall be
 *               GPU_PAGE_SIZE aligned.
 *
 * Return: The mapped address of the @pgd.
 */
static inline void *kmap_atomic_pgd(struct page *p, phys_addr_t pgd)
{
#if GPU_PAGES_PER_CPU_PAGE > 1
	return kbase_kmap_atomic(p) + (pgd & ~PAGE_MASK);
#else
	CSTD_UNUSED(pgd);
	return kbase_kmap_atomic(p);
#endif
}

/**
 * kunmap_pgd() - Unmap a PGD page
 *
 * @p:           Pointer to the PGD page to be unmapped.
 * @pgd_address: The address of the PGD. May not be PAGE_SIZE aligned but shall be
 *               GPU_PAGE_SIZE aligned.
 */
static inline void kunmap_pgd(struct page *p, void *pgd_address)
{
	/* It is okay to not align pgd_address to PAGE_SIZE boundary */
	kbase_kunmap(p, pgd_address);
}

/**
 * kunmap_atomic_pgd() - Variant of kunmap_pgd for atomic unmapping
 *
 * @pgd_address: The address of the PGD. May not be PAGE_SIZE aligned but shall be
 *               GPU_PAGE_SIZE aligned.
 */
static inline void kunmap_atomic_pgd(void *pgd_address)
{
	/* It is okay to not align pgd_address to PAGE_SIZE boundary */
	kbase_kunmap_atomic(pgd_address);
}

/**
 * pgd_dma_addr() - Return dma addr of a PGD
 *
 * @p:       Pointer to the PGD page.
 * @pgd:     The physical address of the PGD.
 *
 * Return:   DMA address of the PGD
 */
static inline dma_addr_t pgd_dma_addr(struct page *p, phys_addr_t pgd)
{
#if GPU_PAGES_PER_CPU_PAGE > 1
	return kbase_page_private(p)->dma_addr + (pgd & ~PAGE_MASK);
#else
	CSTD_UNUSED(pgd);
	return kbase_dma_addr(p);
#endif
}

/**
 * get_pgd_sub_page_index() - Return the index of a sub PGD page in the PGD page.
 *
 * @pgd:         The physical address of the PGD.
 *
 * Return:       The index value ranging from 0 to (GPU_PAGES_PER_CPU_PAGE - 1)
 */
static inline u32 get_pgd_sub_page_index(phys_addr_t pgd)
{
	return (pgd & ~PAGE_MASK) / GPU_PAGE_SIZE;
}

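/*
 * When the CPU page size is larger than the GPU page size (e.g. a 16K or 64K kernel page with
 * 4K GPU pages), GPU_PAGES_PER_CPU_PAGE is greater than 1 and a single CPU page can hold
 * several PGDs. The helpers below track which "sub pages" of such a CPU page are in use so
 * that PGDs can be packed together instead of each one consuming a whole CPU page.
 */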
#if GPU_PAGES_PER_CPU_PAGE > 1
/**
 * alloc_pgd_page_metadata() - Allocate page metadata for a PGD.
 *
 * @kbdev:      Pointer to the instance of a kbase device.
 * @mmut:       Structure holding details of the MMU table for a kcontext.
 * @p:          PGD page.
 *
 * The PGD page, @p is linked to &kbase_mmu_table.pgd_pages_list for allocating
 * sub PGD pages from the list.
 *
 * Return:      True on success.
 */
static bool alloc_pgd_page_metadata(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
				    struct page *p)
{
	struct kbase_page_metadata *page_md;

	if (!kbase_is_page_migration_enabled()) {
		page_md = kmem_cache_zalloc(kbdev->page_metadata_slab, GFP_KERNEL);
		if (!page_md)
			return false;

		page_md->dma_addr = kbase_dma_addr_as_priv(p);
		set_page_private(p, (unsigned long)page_md);
	} else {
		page_md = kbase_page_private(p);
	}

	page_md->data.pt_mapped.num_allocated_sub_pages = 1;
	set_bit(0, page_md->data.pt_mapped.allocated_sub_pages);
	page_md->data.pt_mapped.pgd_page = p;
	list_add(&page_md->data.pt_mapped.pgd_link, &mmut->pgd_pages_list);

	return true;
}

/**
 * free_pgd_page_metadata() - Free page metadata for a PGD.
 *
 * @kbdev:      Pointer to the instance of a kbase device.
 * @p:          PGD page where the metadata belongs to.
 *
 * The PGD page, @p is removed from &kbase_mmu_table.pgd_pages_list.
 */
static void free_pgd_page_metadata(struct kbase_device *kbdev, struct page *p)
{
	struct kbase_page_metadata *page_md = kbase_page_private(p);

	WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages);
	page_md->data.pt_mapped.pgd_page = NULL;
	list_del_init(&page_md->data.pt_mapped.pgd_link);

	if (kbase_is_page_migration_enabled())
		return;

	set_page_private(p, (unsigned long)page_md->dma_addr);
	kmem_cache_free(kbdev->page_metadata_slab, page_md);
}

/**
 * allocate_pgd_sub_page() - Allocate a PGD sub page
 *
 * @page_md:  Page metadata of a PGD page where a sub page is allocated from.
 *
 * Return:    Physical address of allocated PGD sub page on success.
 *            KBASE_INVALID_PHYSICAL_ADDRESS on failure.
 */
static inline phys_addr_t allocate_pgd_sub_page(struct kbase_page_metadata *page_md)
{
	unsigned long sub_page_index;

	if (page_md->data.pt_mapped.num_allocated_sub_pages == GPU_PAGES_PER_CPU_PAGE)
		return KBASE_INVALID_PHYSICAL_ADDRESS;
	sub_page_index = find_first_zero_bit(page_md->data.pt_mapped.allocated_sub_pages,
					     GPU_PAGES_PER_CPU_PAGE);

#ifdef CONFIG_MALI_BIFROST_DEBUG
	if (WARN_ON_ONCE(sub_page_index >= GPU_PAGES_PER_CPU_PAGE))
		return KBASE_INVALID_PHYSICAL_ADDRESS;
	if (WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages > GPU_PAGES_PER_CPU_PAGE))
		return KBASE_INVALID_PHYSICAL_ADDRESS;
#endif
	set_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages);
	page_md->data.pt_mapped.num_allocated_sub_pages++;

	return (page_to_phys(page_md->data.pt_mapped.pgd_page) + (sub_page_index * GPU_PAGE_SIZE));
}

/**
 * free_pgd_sub_page() - Free a PGD sub page
 *
 * @pgd:      Sub PGD to be freed.
 *
 * Return:    The number of remaining allocated sub pages in the PGD.
 */
static int free_pgd_sub_page(phys_addr_t pgd)
{
	struct page *p = pfn_to_page(PFN_DOWN(pgd));
	struct kbase_page_metadata *page_md = kbase_page_private(p);
	const u32 sub_page_index = get_pgd_sub_page_index(pgd);

#ifdef CONFIG_MALI_BIFROST_DEBUG
	if (WARN_ON_ONCE(!test_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages)))
		return page_md->data.pt_mapped.num_allocated_sub_pages;
#endif
	clear_bit(sub_page_index, page_md->data.pt_mapped.allocated_sub_pages);
	if (!WARN_ON_ONCE(page_md->data.pt_mapped.num_allocated_sub_pages <= 0))
		page_md->data.pt_mapped.num_allocated_sub_pages--;

	return page_md->data.pt_mapped.num_allocated_sub_pages;
}

/**
 * allocate_from_pgd_pages_list() - Allocate a PGD from the PGD pages list
 *
 * @mmut:     Structure holding details of the MMU table for a kcontext.
 *
 * Return:    Physical address of the allocated PGD.
 */
static inline phys_addr_t allocate_from_pgd_pages_list(struct kbase_mmu_table *mmut)
{
	struct list_head *entry;
	phys_addr_t pgd;

	lockdep_assert_held(&mmut->mmu_lock);

	if (unlikely(!mmut->num_free_pgd_sub_pages))
		return KBASE_INVALID_PHYSICAL_ADDRESS;

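	/* Fast paths: try the most recently allocated and the most recently freed PGD pages
	 * first, before falling back to a scan of the whole list.
	 */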
	if (mmut->last_allocated_pgd_page) {
		pgd = allocate_pgd_sub_page(kbase_page_private(mmut->last_allocated_pgd_page));
		if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
			goto success;
	}

	if (mmut->last_freed_pgd_page) {
		pgd = allocate_pgd_sub_page(kbase_page_private(mmut->last_freed_pgd_page));
		if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
			goto success;
	}

	list_for_each(entry, &mmut->pgd_pages_list) {
		struct kbase_page_metadata *page_md =
			list_entry(entry, struct kbase_page_metadata, data.pt_mapped.pgd_link);

		pgd = allocate_pgd_sub_page(page_md);
		if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
			goto success;
	}

	return KBASE_INVALID_PHYSICAL_ADDRESS;
success:
	mmut->num_free_pgd_sub_pages--;
	return pgd;
}
#endif

static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
				     const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
				     unsigned long flags, int const group_id, u64 *dirty_pgds,
				     struct kbase_va_region *reg, bool ignore_page_migration);

/* Small wrapper function to factor out GPU-dependent context releasing */
static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
{
#if MALI_USE_CSF
	CSTD_UNUSED(kbdev);
	kbase_ctx_sched_release_ctx_lock(kctx);
#else /* MALI_USE_CSF */
	kbasep_js_runpool_release_ctx(kbdev, kctx);
#endif /* MALI_USE_CSF */
}

/**
 * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done
 * through GPU_CONTROL interface.
 *
 * @kbdev:         kbase device to check GPU model ID on.
 *
 * This function returns whether a cache flush for page table update should
 * run through GPU_CONTROL interface or MMU_AS_CONTROL interface.
 *
 * Return: True if cache flush should be done on GPU command.
 */
static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
{
	return kbdev->gpu_props.gpu_id.arch_major > 11;
}

/**
 * mmu_flush_pa_range() - Flush physical address range
 *
 * @kbdev:    kbase device to issue the MMU operation on.
 * @phys:     Starting address of the physical range to start the operation on.
 * @nr_bytes: Number of bytes to work on.
 * @op:       Type of cache flush operation to perform.
 *
 * Issue a cache flush physical range command.
 */
#if MALI_USE_CSF
static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes,
			       enum kbase_mmu_op_type op)
{
	u32 flush_op;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Translate operation to command */
	if (op == KBASE_MMU_OP_FLUSH_PT)
		flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2;
	else if (op == KBASE_MMU_OP_FLUSH_MEM)
		flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC;
	else {
		dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op);
		return;
	}

	if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op))
		dev_err(kbdev->dev, "Flush for physical address range did not complete");
}
#endif

/**
 * mmu_invalidate() - Perform an invalidate operation on MMU caches.
 * @kbdev:      The Kbase device.
 * @kctx:       The Kbase context.
 * @as_nr:      GPU address space number for which invalidate is required.
 * @op_param: Non-NULL pointer to struct containing information about the MMU
 *            operation to perform.
 *
 * Perform an MMU invalidate operation on a particular address space
 * by issuing an UNLOCK command.
 */
static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
			   const struct kbase_mmu_hw_op_param *op_param)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) {
		as_nr = kctx ? kctx->as_nr : as_nr;
		if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param))
			dev_err(kbdev->dev,
				"Invalidate after GPU page table update did not complete");
	}

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}

/**
 * mmu_invalidate_on_teardown() - Perform an invalidate operation on MMU caches on page
 *                                table teardown.
 * @kbdev:      The Kbase device.
 * @kctx:       The Kbase context.
 * @vpfn:       The virtual page frame number at which teardown is done.
 * @num_pages:  The number of entries that were invalidated in the top-most level PGD
 *              affected by the teardown operation.
 * @level:      The top-most PGD level that was touched by the teardown.
 * @as_nr:      GPU address space number for which invalidate is required.
 *
 * Perform an MMU invalidate operation after the teardown of the top-most level PGD on a
 * particular address space by issuing an UNLOCK command.
 */
static inline void mmu_invalidate_on_teardown(struct kbase_device *kbdev,
					      struct kbase_context *kctx, u64 vpfn,
					      size_t num_pages, int level, int as_nr)
{
	u32 invalidate_range_num_pages = num_pages;
	u64 invalidate_range_start_vpfn = vpfn;
	struct kbase_mmu_hw_op_param op_param;

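	/* Each page table level resolves 9 bits of the virtual address, so a single entry at
	 * 'level' covers 1 << ((3 - level) * 9) bottom-level pages. When a higher-level PGD was
	 * torn down, widen the invalidation to that many pages and align the start VPFN
	 * accordingly.
	 */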
	if (level != MIDGARD_MMU_BOTTOMLEVEL) {
		invalidate_range_num_pages = 1 << ((3 - level) * 9);
		invalidate_range_start_vpfn = vpfn - (vpfn & (invalidate_range_num_pages - 1));
	}

	op_param = (struct kbase_mmu_hw_op_param){
		.vpfn = invalidate_range_start_vpfn,
		.nr = invalidate_range_num_pages,
		.mmu_sync_info = CALLER_MMU_ASYNC,
		.kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
		.flush_skip_levels = (1ULL << level) - 1,
	};

	mmu_invalidate(kbdev, kctx, as_nr, &op_param);
}

/* Perform a flush/invalidate on a particular address space
 */
static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
				    const struct kbase_mmu_hw_op_param *op_param)
{
	unsigned long flags;

	/* AS transaction begin */
	mutex_lock(&kbdev->mmu_hw_mutex);
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	if (kbdev->pm.backend.gpu_ready && kbase_mmu_hw_do_flush(kbdev, as, op_param))
		dev_err(kbdev->dev, "Flush for GPU page table update did not complete");

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	mutex_unlock(&kbdev->mmu_hw_mutex);
	/* AS transaction end */
}

/**
 * mmu_flush_invalidate() - Perform a flush operation on GPU caches.
 * @kbdev:      The Kbase device.
 * @kctx:       The Kbase context.
 * @as_nr:      GPU address space number for which flush + invalidate is required.
 * @op_param: Non-NULL pointer to struct containing information about the MMU
 *            operation to perform.
 *
 * This function performs the cache flush operation described by @op_param.
 * The function retains a reference to the given @kctx and releases it
 * after performing the flush operation.
 *
 * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue
 * a cache flush + invalidate to the L2 caches and invalidate the TLBs.
 *
 * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue
 * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as
 * invalidating the TLBs.
 */
static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
				 const struct kbase_mmu_hw_op_param *op_param)
{
	bool ctx_is_in_runpool;

	/* Early out if there is nothing to do */
	if (op_param->nr == 0)
		return;

	/* If no context is provided then MMU operation is performed on address
	 * space which does not belong to user space context. Otherwise, retain
	 * refcount to context provided and release after flush operation.
	 */
	if (!kctx) {
		mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param);
	} else {
#if !MALI_USE_CSF
		mutex_lock(&kbdev->js_data.queue_mutex);
		ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
		mutex_unlock(&kbdev->js_data.queue_mutex);
#else
		ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
#endif /* !MALI_USE_CSF */

		if (ctx_is_in_runpool) {
			KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);

			mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param);

			release_ctx(kbdev, kctx);
		}
	}
}

/**
 * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via
 *                                    the GPU_CONTROL interface
 * @kbdev:      The Kbase device.
 * @kctx:       The Kbase context.
 * @as_nr:      GPU address space number for which flush + invalidate is required.
 * @op_param: Non-NULL pointer to struct containing information about the MMU
 *            operation to perform.
 *
 * Perform a flush/invalidate on a particular address space via the GPU_CONTROL
 * interface.
 */
static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
					     int as_nr,
					     const struct kbase_mmu_hw_op_param *op_param)
{
	unsigned long flags;

	/* AS transaction begin */
	mutex_lock(&kbdev->mmu_hw_mutex);
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) {
		as_nr = kctx ? kctx->as_nr : as_nr;
		if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param))
			dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
	}

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	mutex_unlock(&kbdev->mmu_hw_mutex);
}

static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx,
				   phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op)
{
	kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op);
}

static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size)
{
	/* In a non-coherent system, ensure the GPU can read
	 * the pages from memory
	 */
	if (kbdev->system_coherency == COHERENCY_NONE)
		dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE);
}

/**
 * kbase_mmu_sync_pgd() - sync page directory to memory when needed.
 * @kbdev:    Device pointer.
 * @kctx:     Context pointer.
 * @phys:     Starting physical address of the destination region.
 * @handle:   Address of DMA region.
 * @size:     Size of the region to sync.
 * @flush_op: MMU cache flush operation to perform on the physical address
 *            range, if GPU control is available.
 *
 * This function is called whenever the association between a virtual address
 * range and a physical address range changes, because a mapping is created or
 * destroyed.
 * One of the effects of this operation is performing an MMU cache flush
 * operation only on the physical address range affected by this function, if
 * GPU control is available.
 *
 * This should be called after each page directory update.
 */
static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx,
			       phys_addr_t phys, dma_addr_t handle, size_t size,
			       enum kbase_mmu_op_type flush_op)
{
	kbase_mmu_sync_pgd_cpu(kbdev, handle, size);
	kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op);
}

/*
 * Definitions:
 * - PGD: Page Directory.
 * - PTE: Page Table Entry. A 64bit value pointing to the next
 *        level of translation
 * - ATE: Address Translation Entry. A 64bit value pointing to
 *        a 4kB physical page.
 */

static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
					   u64 vpfn, struct tagged_addr *phys, size_t nr,
					   unsigned long flags, int group_id, u64 *dirty_pgds);

/**
 * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
 *                                           free memory of the page directories
 *
 * @kbdev:    Device pointer.
 * @mmut:     GPU MMU page table.
 * @pgds:     Physical addresses of page directories to be freed.
 * @vpfn:     The virtual page frame number.
 * @level:    The level of MMU page table that needs to be updated.
 * @flush_op: The type of MMU flush operation to perform.
 * @dirty_pgds: Flags to track every level where a PGD has been updated.
 * @as_nr:     GPU address space number for which invalidate is required.
 */
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
						  struct kbase_mmu_table *mmut, phys_addr_t *pgds,
						  u64 vpfn, int level,
						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
						  int as_nr);

static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
	atomic_sub(1, &kbdev->memdev.used_pages);

	/* If MMU tables belong to a context then pages will have been accounted
	 * against it, so we must decrement the usage counts here.
	 */
	if (mmut->kctx) {
		kbase_process_page_usage_dec(mmut->kctx, 1);
		atomic_sub(1, &mmut->kctx->used_pages);
	}

	kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
}

static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
					       struct kbase_mmu_table *mmut, struct page *p)
{
	struct kbase_page_metadata *page_md = kbase_page_private(p);
	bool page_is_isolated = false;

	lockdep_assert_held(&mmut->mmu_lock);

	if (!kbase_is_page_migration_enabled())
		return false;

	spin_lock(&page_md->migrate_lock);
	if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) {
		WARN_ON_ONCE(!mmut->kctx);
		if (IS_PAGE_ISOLATED(page_md->status)) {
			page_md->status =
				PAGE_STATUS_SET(page_md->status, FREE_PT_ISOLATED_IN_PROGRESS);
			page_md->data.free_pt_isolated.kbdev = kbdev;
			page_is_isolated = true;
		} else {
			page_md->status = PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS);
		}
	} else if ((PAGE_STATUS_GET(page_md->status) == FREE_IN_PROGRESS) ||
		   (PAGE_STATUS_GET(page_md->status) == ALLOCATE_IN_PROGRESS)) {
		/* Nothing to do - fall through */
	} else {
		WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE);
	}
	spin_unlock(&page_md->migrate_lock);

	if (unlikely(page_is_isolated)) {
		/* Do the CPU cache flush and accounting here for the isolated
		 * PGD page, which is done inside kbase_mmu_free_pgd() for the
		 * PGD page that did not get isolated.
		 */
		dma_sync_single_for_device(kbdev->dev, pgd_dma_addr(p, page_to_phys(p)), PAGE_SIZE,
					   DMA_BIDIRECTIONAL);
		kbase_mmu_account_freed_pgd(kbdev, mmut);
	}

	return page_is_isolated;
}

/**
 * kbase_mmu_free_pgd() - Free memory of the page directory
 *
 * @kbdev:   Device pointer.
 * @mmut:    GPU MMU page table.
 * @pgd:     Physical address of page directory to be freed.
 *
 * This function is supposed to be called with mmu_lock held and after
 * ensuring that the GPU won't be able to access the page.
 */
static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
			       phys_addr_t pgd)
{
	struct page *p;
	bool page_is_isolated = false;

	lockdep_assert_held(&mmut->mmu_lock);

	p = pfn_to_page(PFN_DOWN(pgd));
#if GPU_PAGES_PER_CPU_PAGE > 1
	if (free_pgd_sub_page(pgd)) {
		mmut->num_free_pgd_sub_pages++;
		mmut->last_freed_pgd_page = p;
		return;
	}

	mmut->num_free_pgd_sub_pages -= (GPU_PAGES_PER_CPU_PAGE - 1);
	if (p == mmut->last_freed_pgd_page)
		mmut->last_freed_pgd_page = NULL;
	if (p == mmut->last_allocated_pgd_page)
		mmut->last_allocated_pgd_page = NULL;
	free_pgd_page_metadata(kbdev, p);
#endif
	page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);

	if (likely(!page_is_isolated)) {
		kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
		kbase_mmu_account_freed_pgd(kbdev, mmut);
	}
}

/**
 * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list
 *
 * @kbdev:          Device pointer.
 * @mmut:           GPU MMU page table.
 *
 * This function will call kbase_mmu_free_pgd() on each page directory page
 * present in the list of free PGDs inside @mmut.
 *
 * The function is supposed to be called after the GPU cache and MMU TLB has
 * been invalidated post the teardown loop.
 *
 * The mmu_lock shall be held prior to calling the function.
 */
static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
	size_t i;

	lockdep_assert_held(&mmut->mmu_lock);

	for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
		kbase_mmu_free_pgd(kbdev, mmut, mmut->scratch_mem.free_pgds.pgds[i]);

	mmut->scratch_mem.free_pgds.head_index = 0;
}

static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, phys_addr_t pgd)
{
	lockdep_assert_held(&mmut->mmu_lock);

	if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
		return;

	mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = pgd;
}

static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
{
	lockdep_assert_held(&mmut->mmu_lock);

	mmut->scratch_mem.free_pgds.head_index = 0;
}

/**
 * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
 *                               a region on a GPU page fault
 * @kbdev:         KBase device
 * @reg:           The region that will be backed with more pages
 * @fault_rel_pfn: PFN of the fault relative to the start of the region
 *
 * This calculates how much to increase the backing of a region by, based on
 * where a GPU page fault occurred and the flags in the region.
 *
 * This can be more than the minimum number of pages that would reach
 * @fault_rel_pfn, for example to reduce the overall rate of page fault
 * interrupts on a region, or to ensure that the end address is aligned.
 *
 * Return: the number of backed pages to increase by
 */
static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, struct kbase_va_region *reg,
					size_t fault_rel_pfn)
{
	size_t multiple = reg->extension;
	size_t reg_current_size = kbase_reg_current_backed_size(reg);
	size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
	size_t remainder;

	if (!multiple) {
		dev_warn(
			kbdev->dev,
			"VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW",
			((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
		return minimum_extra;
	}

	/* Calculate the remainder to subtract from minimum_extra to make it
	 * the desired (rounded down) multiple of the extension.
	 * Depending on reg's flags, the base used for calculating multiples is
	 * different
	 */

	/* multiple is based from the current backed size, even if the
	 * current backed size/pfn for end of committed memory are not
	 * themselves aligned to multiple
	 */
	remainder = minimum_extra % multiple;

#if !MALI_USE_CSF
	if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
		/* multiple is based from the top of the initial commit, which
		 * has been allocated in such a way that (start_pfn +
		 * initial_commit) is already aligned to multiple. Hence the
		 * pfn for the end of committed memory will also be aligned to
		 * multiple
		 */
		size_t initial_commit = reg->initial_commit;

		if (fault_rel_pfn < initial_commit) {
			/* this case is just to catch in case it's been
			 * recommitted by userspace to be smaller than the
			 * initial commit
			 */
			minimum_extra = initial_commit - reg_current_size;
			remainder = 0;
		} else {
			/* same as calculating
			 * (fault_rel_pfn - initial_commit + 1)
			 */
			size_t pages_after_initial =
				minimum_extra + reg_current_size - initial_commit;

			remainder = pages_after_initial % multiple;
		}
	}
#endif /* !MALI_USE_CSF */

	if (remainder == 0)
		return minimum_extra;

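	/* Round minimum_extra up to the next multiple of the extension, e.g. with
	 * minimum_extra = 5 and multiple = 8 (remainder = 5), 5 + 8 - 5 = 8 pages are returned.
	 */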
	return minimum_extra + multiple - remainder;
}

#ifdef CONFIG_MALI_CINSTR_GWT
static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
						   struct kbase_as *faulting_as, u64 start_pfn,
						   size_t nr, u32 kctx_id, u64 dirty_pgds)
{
	/* Calls to this function are inherently synchronous, with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
	struct kbase_mmu_hw_op_param op_param;
	unsigned long irq_flags;
	int ret = 0;

	kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);

	/* flush L2 and unlock the VA (resumes the MMU) */
	op_param.vpfn = start_pfn;
	op_param.nr = nr;
	op_param.op = KBASE_MMU_OP_FLUSH_PT;
	op_param.kctx_id = kctx_id;
	op_param.mmu_sync_info = mmu_sync_info;
	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
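	/* On GPUs that support it, issue the flush through the GPU_CONTROL interface (see
	 * mmu_flush_cache_on_gpu_ctrl()); otherwise fall back to the MMU_AS_CONTROL based flush.
	 */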
	if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
		op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds);
		ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
	} else {
		ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
	}
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

	if (ret)
		dev_err(kbdev->dev,
			"Flush for GPU page fault due to write access did not complete");

	kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
}

static void set_gwt_element_page_addr_and_size(struct kbasep_gwt_list_element *element,
					       u64 fault_page_addr, struct tagged_addr fault_phys)
{
	u64 fault_pfn = fault_page_addr >> PAGE_SHIFT;
	unsigned int vindex = fault_pfn & (NUM_PAGES_IN_2MB_LARGE_PAGE - 1);

	/* If the fault address lies within a 2MB page, then consider
	 * the whole 2MB page for dumping to avoid incomplete dumps.
	 */
	if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) {
		element->page_addr = fault_page_addr & ~(SZ_2M - 1UL);
		element->num_pages = NUM_PAGES_IN_2MB_LARGE_PAGE;
	} else {
		element->page_addr = fault_page_addr;
		element->num_pages = 1;
	}
}

static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
					     struct kbase_as *faulting_as)
{
	struct kbasep_gwt_list_element *pos;
	struct kbase_va_region *region;
	struct kbase_device *kbdev;
	struct tagged_addr *fault_phys_addr;
	struct kbase_fault *fault;
	u64 fault_pfn, pfn_offset;
	unsigned int as_no;
	u64 dirty_pgds = 0;

	as_no = faulting_as->number;
	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
	fault = &faulting_as->pf_data;
	fault_pfn = fault->addr >> PAGE_SHIFT;

	kbase_gpu_vm_lock(kctx);

	/* Find region and check if it should be writable. */
	region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr);
	if (kbase_is_region_invalid_or_free(region)) {
		kbase_gpu_vm_unlock(kctx);
		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
						"Memory is not mapped on the GPU",
						&faulting_as->pf_data);
		return;
	}

	if (!(region->flags & KBASE_REG_GPU_WR)) {
		kbase_gpu_vm_unlock(kctx);
		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
						"Region does not have write permissions",
						&faulting_as->pf_data);
		return;
	}

	if (unlikely(region->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) {
		kbase_gpu_vm_unlock(kctx);
		kbase_mmu_report_fault_and_kill(
			kctx, faulting_as, "Unexpected write permission fault on an alias region",
			&faulting_as->pf_data);
		return;
	}

	pfn_offset = fault_pfn - region->start_pfn;
	fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset];

	/* Capture addresses of faulting write location
	 * for job dumping if write tracking is enabled.
	 */
	if (kctx->gwt_enabled) {
		u64 fault_page_addr = fault->addr & PAGE_MASK;
		bool found = false;
		/* Check if this write was already handled. */
		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
			if (fault_page_addr == pos->page_addr) {
				found = true;
				break;
			}
		}

		if (!found) {
			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
			if (pos) {
				pos->region = region;
				set_gwt_element_page_addr_and_size(pos, fault_page_addr,
								   *fault_phys_addr);
				list_add(&pos->link, &kctx->gwt_current_list);
			} else {
				dev_warn(kbdev->dev, "kmalloc failure");
			}
		}
	}

	/* Now make this faulting page writable to GPU. */
	kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1,
					region->flags, region->gpu_alloc->group_id, &dirty_pgds);

	kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1, kctx->id,
					       dirty_pgds);

	kbase_gpu_vm_unlock(kctx);
}

static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
						  struct kbase_as *faulting_as)
{
	struct kbase_fault *fault = &faulting_as->pf_data;

	switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) {
	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
		kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
		break;
	case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE:
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Execute Permission fault",
						fault);
		break;
	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Read Permission fault", fault);
		break;
	default:
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Unknown Permission fault",
						fault);
		break;
	}
}
#endif

/**
 * estimate_pool_space_required - Determine how much a pool should be grown by to support a future
 * allocation
 * @pool:           The memory pool to check, including its linked pools
 * @pages_required: Number of small pages required for the pool to support a future allocation
 *
 * The value returned accounts for the size of @pool and the size of each memory pool linked to
 * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to
 * allocate from.
 *
 * Note: this is only an estimate, because even during the calculation the memory pool(s) involved
 * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an
 * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller
 * should keep attempting an allocation and then re-growing with a new value queried from this
 * function until the allocation succeeds.
 *
 * Return: an estimate of the amount of extra small pages in @pool that are required to satisfy an
 * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the
 * allocation.
 */
static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required)
{
	size_t pages_still_required;

	for (pages_still_required = pages_required; pool != NULL && pages_still_required;
	     pool = pool->next_pool) {
		size_t pool_size_small;

		kbase_mem_pool_lock(pool);

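		/* kbase_mem_pool_size() counts pool entries; shifting by pool->order converts
		 * that into an equivalent number of small pages, since entries of the large-page
		 * pool each cover multiple small pages.
		 */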
		pool_size_small = kbase_mem_pool_size(pool) << pool->order;
		if (pool_size_small >= pages_still_required)
			pages_still_required = 0;
		else
			pages_still_required -= pool_size_small;

		kbase_mem_pool_unlock(pool);
	}
	return pages_still_required;
}

/**
 * page_fault_try_alloc - Try to allocate memory from a context pool
 * @kctx:          Context pointer
 * @region:        Region to grow
 * @new_pages:     Number of small pages to allocate
 * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be
 *                 either small or 2 MiB pages, depending on the number of pages requested.
 * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for
 *                 pool of small pages.
 * @fallback_to_small:  Whether to fall back to small pages or not
 * @prealloc_sas:  Pointer to kbase_sub_alloc structures
 *
 * This function will try to allocate as many pages as possible from the context pool, then if
 * required will try to allocate the remaining pages from the device pool.
 *
 * This function will not allocate any new memory beyond what is already present in the context or
 * device pools. This is because it is intended to be called whilst the thread has acquired the
 * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is
 * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close().
 *
 * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB
 * pages, otherwise it will be a count of small pages.
 *
 * Return: true if successful, false on failure
 */
static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_region *region,
				 size_t new_pages, size_t *pages_to_grow, bool *grow_2mb_pool,
				 bool fallback_to_small, struct kbase_sub_alloc **prealloc_sas)
{
	size_t total_gpu_pages_alloced = 0;
	size_t total_cpu_pages_alloced = 0;
	struct kbase_mem_pool *pool, *root_pool;
	bool alloc_failed = false;
	size_t pages_still_required;
	size_t total_mempools_free_small = 0;

	lockdep_assert_held(&kctx->reg_lock);
	lockdep_assert_held(&kctx->mem_partials_lock);

	if (WARN_ON(region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
		/* Do not try to grow the memory pool */
		*pages_to_grow = 0;
		return false;
	}

	if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE &&
	    !fallback_to_small) {
		root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
		*grow_2mb_pool = true;
	} else {
		root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
		*grow_2mb_pool = false;
	}

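	/* When the GPU and CPU mappings are backed by separate allocations, pages have to be
	 * allocated for both of them, so the requirement is doubled.
	 */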
	if (region->gpu_alloc != region->cpu_alloc)
		new_pages *= 2;

	/* Determine how many pages are in the pools before trying to allocate.
	 * Don't attempt to allocate & free if the allocation can't succeed.
	 */
	pages_still_required = estimate_pool_space_required(root_pool, new_pages);

	if (pages_still_required) {
		/* Insufficient pages in pools. Don't try to allocate - just
		 * request a grow.
		 */
		*pages_to_grow = pages_still_required;

		return false;
	}

	/* Since we're not holding any of the mempool locks, the amount of memory in the pools may
	 * change between the above estimate and the actual allocation.
	 */
	pages_still_required = new_pages;
	for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) {
		size_t pool_size_small;
		size_t pages_to_alloc_small;
		size_t pages_to_alloc_small_per_alloc;

		kbase_mem_pool_lock(pool);

		/* Allocate as much as possible from this pool */
		pool_size_small = kbase_mem_pool_size(pool) << pool->order;
		total_mempools_free_small += pool_size_small;
		pages_to_alloc_small = MIN(pages_still_required, pool_size_small);
		if (region->gpu_alloc == region->cpu_alloc)
			pages_to_alloc_small_per_alloc = pages_to_alloc_small;
		else
			pages_to_alloc_small_per_alloc = pages_to_alloc_small >> 1;

		if (pages_to_alloc_small) {
			struct tagged_addr *gpu_pages = kbase_alloc_phy_pages_helper_locked(
				region->gpu_alloc, pool, pages_to_alloc_small_per_alloc,
				&prealloc_sas[0]);

			if (!gpu_pages)
				alloc_failed = true;
			else
				total_gpu_pages_alloced += pages_to_alloc_small_per_alloc;

			if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) {
				struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked(
					region->cpu_alloc, pool, pages_to_alloc_small_per_alloc,
					&prealloc_sas[1]);

				if (!cpu_pages)
					alloc_failed = true;
				else
					total_cpu_pages_alloced += pages_to_alloc_small_per_alloc;
			}
		}

		kbase_mem_pool_unlock(pool);

		if (alloc_failed) {
			WARN_ON(!pages_still_required);
			WARN_ON(pages_to_alloc_small >= pages_still_required);
			WARN_ON(pages_to_alloc_small_per_alloc >= pages_still_required);
			break;
		}

		pages_still_required -= pages_to_alloc_small;
	}

	if (pages_still_required) {
		/* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation,
		 * so must in any case use kbase_free_phy_pages_helper() rather than
		 * kbase_free_phy_pages_helper_locked()
		 */
		if (total_gpu_pages_alloced > 0)
			kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced);
		if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0)
			kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced);

		if (alloc_failed) {
			/* Note that in allocating from the above memory pools, we always ensure
			 * never to request more than is available in each pool with the pool's
			 * lock held. Hence failing to allocate in such situations would be unusual
			 * and we should cancel the growth instead (as re-growing the memory pool
			 * might not fix the situation)
			 */
			dev_warn(
				kctx->kbdev->dev,
				"Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available",
				new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced,
				total_mempools_free_small);
			*pages_to_grow = 0;
		} else {
			/* Tell the caller to try to grow the memory pool
			 *
			 * Freeing pages above may have spilled or returned them to the OS, so we
			 * have to take into account how many are still in the pool before giving a
			 * new estimate for growth required of the pool. We can just re-estimate a
			 * new value.
			 */
			pages_still_required = estimate_pool_space_required(root_pool, new_pages);
			if (pages_still_required) {
				*pages_to_grow = pages_still_required;
			} else {
				/* It's possible another thread could've grown the pool to be just
				 * big enough after we rolled back the allocation. Request at least
				 * one more page to ensure the caller doesn't fail the growth by
				 * conflating it with the alloc_failed case above
				 */
				*pages_to_grow = 1u;
			}
		}

		return false;
	}

	/* Allocation was successful. No pages to grow, return success. */
	*pages_to_grow = 0;

	return true;
}

void kbase_mmu_page_fault_worker(struct work_struct *data)
{
	u64 fault_pfn;
	u32 fault_status;
	size_t new_pages;
	size_t fault_rel_pfn;
	struct kbase_as *faulting_as;
	unsigned int as_no;
	struct kbase_context *kctx;
	struct kbase_device *kbdev;
	struct kbase_va_region *region;
	struct kbase_fault *fault;
	int err;
	bool grown = false;
	size_t pages_to_grow;
	bool grow_2mb_pool = false;
	bool fallback_to_small = false;
	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
	int i;
	size_t current_backed_size;
#if MALI_JIT_PRESSURE_LIMIT_BASE
	size_t pages_trimmed = 0;
#endif
	unsigned long hwaccess_flags;

	/* Calls to this function are inherently synchronous, with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;

	faulting_as = container_of(data, struct kbase_as, work_pagefault);
	fault = &faulting_as->pf_data;
	fault_pfn = fault->addr >> PAGE_SHIFT;
	as_no = faulting_as->number;

	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
	dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %u", __func__, (void *)data,
		fault_pfn, as_no);

	/* Grab the context that was already refcounted in kbase_mmu_interrupt()
	 * Therefore, it cannot be scheduled out of this AS until we explicitly
	 * release it
	 */
	kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no);
	if (!kctx) {
		atomic_dec(&kbdev->faults_pending);
		return;
	}

	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);

#if MALI_JIT_PRESSURE_LIMIT_BASE
#if !MALI_USE_CSF
	mutex_lock(&kctx->jctx.lock);
#endif
#endif

	/* check if we still have GPU */
	if (unlikely(kbase_is_gpu_removed(kbdev))) {
		dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
		goto fault_done;
	}

	if (unlikely(fault->protected_mode)) {
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault);
		kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);

		goto fault_done;
	}

	fault_status = fault->status;
	switch (AS_FAULTSTATUS_EXCEPTION_TYPE_GET(fault_status)) {
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_3:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_4:
#if !MALI_USE_CSF
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_IDENTITY:
#endif
		/* need to check against the region to handle this one */
		break;

	case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_0:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_PERMISSION_FAULT_3:
#ifdef CONFIG_MALI_CINSTR_GWT
		/* If GWT was ever enabled then we need to handle
		 * write fault pages even if the feature was disabled later.
		 */
		if (kctx->gwt_was_enabled) {
			kbase_gpu_mmu_handle_permission_fault(kctx, faulting_as);
			goto fault_done;
		}
#endif

		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault);
		goto fault_done;

#if !MALI_USE_CSF
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_0:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSTAB_BUS_FAULT_3:
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Translation table bus fault",
						fault);
		goto fault_done;
#endif

#if !MALI_USE_CSF
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_0:
		fallthrough;
#endif
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ACCESS_FLAG_3:
		/* nothing to do, but we don't expect this fault currently */
		dev_warn(kbdev->dev, "Access flag unexpectedly set");
		goto fault_done;

#if MALI_USE_CSF
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN:
		fallthrough;
#else
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN0:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN3:
		fallthrough;
#endif
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT0:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT3:
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Address size fault", fault);
		goto fault_done;

	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3:
#if !MALI_USE_CSF
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_0:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_1:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_2:
		fallthrough;
	case AS_FAULTSTATUS_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_NONCACHEABLE_3:
#endif
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory attributes fault",
						fault);
		goto fault_done;

	default:
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Unknown fault code", fault);
		goto fault_done;
	}

| page_fault_retry:
 | |
| 	if (kbase_is_large_pages_enabled() && !fallback_to_small) {
 | |
| 		/* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
 | |
| 		for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
 | |
| 			if (!prealloc_sas[i]) {
 | |
| 				prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
 | |
| 
 | |
| 				if (!prealloc_sas[i]) {
 | |
| 					kbase_mmu_report_fault_and_kill(
 | |
| 						kctx, faulting_as,
 | |
| 						"Failed pre-allocating memory for sub-allocations' metadata",
 | |
| 						fault);
 | |
| 					goto fault_done;
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* so we have a translation fault,
 | |
| 	 * let's see if it is for growable memory
 | |
| 	 */
 | |
| 	kbase_gpu_vm_lock(kctx);
 | |
| 
 | |
| 	region = kbase_region_tracker_find_region_enclosing_address(kctx, fault->addr);
 | |
| 	if (kbase_is_region_invalid_or_free(region)) {
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 | |
| 						"Memory is not mapped on the GPU", fault);
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 | |
| 						"DMA-BUF is not mapped on the GPU", fault);
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	if (unlikely(region->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) {
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 | |
| 						"Unexpected page fault on an alias region",
 | |
| 						&faulting_as->pf_data);
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) {
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Bad physical memory group ID",
 | |
| 						fault);
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	if ((region->flags & GROWABLE_FLAGS_REQUIRED) != GROWABLE_FLAGS_REQUIRED) {
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Memory is not growable", fault);
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	if ((region->flags & KBASE_REG_DONT_NEED)) {
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 | |
| 						"Don't need memory can't be grown", fault);
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == AS_FAULTSTATUS_ACCESS_TYPE_READ)
 | |
| 		dev_warn(kbdev->dev, "Grow on pagefault while reading");
 | |
| 
 | |
	/* Find the size we need to grow the region by. We know the result
	 * fits in a size_t because kbase_region_tracker_find_region_enclosing_address
	 * validated that the fault_address is within a size_t from the start_pfn.
	 */
 | |
| 	fault_rel_pfn = fault_pfn - region->start_pfn;
 | |
| 
 | |
| 	current_backed_size = kbase_reg_current_backed_size(region);
 | |
| 
 | |
| 	if (fault_rel_pfn < current_backed_size) {
 | |
| 		struct kbase_mmu_hw_op_param op_param;
 | |
| 
 | |
| 		dev_dbg(kbdev->dev,
 | |
| 			"Page fault @ VA 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
 | |
| 			fault->addr, region->start_pfn, region->start_pfn + current_backed_size);
 | |
| 
 | |
| 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
 | |
		/* [1] In case another page fault occurred while we were
		 * handling the (duplicate) page fault, we need to ensure we
		 * don't lose the other page fault as a result of us clearing
		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
		 * an UNLOCK command that will retry any stalled memory
		 * transaction (which should cause the other page fault to be
		 * raised again).
		 */
 | |
| 		op_param.mmu_sync_info = mmu_sync_info;
 | |
| 		op_param.kctx_id = kctx->id;
 | |
		/* Usually it is safe to skip the MMU cache invalidation for all
		 * levels in case of duplicate page faults. But for the
		 * pathological scenario where the faulting VA gets mapped by the
		 * time the page fault worker runs, it becomes imperative to
		 * invalidate the MMU cache for all levels, otherwise repeated
		 * page faults are possible on GPUs that support fine-grained
		 * MMU cache invalidation.
		 */
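		/* Note: a flush_skip_levels value of 0x0 requests invalidation at
		 * every page table level; the pgd_level_to_skip_flush(dirty_pgds)
		 * mask is used instead on paths that only need to flush the
		 * levels that were actually modified.
		 */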
 | |
| 		op_param.flush_skip_levels = 0x0;
 | |
| 		op_param.vpfn = fault_pfn;
 | |
| 		op_param.nr = 1;
 | |
| 		spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 		err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param);
 | |
| 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 
 | |
| 		if (err) {
 | |
| 			dev_err(kbdev->dev,
 | |
| 				"Invalidation for MMU did not complete on handling page fault @ VA 0x%llx",
 | |
| 				fault->addr);
 | |
| 		}
 | |
| 
 | |
| 		kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
 | |
| 
 | |
| 	/* cap to max vsize */
 | |
| 	new_pages = min(new_pages, region->nr_pages - current_backed_size);
 | |
| 	dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages);
 | |
| 
 | |
| 	if (new_pages == 0) {
 | |
| 		struct kbase_mmu_hw_op_param op_param;
 | |
| 
 | |
| 		/* Duplicate of a fault we've already handled, nothing to do */
 | |
| 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
 | |
| 
 | |
| 		/* See comment [1] about UNLOCK usage */
 | |
| 		op_param.mmu_sync_info = mmu_sync_info;
 | |
| 		op_param.kctx_id = kctx->id;
 | |
		/* Usually it is safe to skip the MMU cache invalidation for all
		 * levels in case of duplicate page faults. But for the
		 * pathological scenario where the faulting VA gets mapped by the
		 * time the page fault worker runs, it becomes imperative to
		 * invalidate the MMU cache for all levels, otherwise repeated
		 * page faults are possible on GPUs that support fine-grained
		 * MMU cache invalidation.
		 */
 | |
| 		op_param.flush_skip_levels = 0x0;
 | |
| 		op_param.vpfn = fault_pfn;
 | |
| 		op_param.nr = 1;
 | |
| 		spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 		err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param);
 | |
| 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 
 | |
| 		if (err) {
 | |
| 			dev_err(kbdev->dev,
 | |
| 				"Invalidation for MMU did not complete on handling page fault @ VA 0x%llx",
 | |
| 				fault->addr);
 | |
| 		}
 | |
| 
 | |
| 		kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 		goto fault_done;
 | |
| 	}
 | |
| 
 | |
| 	pages_to_grow = 0;
 | |
| 
 | |
| #if MALI_JIT_PRESSURE_LIMIT_BASE
 | |
| 	if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) {
 | |
| 		kbase_jit_request_phys_increase(kctx, new_pages);
 | |
| 		pages_trimmed = new_pages;
 | |
| 	}
 | |
| #endif
 | |
| 
 | |
| 	spin_lock(&kctx->mem_partials_lock);
 | |
| 	grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool,
 | |
| 				     fallback_to_small, prealloc_sas);
 | |
| 	spin_unlock(&kctx->mem_partials_lock);
 | |
| 
 | |
| 	if (grown) {
 | |
| 		u64 dirty_pgds = 0;
 | |
| 		u64 pfn_offset;
 | |
| 		struct kbase_mmu_hw_op_param op_param;
 | |
| 
 | |
| 		/* alloc success */
 | |
| 		WARN_ON(kbase_reg_current_backed_size(region) > region->nr_pages);
 | |
| 
 | |
| 		/* set up the new pages */
 | |
| 		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
 | |
		/*
		 * Note:
		 * Issuing an MMU operation will unlock the MMU and cause the
		 * translation to be replayed. If the page insertion fails then,
		 * rather than trying to continue, the context should be killed,
		 * so the no_flush version of insert_pages is used, which allows
		 * us to unlock the MMU as we see fit.
		 */
 | |
| 		err = mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset,
 | |
| 						&kbase_get_gpu_phy_pages(region)[pfn_offset],
 | |
| 						new_pages, region->flags,
 | |
| 						region->gpu_alloc->group_id, &dirty_pgds, region,
 | |
| 						false);
 | |
| 		if (err) {
 | |
| 			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
 | |
| 			if (region->gpu_alloc != region->cpu_alloc)
 | |
| 				kbase_free_phy_pages_helper(region->cpu_alloc, new_pages);
 | |
| 			kbase_gpu_vm_unlock(kctx);
 | |
| 			/* The locked VA region will be unlocked and the cache
 | |
| 			 * invalidated in here
 | |
| 			 */
 | |
| 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 | |
| 							"Page table update failure", fault);
 | |
| 			goto fault_done;
 | |
| 		}
 | |
| 		KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages);
 | |
| 		if (kbase_reg_is_valid(kbdev, MMU_AS_OFFSET(as_no, FAULTEXTRA)))
 | |
| 			trace_mali_mmu_page_fault_extra_grow(region, fault, new_pages);
 | |
| 		else
 | |
| 			trace_mali_mmu_page_fault_grow(region, fault, new_pages);
 | |
| 
 | |
| 		/* AS transaction begin */
 | |
| 
 | |
		/* Clear the MMU interrupt - this needs to be done after updating
		 * the page tables but before issuing a FLUSH command. The
		 * FLUSH command has the side effect of restarting stalled memory
		 * transactions in other address spaces, which may cause
		 * another fault to occur. If we didn't clear the interrupt at
		 * this stage, a new IRQ might not be raised when the GPU finds
		 * an MMU IRQ is already pending.
		 */
 | |
| 		kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
 | |
| 
 | |
| 		op_param.vpfn = region->start_pfn + pfn_offset;
 | |
| 		op_param.nr = new_pages;
 | |
| 		op_param.op = KBASE_MMU_OP_FLUSH_PT;
 | |
| 		op_param.kctx_id = kctx->id;
 | |
| 		op_param.mmu_sync_info = mmu_sync_info;
 | |
| 		spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 		if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
 | |
| 			/* Unlock to invalidate the TLB (and resume the MMU) */
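			/* Only the levels dirtied by this insertion need invalidating;
			 * e.g. (illustrative) dirty_pgds == 0xC (levels 2 and 3 updated)
			 * yields a skip mask of 0x3, so levels 0 and 1 are skipped.
			 */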
 | |
| 			op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds);
 | |
| 			err = kbase_mmu_hw_do_unlock(kbdev, faulting_as, &op_param);
 | |
| 		} else {
 | |
| 			/* flush L2 and unlock the VA (resumes the MMU) */
 | |
| 			err = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
 | |
| 		}
 | |
| 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 
 | |
| 		if (err) {
 | |
| 			dev_err(kbdev->dev,
 | |
| 				"Flush for GPU page table update did not complete on handling page fault @ VA 0x%llx",
 | |
| 				fault->addr);
 | |
| 		}
 | |
| 
 | |
| 		/* AS transaction end */
 | |
| 
 | |
| 		/* reenable this in the mask */
 | |
| 		kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE);
 | |
| 
 | |
| #ifdef CONFIG_MALI_CINSTR_GWT
 | |
| 		if (kctx->gwt_enabled) {
 | |
| 			/* GWT also tracks growable regions. */
 | |
| 			struct kbasep_gwt_list_element *pos;
 | |
| 
 | |
| 			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
 | |
| 			if (pos) {
 | |
| 				pos->region = region;
 | |
| 				pos->page_addr = (region->start_pfn + pfn_offset) << PAGE_SHIFT;
 | |
| 				pos->num_pages = new_pages;
 | |
| 				list_add(&pos->link, &kctx->gwt_current_list);
 | |
| 			} else {
 | |
| 				dev_warn(kbdev->dev, "kmalloc failure");
 | |
| 			}
 | |
| 		}
 | |
| #endif
 | |
| 
 | |
| #if MALI_JIT_PRESSURE_LIMIT_BASE
 | |
| 		if (pages_trimmed) {
 | |
| 			kbase_jit_done_phys_increase(kctx, pages_trimmed);
 | |
| 			pages_trimmed = 0;
 | |
| 		}
 | |
| #endif
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 	} else {
 | |
| 		int ret = -ENOMEM;
 | |
| 		const u8 group_id = region->gpu_alloc->group_id;
 | |
| 
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 
 | |
| 		/* If the memory pool was insufficient then grow it and retry.
 | |
| 		 * Otherwise fail the allocation.
 | |
| 		 */
 | |
| 		if (pages_to_grow > 0) {
 | |
| 			if (kbase_is_large_pages_enabled() && grow_2mb_pool) {
 | |
| 				/* Round page requirement up to nearest 2 MB */
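				/* e.g. (illustrative) with a large-page pool order of 9,
				 * a request for 1..512 small pages grows the pool by one
				 * 2MB page.
				 */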
 | |
| 				struct kbase_mem_pool *const lp_mem_pool =
 | |
| 					&kctx->mem_pools.large[group_id];
 | |
| 
 | |
| 				pages_to_grow =
 | |
| 					(pages_to_grow + ((1u << lp_mem_pool->order) - 1u)) >>
 | |
| 					lp_mem_pool->order;
 | |
| 
 | |
| 				ret = kbase_mem_pool_grow(lp_mem_pool, pages_to_grow, kctx->task);
 | |
				/* Retry handling the fault with small pages if the
				 * required number of 2MB pages couldn't be allocated.
				 */
 | |
| 				if (ret < 0) {
 | |
| 					fallback_to_small = true;
 | |
| 					dev_dbg(kbdev->dev,
 | |
| 						"No room for 2MB pages, fallback to small pages");
 | |
| 					goto page_fault_retry;
 | |
| 				}
 | |
| 			} else {
 | |
| 				struct kbase_mem_pool *const mem_pool =
 | |
| 					&kctx->mem_pools.small[group_id];
 | |
| 
 | |
| 				ret = kbase_mem_pool_grow(mem_pool, pages_to_grow, kctx->task);
 | |
| 			}
 | |
| 		}
 | |
| 		if (ret < 0) {
 | |
| 			/* failed to extend, handle as a normal PF */
 | |
| 			if (unlikely(ret == -EPERM))
 | |
| 				kbase_ctx_flag_set(kctx, KCTX_PAGE_FAULT_REPORT_SKIP);
 | |
| 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
 | |
| 							"Page allocation failure", fault);
 | |
| 		} else {
 | |
| 			dev_dbg(kbdev->dev, "Try again after pool_grow");
 | |
| 			goto page_fault_retry;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| fault_done:
 | |
| #if MALI_JIT_PRESSURE_LIMIT_BASE
 | |
| 	if (pages_trimmed) {
 | |
| 		kbase_gpu_vm_lock(kctx);
 | |
| 		kbase_jit_done_phys_increase(kctx, pages_trimmed);
 | |
| 		kbase_gpu_vm_unlock(kctx);
 | |
| 	}
 | |
| #if !MALI_USE_CSF
 | |
| 	mutex_unlock(&kctx->jctx.lock);
 | |
| #endif
 | |
| #endif
 | |
| 
 | |
| 	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
 | |
| 		kfree(prealloc_sas[i]);
 | |
| 
 | |
| 	/*
 | |
| 	 * By this point, the fault was handled in some way,
 | |
| 	 * so release the ctx refcount
 | |
| 	 */
 | |
| 	release_ctx(kbdev, kctx);
 | |
| 
 | |
| 	atomic_dec(&kbdev->faults_pending);
 | |
| 	dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data);
 | |
| }
 | |
| 
 | |
/**
 * kbase_mmu_alloc_pgd() - Allocate a PGD
 *
 * @kbdev:    Pointer to the instance of a kbase device.
 * @mmut:     Structure holding details of the MMU table for a kcontext.
 *
 * A 4KB sized PGD page is allocated for the PGD from the memory pool if PAGE_SIZE is 4KB.
 * Otherwise the PGD is sub-allocated from a page that is allocated from the memory pool or
 * from one of the pages earlier allocated for the PGD of @mmut.
 *
 * Return:    Physical address of the allocated PGD, or KBASE_INVALID_PHYSICAL_ADDRESS
 *            on failure.
 */
 | |
| static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
 | |
| {
 | |
| 	u64 *page;
 | |
| 	struct page *p;
 | |
| 	phys_addr_t pgd;
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 
 | |
| #if GPU_PAGES_PER_CPU_PAGE > 1
 | |
| 	pgd = allocate_from_pgd_pages_list(mmut);
 | |
| 	if (pgd != KBASE_INVALID_PHYSICAL_ADDRESS)
 | |
| 		return pgd;
 | |
| #endif
 | |
| 
 | |
| 	p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
 | |
| 	if (!p)
 | |
| 		return KBASE_INVALID_PHYSICAL_ADDRESS;
 | |
| 
 | |
| 	page = kbase_kmap(p);
 | |
| 
 | |
| 	if (page == NULL)
 | |
| 		goto alloc_free;
 | |
| 
 | |
| #if GPU_PAGES_PER_CPU_PAGE > 1
 | |
| 	if (!alloc_pgd_page_metadata(kbdev, mmut, p)) {
 | |
| 		kbase_kunmap(p, page);
 | |
| 		goto alloc_free;
 | |
| 	}
 | |
| 	mmut->num_free_pgd_sub_pages += (GPU_PAGES_PER_CPU_PAGE - 1);
 | |
| 	mmut->last_allocated_pgd_page = p;
 | |
| #endif
 | |
| 
 | |
| 	pgd = page_to_phys(p);
 | |
| 
 | |
| 	/* If the MMU tables belong to a context then account the memory usage
 | |
| 	 * to that context, otherwise the MMU tables are device wide and are
 | |
| 	 * only accounted to the device.
 | |
| 	 */
 | |
| 	if (mmut->kctx) {
 | |
| 		int new_page_count;
 | |
| 
 | |
| 		new_page_count = atomic_add_return(1, &mmut->kctx->used_pages);
 | |
| 		KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, (u64)new_page_count);
 | |
| 		kbase_process_page_usage_inc(mmut->kctx, 1);
 | |
| 	}
 | |
| 
 | |
| 	atomic_add(1, &kbdev->memdev.used_pages);
 | |
| 
 | |
| 	kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1);
 | |
| 
 | |
| 	kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES * GPU_PAGES_PER_CPU_PAGE);
 | |
| 
 | |
	/* As this page is newly created, there is no content to clean or
	 * invalidate in the GPU caches.
	 */
 | |
| 	kbase_mmu_sync_pgd_cpu(kbdev, pgd_dma_addr(p, pgd), PAGE_SIZE);
 | |
| 
 | |
| 	kbase_kunmap(p, page);
 | |
| 	return pgd;
 | |
| 
 | |
| alloc_free:
 | |
| 	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false);
 | |
| 
 | |
| 	return KBASE_INVALID_PHYSICAL_ADDRESS;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1
 | |
|  *
 | |
|  * @kbdev:    Device pointer.
 | |
|  * @mmut:     GPU MMU page table.
 | |
 * @pgd:      Physical address of the level N page directory.
 | |
|  * @vpfn:     The virtual page frame number, in GPU_PAGE_SIZE units.
 | |
|  * @level:    The level of MMU page table (N).
 | |
|  *
 | |
|  * Return:
 | |
|  * * 0 - OK
 | |
|  * * -EFAULT - level N+1 PGD does not exist
 | |
|  * * -EINVAL - kmap() failed for level N PGD PFN
 | |
|  */
 | |
| static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 			    phys_addr_t *pgd, u64 vpfn, int level)
 | |
| {
 | |
| 	u64 *page;
 | |
| 	phys_addr_t target_pgd;
 | |
| 	struct page *p;
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 
 | |
| 	/*
 | |
| 	 * Architecture spec defines level-0 as being the top-most.
 | |
| 	 * This is a bit unfortunate here, but we keep the same convention.
 | |
| 	 */
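	/* Each PGD level decodes 9 bits of the VPFN: shift out the bits that
	 * belong to the lower levels, then mask down to the 512-entry index
	 * for this level.
	 */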
 | |
| 	vpfn >>= (3 - level) * 9;
 | |
| 	vpfn &= 0x1FF;
 | |
| 
 | |
| 	p = pfn_to_page(PFN_DOWN(*pgd));
 | |
| 	page = kmap_pgd(p, *pgd);
 | |
| 	if (page == NULL) {
 | |
| 		dev_err(kbdev->dev, "%s: kmap failure", __func__);
 | |
| 		return -EINVAL;
 | |
| 	}
 | |
| 
 | |
| 	if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
 | |
| 		dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
 | |
| 			vpfn);
 | |
| 		kunmap_pgd(p, page);
 | |
| 		return -EFAULT;
 | |
| 	} else {
 | |
| 		target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
 | |
| 			kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
 | |
| 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
 | |
| 	}
 | |
| 
 | |
| 	kunmap_pgd(p, page);
 | |
| 	*pgd = target_pgd;
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level
 | |
|  *
 | |
|  * @kbdev:    Device pointer.
 | |
|  * @mmut:     GPU MMU page table.
 | |
|  * @vpfn:     The virtual page frame number, in GPU_PAGE_SIZE units.
 | |
|  * @in_level:     The level of MMU page table (N).
 | |
|  * @out_level:    Set to the level of the lowest valid PGD found on success.
 | |
|  *                Invalid on error.
 | |
|  * @out_pgd:      Set to the lowest valid PGD found on success.
 | |
|  *                Invalid on error.
 | |
|  *
 | |
 * Does a page table walk starting from the top level (L0) down to in_level to find a valid
 * PGD at or closest to in_level.
 | |
|  *
 | |
|  * Terminology:
 | |
|  * Level-0 = Top-level = highest
 | |
|  * Level-3 = Bottom-level = lowest
 | |
|  *
 | |
|  * Return:
 | |
|  * * 0 - OK
 | |
|  * * -EINVAL - kmap() failed during page table walk.
 | |
|  */
 | |
| static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 				    u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd)
 | |
| {
 | |
| 	phys_addr_t pgd;
 | |
| 	int l;
 | |
| 	int err = 0;
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 	pgd = mmut->pgd;
 | |
| 
 | |
| 	for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) {
 | |
| 		err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
 | |
| 
 | |
| 		/* Handle failure condition */
 | |
| 		if (err) {
 | |
| 			dev_dbg(kbdev->dev,
 | |
| 				"%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
 | |
| 				__func__, l + 1);
 | |
| 			break;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	*out_pgd = pgd;
 | |
| 	*out_level = l;
 | |
| 
 | |
| 	/* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid.
 | |
| 	 * This implies that we have found the lowest valid pgd. Reset the error code.
 | |
| 	 */
 | |
| 	if (err == -EFAULT)
 | |
| 		err = 0;
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO);
 | |
| 
 | |
/*
 * On success, sets out_pgd to the PGD for the specified level of translation.
 * Returns -EFAULT if a valid PGD is not found.
 */
 | |
| static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
 | |
| 				int level, phys_addr_t *out_pgd)
 | |
| {
 | |
| 	phys_addr_t pgd;
 | |
| 	int l;
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 	pgd = mmut->pgd;
 | |
| 
 | |
| 	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
 | |
| 		int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
 | |
| 		/* Handle failure condition */
 | |
| 		if (err) {
 | |
| 			dev_err(kbdev->dev,
 | |
| 				"%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
 | |
| 				__func__, l + 1);
 | |
| 			return err;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	*out_pgd = pgd;
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
 | |
| 					      struct kbase_mmu_table *mmut, u64 from_vpfn,
 | |
| 					      u64 to_vpfn, u64 *dirty_pgds,
 | |
| 					      struct tagged_addr *phys, bool ignore_page_migration)
 | |
| {
 | |
| 	u64 vpfn = from_vpfn;
 | |
| 	struct kbase_mmu_mode const *mmu_mode;
 | |
| 
 | |
| 	/* Both from_vpfn and to_vpfn are in GPU_PAGE_SIZE units */
 | |
| 
 | |
| 	/* 64-bit address range is the max */
 | |
| 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / GPU_PAGE_SIZE));
 | |
| 	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 
 | |
| 	mmu_mode = kbdev->mmu_mode;
 | |
| 	kbase_mmu_reset_free_pgds_list(mmut);
 | |
| 
 | |
| 	while (vpfn < to_vpfn) {
 | |
| 		unsigned int idx = vpfn & 0x1FF;
 | |
| 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
 | |
| 		unsigned int pcount = 0;
 | |
| 		unsigned int left = to_vpfn - vpfn;
 | |
| 		int level;
 | |
| 		u64 *page;
 | |
| 		phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
 | |
| 		phys_addr_t pgd = mmut->pgd;
 | |
| 		struct page *p = phys_to_page(pgd);
 | |
| 
 | |
| 		register unsigned int num_of_valid_entries;
 | |
| 
 | |
| 		if (count > left)
 | |
| 			count = left;
 | |
| 
 | |
| 		/* need to check if this is a 2MB page or a small page */
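		/* Walk down from the top level and stop at the level that holds a
		 * valid ATE: level 2 for a 2MB mapping, the bottom level for small
		 * pages.
		 */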
 | |
| 		for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
 | |
| 			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
 | |
| 			pgds[level] = pgd;
 | |
| 			page = kmap_pgd(p, pgd);
 | |
| 			if (mmu_mode->ate_is_valid(page[idx], level))
 | |
| 				break; /* keep the mapping */
 | |
| 			kunmap_pgd(p, page);
 | |
| 			pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
 | |
| 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx]));
 | |
| 			p = phys_to_page(pgd);
 | |
| 		}
 | |
| 
 | |
| 		switch (level) {
 | |
| 		case MIDGARD_MMU_LEVEL(2):
 | |
| 			/* remap to single entry to update */
 | |
| 			pcount = 1;
 | |
| 			break;
 | |
| 		case MIDGARD_MMU_BOTTOMLEVEL:
 | |
| 			/* page count is the same as the logical count */
 | |
| 			pcount = count;
 | |
| 			break;
 | |
| 		default:
 | |
			dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, level);
 | |
| 			goto next;
 | |
| 		}
 | |
| 
 | |
| 		if (dirty_pgds && pcount > 0)
 | |
| 			*dirty_pgds |= 1ULL << level;
 | |
| 
 | |
| 		num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
 | |
| 		if (WARN_ON_ONCE(num_of_valid_entries < pcount))
 | |
| 			num_of_valid_entries = 0;
 | |
| 		else
 | |
| 			num_of_valid_entries -= pcount;
 | |
| 
 | |
| 		/* Invalidate the entries we added */
 | |
| 		mmu_mode->entries_invalidate(&page[idx], pcount);
 | |
| 
 | |
| 		if (!num_of_valid_entries) {
 | |
| 			mmu_mode->set_num_valid_entries(page, 0);
 | |
| 
 | |
| 			kunmap_pgd(p, page);
 | |
| 
 | |
| 			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1,
 | |
| 							      KBASE_MMU_OP_NONE, dirty_pgds, 0);
 | |
| 
 | |
			/* No CPU or GPU cache maintenance is done here, as the caller
			 * will do a complete flush of the GPU cache and invalidation of
			 * the TLB before the PGD page is freed. The CPU cache flush will
			 * be done when the PGD page is returned to the memory pool.
			 */
 | |
| 
 | |
| 			kbase_mmu_add_to_free_pgds_list(mmut, pgd);
 | |
| 
 | |
| 			vpfn += count;
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
 | |
| 
 | |
| 		/* MMU cache flush strategy is NONE because GPU cache maintenance is
 | |
| 		 * going to be done by the caller
 | |
| 		 */
 | |
| 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
 | |
| 				   pgd_dma_addr(p, pgd) + sizeof(u64) * idx, sizeof(u64) * pcount,
 | |
| 				   KBASE_MMU_OP_NONE);
 | |
| 		kunmap_pgd(p, page);
 | |
| next:
 | |
| 		vpfn += count;
 | |
| 	}
 | |
| 
 | |
	/* If page migration is enabled, the only way to recover from failure
	 * is to mark all pages as not movable. It is not predictable what is
	 * going to happen to these pages at this stage. They might become
	 * movable again once they are returned to a memory pool.
	 */
 | |
| 	if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys &&
 | |
| 	    !is_huge(*phys) && !is_partial(*phys)) {
 | |
| 		const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE;
 | |
| 		u64 i;
 | |
| 
 | |
| 		for (i = 0; i < num_pages; i++) {
 | |
| 			struct page *phys_page = as_page(phys[i]);
 | |
| 			struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
 | |
| 
 | |
| 			if (page_md) {
 | |
| 				spin_lock(&page_md->migrate_lock);
 | |
| 				page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
 | |
| 				spin_unlock(&page_md->migrate_lock);
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
 | |
| 					      struct kbase_mmu_table *mmut, const u64 vpfn,
 | |
| 					      size_t nr, u64 dirty_pgds,
 | |
| 					      enum kbase_caller_mmu_sync_info mmu_sync_info,
 | |
| 					      bool insert_pages_failed)
 | |
| {
 | |
| 	struct kbase_mmu_hw_op_param op_param;
 | |
| 	int as_nr = 0;
 | |
| 
 | |
| 	op_param.vpfn = vpfn;
 | |
| 	op_param.nr = nr;
 | |
| 	op_param.op = KBASE_MMU_OP_FLUSH_PT;
 | |
| 	op_param.mmu_sync_info = mmu_sync_info;
 | |
| 	op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF;
 | |
| 	op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds);
 | |
| 
 | |
| #if MALI_USE_CSF
 | |
| 	as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR;
 | |
| #else
 | |
| 	WARN_ON(!mmut->kctx);
 | |
| #endif
 | |
| 
 | |
| 	/* MMU cache flush strategy depends on whether GPU control commands for
 | |
| 	 * flushing physical address ranges are supported. The new physical pages
 | |
| 	 * are not present in GPU caches therefore they don't need any cache
 | |
| 	 * maintenance, but PGDs in the page table may or may not be created anew.
 | |
| 	 *
 | |
| 	 * Operations that affect the whole GPU cache shall only be done if it's
 | |
| 	 * impossible to update physical ranges.
 | |
| 	 *
 | |
| 	 * On GPUs where flushing by physical address range is supported,
 | |
| 	 * full cache flush is done when an error occurs during
 | |
| 	 * insert_pages() to keep the error handling simpler.
 | |
| 	 */
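	/* In other words: invalidate only the MMU caches when fine-grained
	 * flushing is available and the insertion succeeded; otherwise fall
	 * back to the full flush-and-invalidate path.
	 */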
 | |
| 	if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed)
 | |
| 		mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
 | |
| 	else
 | |
| 		mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * update_parent_pgds() - Updates the page table from bottom level towards
 | |
|  *                        the top level to insert a new ATE
 | |
|  *
 | |
|  * @kbdev:    Device pointer.
 | |
|  * @mmut:     GPU MMU page table.
 | |
|  * @cur_level:    The level of MMU page table where the ATE needs to be added.
 | |
|  *                The bottom PGD level.
 | |
|  * @insert_level: The level of MMU page table where the chain of newly allocated
 | |
|  *                PGDs needs to be linked-in/inserted.
 | |
|  * @insert_vpfn:  The virtual page frame number, in GPU_PAGE_SIZE units, for the ATE.
 | |
|  * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
 | |
|  *                  the physical addresses of newly allocated PGDs from index
 | |
|  *                  insert_level+1 to cur_level, and an existing PGD at index
 | |
|  *                  insert_level.
 | |
|  *
 | |
|  * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
 | |
|  * at insert_level which already exists in the MMU Page Tables. Migration status is also
 | |
|  * updated for all the newly allocated PGD pages.
 | |
|  *
 | |
|  * Return:
 | |
|  * * 0 - OK
 | |
|  * * -EFAULT - level N+1 PGD does not exist
 | |
|  * * -EINVAL - kmap() failed for level N PGD PFN
 | |
|  */
 | |
| static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 			      int cur_level, int insert_level, u64 insert_vpfn,
 | |
| 			      phys_addr_t *pgds_to_insert)
 | |
| {
 | |
| 	int pgd_index;
 | |
| 	int err = 0;
 | |
| 
 | |
| 	/* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1)
 | |
| 	 * Loop runs from the bottom-most to the top-most level so that all entries in the chain
 | |
| 	 * are valid when they are inserted into the MMU Page table via the insert_level PGD.
 | |
| 	 */
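	/* For example (illustrative): with cur_level == 3 and insert_level == 1,
	 * the first iteration links the new level-3 PGD into the new level-2
	 * PGD, and the second iteration links the new level-2 PGD into the
	 * existing level-1 PGD.
	 */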
 | |
| 	for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) {
 | |
| 		int parent_index = pgd_index - 1;
 | |
| 		phys_addr_t parent_pgd = pgds_to_insert[parent_index];
 | |
| 		unsigned int current_valid_entries;
 | |
| 		u64 pte;
 | |
| 		phys_addr_t target_pgd = pgds_to_insert[pgd_index];
 | |
| 		u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
 | |
| 		struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
 | |
| 		u64 *parent_page_va;
 | |
| 
 | |
| 		if (WARN_ON_ONCE(target_pgd == KBASE_INVALID_PHYSICAL_ADDRESS)) {
 | |
| 			err = -EFAULT;
 | |
| 			goto failure_recovery;
 | |
| 		}
 | |
| 
 | |
| 		parent_page_va = kmap_pgd(parent_page, parent_pgd);
 | |
| 
 | |
| 		if (unlikely(parent_page_va == NULL)) {
 | |
| 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
 | |
| 			err = -EINVAL;
 | |
| 			goto failure_recovery;
 | |
| 		}
 | |
| 
 | |
| 		current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
 | |
| 
 | |
| 		kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
 | |
| 		parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
 | |
| 			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, PBHA_ID_DEFAULT, PTE_FLAGS_NONE,
 | |
| 			parent_index, pte);
 | |
| 		kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
 | |
| 		kunmap_pgd(parent_page, parent_page_va);
 | |
| 
 | |
| 		if (parent_index != insert_level) {
 | |
| 			/* Newly allocated PGDs */
 | |
| 			kbase_mmu_sync_pgd_cpu(kbdev,
 | |
| 					       pgd_dma_addr(parent_page, parent_pgd) +
 | |
| 						       (parent_vpfn * sizeof(u64)),
 | |
| 					       sizeof(u64));
 | |
| 		} else {
 | |
			/* A new valid entry is added to an existing PGD. Perform the
			 * invalidate operation for the GPU cache, as it may hold a
			 * cacheline that contains the entry (in an invalid form).
			 */
 | |
| 			kbase_mmu_sync_pgd(
 | |
| 				kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
 | |
| 				pgd_dma_addr(parent_page, parent_pgd) + (parent_vpfn * sizeof(u64)),
 | |
| 				sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
 | |
| 		}
 | |
| 
 | |
| 		/* Update the new target_pgd page to its stable state */
 | |
| 		if (kbase_is_page_migration_enabled()) {
 | |
| 			struct kbase_page_metadata *page_md =
 | |
| 				kbase_page_private(phys_to_page(target_pgd));
 | |
| 
 | |
| 			spin_lock(&page_md->migrate_lock);
 | |
| 
 | |
| #if GPU_PAGES_PER_CPU_PAGE > 1
 | |
| 			page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
 | |
| #else
 | |
| 			WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
 | |
| 				     IS_PAGE_ISOLATED(page_md->status));
 | |
| 
 | |
| 			if (mmut->kctx) {
 | |
| 				page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
 | |
| 				page_md->data.pt_mapped.mmut = mmut;
 | |
| 				page_md->data.pt_mapped.pgd_vpfn_level =
 | |
| 					PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index);
 | |
| 			} else {
 | |
| 				page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
 | |
| 			}
 | |
| #endif
 | |
| 
 | |
| 			spin_unlock(&page_md->migrate_lock);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return 0;
 | |
| 
 | |
| failure_recovery:
 | |
| 	/* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */
 | |
| 	for (; pgd_index < cur_level; pgd_index++) {
 | |
| 		phys_addr_t pgd = pgds_to_insert[pgd_index];
 | |
| 		struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
 | |
| 		u64 *pgd_page_va = kmap_pgd(pgd_page, pgd);
 | |
| 		u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
 | |
| 
 | |
| 		kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
 | |
| 		kunmap_pgd(pgd_page, pgd_page_va);
 | |
| 	}
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to
 | |
|  *                           level_high (inclusive)
 | |
|  *
 | |
|  * @kbdev:    Device pointer.
 | |
|  * @mmut:     GPU MMU page table.
 | |
|  * @level_low:  The lower bound for the levels for which the PGD allocs are required
 | |
|  * @level_high: The higher bound for the levels for which the PGD allocs are required
 | |
|  * @new_pgds:   Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
 | |
|  *              newly allocated PGD addresses to.
 | |
|  * @pool_grown: True if new PGDs required the memory pool to grow to allocate more pages,
 | |
|  *              or false otherwise
 | |
|  *
 | |
|  * Numerically, level_low < level_high, not to be confused with top level and
 | |
|  * bottom level concepts for MMU PGDs. They are only used as low and high bounds
 | |
|  * in an incrementing for-loop.
 | |
|  *
 | |
|  * Return:
 | |
|  * * 0 - OK
 | |
|  * * -ENOMEM - allocation failed for a PGD.
 | |
|  */
 | |
| static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 				 phys_addr_t *new_pgds, int level_low, int level_high,
 | |
| 				 bool *pool_grown)
 | |
| {
 | |
| 	int err = 0;
 | |
| 	int i;
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 
 | |
| 	*pool_grown = false;
 | |
| 	for (i = level_low; i <= level_high; i++) {
 | |
| 		if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
 | |
| 			continue;
 | |
| 		do {
 | |
| 			new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
 | |
| 			if (new_pgds[i] != KBASE_INVALID_PHYSICAL_ADDRESS)
 | |
| 				break;
 | |
| 			mutex_unlock(&mmut->mmu_lock);
 | |
| 			err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
 | |
| 						  (size_t)level_high, NULL);
 | |
| 			mutex_lock(&mmut->mmu_lock);
 | |
| 			if (err) {
 | |
| 				dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
 | |
| 					__func__, err);
 | |
| 				return err;
 | |
| 			}
 | |
| 			*pool_grown = true;
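			/* The mmu_lock was dropped while growing the pool, so the page
			 * table may have changed; *pool_grown tells the caller to re-walk
			 * the table before retrying the insertion.
			 */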
 | |
| 		} while (1);
 | |
| 	}
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
 | |
| 					struct tagged_addr phys, size_t nr, unsigned long flags,
 | |
| 					int const group_id,
 | |
| 					enum kbase_caller_mmu_sync_info mmu_sync_info,
 | |
| 					bool ignore_page_migration)
 | |
| {
 | |
| 	phys_addr_t pgd;
 | |
| 	u64 *pgd_page;
 | |
| 	u64 insert_vpfn = start_vpfn;
 | |
| 	size_t remain = nr;
 | |
| 	int err;
 | |
| 	struct kbase_device *kbdev;
 | |
| 	u64 dirty_pgds = 0;
 | |
| 	unsigned int i;
 | |
| 	phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
 | |
| 	enum kbase_mmu_op_type flush_op;
 | |
| 	struct kbase_mmu_table *mmut = &kctx->mmu;
 | |
| 	int l, cur_level, insert_level;
 | |
| 	const phys_addr_t base_phys_address = as_phys_addr_t(phys);
 | |
| 
 | |
| 	if (WARN_ON(kctx == NULL))
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	lockdep_assert_held(&kctx->reg_lock);
 | |
| 
 | |
| 	/* 64-bit address range is the max */
 | |
| 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
 | |
| 
 | |
| 	kbdev = kctx->kbdev;
 | |
| 
 | |
| 	/* Early out if there is nothing to do */
 | |
| 	if (nr == 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	/* Convert to GPU_PAGE_SIZE units. */
 | |
| 	insert_vpfn *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 	remain *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 
 | |
| 	/* If page migration is enabled, pages involved in multiple GPU mappings
 | |
| 	 * are always treated as not movable.
 | |
| 	 */
 | |
| 	if (kbase_is_page_migration_enabled() && !ignore_page_migration) {
 | |
| 		struct page *phys_page = as_page(phys);
 | |
| 		struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
 | |
| 
 | |
| 		if (page_md) {
 | |
| 			spin_lock(&page_md->migrate_lock);
 | |
| 			page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
 | |
| 			spin_unlock(&page_md->migrate_lock);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	mutex_lock(&mmut->mmu_lock);
 | |
| 
 | |
| 	while (remain) {
 | |
| 		unsigned int vindex = insert_vpfn & 0x1FF;
 | |
| 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
 | |
| 		struct page *p;
 | |
| 		register unsigned int num_of_valid_entries;
 | |
| 		bool newly_created_pgd = false;
 | |
| 		bool pool_grown;
 | |
| 
 | |
| 		if (count > remain)
 | |
| 			count = remain;
 | |
| 
 | |
| 		cur_level = MIDGARD_MMU_BOTTOMLEVEL;
 | |
| 		insert_level = cur_level;
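		/* Mark every level that might need a new PGD as invalid, so that the
		 * error path only frees PGDs that were actually allocated.
		 */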
 | |
| 
 | |
| 		for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
 | |
| 			new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
 | |
| 
 | |
| repeat_page_table_walk:
 | |
| 		/*
 | |
| 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
 | |
| 		 * suboptimal. We don't have to re-parse the whole tree
 | |
| 		 * each time (just cache the l0-l2 sequence).
 | |
| 		 * On the other hand, it's only a gain when we map more than
 | |
| 		 * 256 pages at once (on average). Do we really care?
 | |
| 		 */
 | |
| 		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */
 | |
| 		err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
 | |
| 					       &pgd);
 | |
| 
 | |
| 		if (err) {
 | |
| 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
 | |
| 				__func__, err);
 | |
| 			goto fail_unlock_free_pgds;
 | |
| 		}
 | |
| 
 | |
| 		/* No valid pgd at cur_level */
 | |
| 		if (insert_level != cur_level) {
 | |
| 			/* Allocate new pgds for all missing levels from the required level
 | |
| 			 * down to the lowest valid pgd at insert_level
 | |
| 			 */
 | |
| 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
 | |
| 						    cur_level, &pool_grown);
 | |
| 			if (err)
 | |
| 				goto fail_unlock_free_pgds;
 | |
| 
 | |
| 			if (pool_grown)
 | |
| 				goto repeat_page_table_walk;
 | |
| 
 | |
| 			newly_created_pgd = true;
 | |
| 
 | |
| 			new_pgds[insert_level] = pgd;
 | |
| 
 | |
| 			/* If we didn't find an existing valid pgd at cur_level,
 | |
| 			 * we've now allocated one. The ATE in the next step should
 | |
| 			 * be inserted in this newly allocated pgd.
 | |
| 			 */
 | |
| 			pgd = new_pgds[cur_level];
 | |
| 		}
 | |
| 
 | |
| 		p = pfn_to_page(PFN_DOWN(pgd));
 | |
| 
 | |
| 		pgd_page = kmap_pgd(p, pgd);
 | |
| 		if (!pgd_page) {
 | |
| 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
 | |
| 			err = -ENOMEM;
 | |
| 
 | |
| 			goto fail_unlock_free_pgds;
 | |
| 		}
 | |
| 
 | |
| 		num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page);
 | |
| 
 | |
| 		for (i = 0; i < count; i += GPU_PAGES_PER_CPU_PAGE) {
 | |
| 			unsigned int j;
 | |
| 
 | |
| 			for (j = 0; j < GPU_PAGES_PER_CPU_PAGE; j++) {
 | |
| 				unsigned int ofs = vindex + i + j;
 | |
| 				phys_addr_t page_address = base_phys_address + (j * GPU_PAGE_SIZE);
 | |
| 
 | |
				/* Warn if the current entry is already a valid ATE; the page
				 * table should not contain anything at this position.
				 */
 | |
| 				WARN_ON_ONCE((pgd_page[ofs] & 1UL));
 | |
| 				pgd_page[ofs] = kbase_mmu_create_ate(kbdev, as_tagged(page_address),
 | |
| 								     flags, MIDGARD_MMU_BOTTOMLEVEL,
 | |
| 								     group_id);
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries + count);
 | |
| 
 | |
| 		dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL);
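		/* Record which level's PGD was written: if a new chain of PGDs was
		 * created, only the existing PGD at insert_level was modified in
		 * place, so that is the level the later invalidation must cover.
		 */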
 | |
| 
 | |
| 		/* MMU cache flush operation here will depend on whether bottom level
 | |
| 		 * PGD is newly created or not.
 | |
| 		 *
 | |
| 		 * If bottom level PGD is newly created then no GPU cache maintenance is
 | |
| 		 * required as the PGD will not exist in GPU cache. Otherwise GPU cache
 | |
| 		 * maintenance is required for existing PGD.
 | |
| 		 */
 | |
| 		flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
 | |
| 
 | |
| 		kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
 | |
| 				   pgd_dma_addr(p, pgd) + (vindex * sizeof(u64)),
 | |
| 				   count * sizeof(u64), flush_op);
 | |
| 
 | |
| 		if (newly_created_pgd) {
 | |
| 			err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
 | |
| 						 new_pgds);
 | |
| 			if (err) {
 | |
| 				dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
 | |
| 					__func__, err);
 | |
| 
 | |
| 				kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
 | |
| 
 | |
| 				kunmap_pgd(p, pgd_page);
 | |
| 				goto fail_unlock_free_pgds;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		insert_vpfn += count;
 | |
| 		remain -= count;
 | |
| 		kunmap_pgd(p, pgd_page);
 | |
| 	}
 | |
| 
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 
 | |
| 	mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
 | |
| 					  false);
 | |
| 
 | |
| 	return 0;
 | |
| 
 | |
| fail_unlock_free_pgds:
 | |
| 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
 | |
| 	for (l = cur_level; l > insert_level; l--)
 | |
| 		if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
 | |
| 			kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
 | |
| 
 | |
| 	if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
 | |
| 		/* Invalidate the pages we have partially completed */
 | |
| 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,
 | |
| 						  insert_vpfn, &dirty_pgds, NULL, true);
 | |
| 	}
 | |
| 
 | |
| 	mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
 | |
| 					  true);
 | |
| 	kbase_mmu_free_pgds_list(kbdev, mmut);
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
 | |
| 					  struct tagged_addr phys, size_t nr, unsigned long flags,
 | |
| 					  int const group_id,
 | |
| 					  enum kbase_caller_mmu_sync_info mmu_sync_info)
 | |
| {
 | |
| 	/* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
 | |
| 	return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
 | |
| 					    false);
 | |
| }
 | |
| 
 | |
| int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
 | |
| 					 struct tagged_addr phys, size_t nr, unsigned long flags,
 | |
| 					 int const group_id,
 | |
| 					 enum kbase_caller_mmu_sync_info mmu_sync_info)
 | |
| {
 | |
| 	/* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
 | |
| 	return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
 | |
| 					    false);
 | |
| }
 | |
| 
 | |
| static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
 | |
| 						   struct kbase_va_region *reg,
 | |
| 						   struct kbase_mmu_table *mmut, const u64 vpfn)
 | |
| {
 | |
| 	struct page *phys_page = as_page(phys);
 | |
| 	struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
 | |
| 
 | |
| 	if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
 | |
| 		return;
 | |
| 
 | |
| 	spin_lock(&page_md->migrate_lock);
 | |
| 
 | |
	/* If no GPU VA region is given, the provided metadata is
	 * invalid.
 | |
| 	 *
 | |
| 	 * If the page is already allocated and mapped: this is
 | |
| 	 * an additional GPU mapping, probably to create a memory
 | |
| 	 * alias, which means it is no longer possible to migrate
 | |
| 	 * the page easily because tracking all the GPU mappings
 | |
| 	 * would be too costly.
 | |
| 	 *
 | |
| 	 * In any case: the page becomes not movable. It is kept
 | |
| 	 * alive, but attempts to migrate it will fail. The page
 | |
| 	 * will be freed if it is still not movable when it returns
 | |
| 	 * to a memory pool. Notice that the movable flag is not
 | |
| 	 * cleared because that would require taking the page lock.
 | |
| 	 */
 | |
| 	if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) {
 | |
| 		page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
 | |
| 	} else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) {
 | |
| 		page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED);
 | |
| 		page_md->data.mapped.reg = reg;
 | |
| 		page_md->data.mapped.mmut = mmut;
 | |
| 		page_md->data.mapped.vpfn = vpfn;
 | |
| 	}
 | |
| 
 | |
| 	spin_unlock(&page_md->migrate_lock);
 | |
| }
 | |
| 
 | |
| static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev,
 | |
| 						     struct tagged_addr *phys, size_t requested_nr)
 | |
| {
 | |
| 	size_t i;
 | |
| 
 | |
| 	if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
 | |
| 		return;
 | |
| 
 | |
| 	for (i = 0; i < requested_nr; i++) {
 | |
| 		struct page *phys_page = as_page(phys[i]);
 | |
| 		struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
 | |
| 
 | |
| 		/* Skip the small page that is part of a large page, as the large page is
 | |
| 		 * excluded from the migration process.
 | |
| 		 */
 | |
| 		if (is_huge(phys[i]) || is_partial(phys[i]))
 | |
| 			continue;
 | |
| 
 | |
| 		if (page_md) {
 | |
| 			u8 status;
 | |
| 
 | |
| 			spin_lock(&page_md->migrate_lock);
 | |
| 			status = PAGE_STATUS_GET(page_md->status);
 | |
| 
 | |
| 			if (status == ALLOCATED_MAPPED) {
 | |
| 				if (IS_PAGE_ISOLATED(page_md->status)) {
 | |
| 					page_md->status = PAGE_STATUS_SET(
 | |
| 						page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS);
 | |
| 					page_md->data.free_isolated.kbdev = kbdev;
 | |
| 					/* At this point, we still have a reference
 | |
| 					 * to the page via its page migration metadata,
 | |
| 					 * and any page with the FREE_ISOLATED_IN_PROGRESS
 | |
| 					 * status will subsequently be freed in either
 | |
| 					 * kbase_page_migrate() or kbase_page_putback()
 | |
| 					 */
 | |
| 					phys[i] = as_tagged(KBASE_INVALID_PHYSICAL_ADDRESS);
 | |
| 				} else
 | |
| 					page_md->status = PAGE_STATUS_SET(page_md->status,
 | |
| 									  (u8)FREE_IN_PROGRESS);
 | |
| 			}
 | |
| 
 | |
| 			spin_unlock(&page_md->migrate_lock);
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy,
 | |
| 			 unsigned long const flags, int const level, int const group_id)
 | |
| {
 | |
| 	u64 entry;
 | |
| 	unsigned int pte_flags = 0;
 | |
| 
 | |
| 	kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level);
 | |
| 
 | |
| 	if ((flags & KBASE_REG_GPU_CACHED) && !(flags & KBASE_REG_CPU_CACHED))
 | |
| 		pte_flags |= BIT(MMA_VIOLATION);
 | |
| 
 | |
| 	return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, (unsigned int)group_id,
 | |
| 						      kbdev->mma_wa_id, pte_flags, level, entry);
 | |
| }
 | |
| 
 | |
| static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 				     u64 start_vpfn, struct tagged_addr *phys, size_t nr,
 | |
| 				     unsigned long flags, int const group_id, u64 *dirty_pgds,
 | |
| 				     struct kbase_va_region *reg, bool ignore_page_migration)
 | |
| {
 | |
| 	phys_addr_t pgd;
 | |
| 	u64 *pgd_page;
 | |
| 	u64 insert_vpfn = start_vpfn;
 | |
| 	size_t remain = nr;
 | |
| 	int err;
 | |
| 	struct kbase_mmu_mode const *mmu_mode;
 | |
| 	unsigned int i;
 | |
| 	phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
 | |
| 	int l, cur_level, insert_level;
 | |
| 	struct tagged_addr *start_phys = phys;
 | |
| 
 | |
| 	if (mmut->kctx)
 | |
| 		lockdep_assert_held(&mmut->kctx->reg_lock);
 | |
| 
 | |
| 	/* Note that 0 is a valid start_vpfn */
 | |
| 	/* 64-bit address range is the max */
 | |
| 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
 | |
| 
 | |
| 	mmu_mode = kbdev->mmu_mode;
 | |
| 
 | |
| 	/* Early out if there is nothing to do */
 | |
| 	if (nr == 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	/* Convert to GPU_PAGE_SIZE units. */
 | |
| 	insert_vpfn *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 	remain *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 	mutex_lock(&mmut->mmu_lock);
 | |
| 
 | |
| 	while (remain) {
 | |
| 		unsigned int vindex = insert_vpfn & 0x1FF;
 | |
| 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
 | |
| 		struct page *p;
 | |
| 		register unsigned int num_of_valid_entries;
 | |
| 		bool newly_created_pgd = false;
 | |
| 		enum kbase_mmu_op_type flush_op;
 | |
| 		bool pool_grown;
 | |
| 
 | |
| 		if (count > remain)
 | |
| 			count = remain;
 | |
| 
 | |
| 		/* There are 3 conditions to satisfy in order to create a level 2 ATE:
 | |
| 		 *
 | |
| 		 * - The GPU VA is aligned to 2 MB.
 | |
| 		 * - The physical address is tagged as the head of a 2 MB region,
 | |
| 		 *   which guarantees a contiguous physical address range.
 | |
| 		 * - There are actually 2 MB of virtual and physical pages to map,
 | |
| 		 *   i.e. 512 entries for the MMU page table.
 | |
| 		 */
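		/* These conditions correspond, in order, to the three checks below:
		 * vindex == 0, is_huge_head(*phys), and count == KBASE_MMU_PAGE_ENTRIES.
		 */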
 | |
| 		if (!vindex && is_huge_head(*phys) && (count == KBASE_MMU_PAGE_ENTRIES))
 | |
| 			cur_level = MIDGARD_MMU_LEVEL(2);
 | |
| 		else
 | |
| 			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
 | |
| 
 | |
| 		insert_level = cur_level;
 | |
| 
 | |
| 		for (l = MIDGARD_MMU_TOPLEVEL + 1; l <= cur_level; l++)
 | |
| 			new_pgds[l] = KBASE_INVALID_PHYSICAL_ADDRESS;
 | |
| 
 | |
| repeat_page_table_walk:
 | |
| 		/*
 | |
| 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
 | |
| 		 * suboptimal. We don't have to re-parse the whole tree
 | |
| 		 * each time (just cache the l0-l2 sequence).
 | |
| 		 * On the other hand, it's only a gain when we map more than
 | |
| 		 * 256 pages at once (on average). Do we really care?
 | |
| 		 */
 | |
| 		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */
 | |
| 		err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
 | |
| 					       &pgd);
 | |
| 
 | |
| 		if (err) {
 | |
| 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
 | |
| 				__func__, err);
 | |
| 			goto fail_unlock_free_pgds;
 | |
| 		}
 | |
| 
 | |
| 		/* No valid pgd at cur_level */
 | |
| 		if (insert_level != cur_level) {
 | |
| 			/* Allocate new pgds for all missing levels from the required level
 | |
| 			 * down to the lowest valid pgd at insert_level
 | |
| 			 */
 | |
| 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
 | |
| 						    cur_level, &pool_grown);
 | |
| 			if (err)
 | |
| 				goto fail_unlock_free_pgds;
 | |
| 
 | |
| 			if (pool_grown)
 | |
| 				goto repeat_page_table_walk;
 | |
| 
 | |
| 			newly_created_pgd = true;
 | |
| 
 | |
| 			new_pgds[insert_level] = pgd;
 | |
| 
 | |
| 			/* If we didn't find an existing valid pgd at cur_level,
 | |
| 			 * we've now allocated one. The ATE in the next step should
 | |
| 			 * be inserted in this newly allocated pgd.
 | |
| 			 */
 | |
| 			pgd = new_pgds[cur_level];
 | |
| 		}
 | |
| 
 | |
| 		p = pfn_to_page(PFN_DOWN(pgd));
 | |
| 		pgd_page = kmap_pgd(p, pgd);
 | |
| 
 | |
| 		if (!pgd_page) {
 | |
| 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
 | |
| 			err = -ENOMEM;
 | |
| 
 | |
| 			goto fail_unlock_free_pgds;
 | |
| 		}
 | |
| 
 | |
| 		num_of_valid_entries = mmu_mode->get_num_valid_entries(pgd_page);
 | |
| 
 | |
| 		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
 | |
| 			int level_index = (insert_vpfn >> 9) & 0x1FF;
 | |
| 			pgd_page[level_index] =
 | |
| 				kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id);
 | |
| 
 | |
| 			num_of_valid_entries++;
 | |
| 		} else {
 | |
| 			for (i = 0; i < count; i += GPU_PAGES_PER_CPU_PAGE) {
 | |
| 				struct tagged_addr base_tagged_addr =
 | |
| 					phys[i / GPU_PAGES_PER_CPU_PAGE];
 | |
| 				phys_addr_t base_phys_address = as_phys_addr_t(base_tagged_addr);
 | |
| 				unsigned int j;
 | |
| 
 | |
| 				for (j = 0; j < GPU_PAGES_PER_CPU_PAGE; j++) {
 | |
| 					unsigned int ofs = vindex + i + j;
 | |
| 					u64 *target = &pgd_page[ofs];
 | |
| 					phys_addr_t page_address =
 | |
| 						base_phys_address + (j * GPU_PAGE_SIZE);
 | |
| 
 | |
					/* Warn if the current entry is already a
					 * valid ATE. The page table should not have
					 * anything in the place where we are trying
					 * to put a new entry. Modifications to page
					 * table entries should be performed with
					 * kbase_mmu_update_pages().
					 */
 | |
| 					WARN_ON_ONCE((*target & 1UL) != 0);
 | |
| 
 | |
| 					*target = kbase_mmu_create_ate(kbdev,
 | |
| 								       as_tagged(page_address),
 | |
| 								       flags, cur_level, group_id);
 | |
| 				}
 | |
| 
 | |
| 				/* If page migration is enabled, this is the right time
 | |
| 				 * to update the status of the page.
 | |
| 				 */
 | |
| 				if (kbase_is_page_migration_enabled() && !ignore_page_migration &&
 | |
| 				    !is_huge(base_tagged_addr) && !is_partial(base_tagged_addr))
 | |
| 					kbase_mmu_progress_migration_on_insert(
 | |
| 						base_tagged_addr, reg, mmut, insert_vpfn + i);
 | |
| 			}
 | |
| 			num_of_valid_entries += count;
 | |
| 		}
 | |
| 
 | |
| 		mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
 | |
| 
 | |
| 		if (dirty_pgds)
 | |
| 			*dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level);
 | |
| 
 | |
| 		/* MMU cache flush operation here will depend on whether bottom level
 | |
| 		 * PGD is newly created or not.
 | |
| 		 *
 | |
| 		 * If bottom level PGD is newly created then no GPU cache maintenance is
 | |
| 		 * required as the PGD will not exist in GPU cache. Otherwise GPU cache
 | |
| 		 * maintenance is required for existing PGD.
 | |
| 		 */
 | |
| 		flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
 | |
| 
 | |
| 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
 | |
| 				   pgd_dma_addr(p, pgd) + (vindex * sizeof(u64)),
 | |
| 				   count * sizeof(u64), flush_op);
 | |
| 
 | |
| 		if (newly_created_pgd) {
 | |
| 			err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
 | |
| 						 new_pgds);
 | |
| 			if (err) {
 | |
| 				dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
 | |
| 					__func__, err);
 | |
| 
 | |
| 				kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
 | |
| 
 | |
| 				kunmap_pgd(p, pgd_page);
 | |
| 				goto fail_unlock_free_pgds;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		phys += (count / GPU_PAGES_PER_CPU_PAGE);
 | |
| 		insert_vpfn += count;
 | |
| 		remain -= count;
 | |
| 		kunmap_pgd(p, pgd_page);
 | |
| 	}
 | |
| 
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 
 | |
| 	return 0;
 | |
| 
 | |
| fail_unlock_free_pgds:
 | |
| 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
 | |
| 	for (l = cur_level; l > insert_level; l--)
 | |
| 		if (new_pgds[l] != KBASE_INVALID_PHYSICAL_ADDRESS)
 | |
| 			kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
 | |
| 
 | |
| 	if (insert_vpfn != (start_vpfn * GPU_PAGES_PER_CPU_PAGE)) {
 | |
| 		/* Invalidate the pages we have partially completed */
 | |
| 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn * GPU_PAGES_PER_CPU_PAGE,
 | |
| 						  insert_vpfn, dirty_pgds, start_phys,
 | |
| 						  ignore_page_migration);
 | |
| 	}
 | |
| 
 | |
| 	mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
 | |
| 					  dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
 | |
| 	kbase_mmu_free_pgds_list(kbdev, mmut);
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 				    const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
 | |
| 				    unsigned long flags, int const group_id, u64 *dirty_pgds,
 | |
| 				    struct kbase_va_region *reg)
 | |
| {
 | |
| 	int err;
 | |
| 
 | |
| 	/* Early out if there is nothing to do */
 | |
| 	if (nr == 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	err = mmu_insert_pages_no_flush(kbdev, mmut, start_vpfn, phys, nr, flags, group_id,
 | |
| 					dirty_pgds, reg, false);
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
 | |
|  * number 'as_nr'.
 | |
|  */
 | |
| int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
 | |
| 			   struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
 | |
| 			   int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
 | |
| 			   struct kbase_va_region *reg)
 | |
| {
 | |
| 	int err;
 | |
| 	u64 dirty_pgds = 0;
 | |
| 
 | |
| 	CSTD_UNUSED(as_nr);
 | |
| 
 | |
| 	/* Early out if there is nothing to do */
 | |
| 	if (nr == 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
 | |
| 					reg, false);
 | |
| 	if (err)
 | |
| 		return err;
 | |
| 
 | |
| 	mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
 | |
| KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO);
 | |
| 
 | |
| int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
 | |
| 					      struct kbase_mmu_table *mmut, u64 vpfn,
 | |
| 					      struct tagged_addr *phys, size_t nr,
 | |
| 					      unsigned long flags, int as_nr, int const group_id,
 | |
| 					      enum kbase_caller_mmu_sync_info mmu_sync_info,
 | |
| 					      struct kbase_va_region *reg)
 | |
| {
 | |
| 	int err;
 | |
| 	u64 dirty_pgds = 0;
 | |
| 
 | |
| 	CSTD_UNUSED(as_nr);
 | |
| 
 | |
| 	/* Early out if there is nothing to do */
 | |
| 	if (nr == 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	/* Imported allocations don't have metadata and therefore always ignore the
 | |
| 	 * page migration logic.
 | |
| 	 */
 | |
| 	err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
 | |
| 					reg, true);
 | |
| 	if (err)
 | |
| 		return err;
 | |
| 
 | |
| 	mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 				   u64 vpfn, struct tagged_addr *phys, size_t nr,
 | |
| 				   unsigned long flags, int as_nr, int const group_id,
 | |
| 				   enum kbase_caller_mmu_sync_info mmu_sync_info,
 | |
| 				   struct kbase_va_region *reg)
 | |
| {
 | |
| 	int err;
 | |
| 	u64 dirty_pgds = 0;
 | |
| 
 | |
| 	CSTD_UNUSED(as_nr);
 | |
| 
 | |
| 	/* Early out if there is nothing to do */
 | |
| 	if (nr == 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	/* Memory aliases are always built on top of existing allocations,
 | |
| 	 * therefore the state of physical pages shall be updated.
 | |
| 	 */
 | |
| 	err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
 | |
| 					reg, false);
 | |
| 	if (err)
 | |
| 		return err;
 | |
| 
 | |
| 	mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO);
 | |
| 
 | |
| void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr)
 | |
| {
 | |
| 	lockdep_assert_held(&kbdev->hwaccess_lock);
 | |
| 	lockdep_assert_held(&kbdev->mmu_hw_mutex);
 | |
| 	KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
 | |
| 
 | |
| 	kbdev->mmu_mode->update(kbdev, mmut, as_nr);
 | |
| }
 | |
| KBASE_EXPORT_TEST_API(kbase_mmu_update);
 | |
| 
 | |
| void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
 | |
| {
 | |
| 	lockdep_assert_held(&kbdev->hwaccess_lock);
 | |
| #if !MALI_USE_CSF
 | |
| 	lockdep_assert_held(&kbdev->mmu_hw_mutex);
 | |
| #endif
 | |
| 
 | |
| 	kbdev->mmu_mode->disable_as(kbdev, as_nr);
 | |
| }
 | |
| 
 | |
| void kbase_mmu_disable(struct kbase_context *kctx)
 | |
| {
 | |
| 	/* Calls to this function are inherently asynchronous, with respect to
 | |
| 	 * MMU operations.
 | |
| 	 */
 | |
| 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
 | |
| 	struct kbase_device *kbdev = kctx->kbdev;
 | |
| 	struct kbase_mmu_hw_op_param op_param = { 0 };
 | |
| 	int lock_err, flush_err;
 | |
| 
 | |
| 	/* ASSERT that the context has a valid as_nr, which is only the case
 | |
| 	 * when it's scheduled in.
 | |
| 	 *
 | |
| 	 * as_nr won't change because the caller has the hwaccess_lock
 | |
| 	 */
 | |
| 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
 | |
| 
 | |
| 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
 | |
| 
 | |
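| 	/* A VPFN of 0 with nr set to ~0U makes the flush cover the entire address
 | |
| 	 * space that is being disabled.
 | |
| 	 */
 | |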
| 	op_param.vpfn = 0;
 | |
| 	op_param.nr = ~0U;
 | |
| 	op_param.op = KBASE_MMU_OP_FLUSH_MEM;
 | |
| 	op_param.kctx_id = kctx->id;
 | |
| 	op_param.mmu_sync_info = mmu_sync_info;
 | |
| 
 | |
| #if MALI_USE_CSF
 | |
| 	/* 0xF value used to prevent skipping of any levels when flushing */
 | |
| 	if (mmu_flush_cache_on_gpu_ctrl(kbdev))
 | |
| 		op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
 | |
| 
 | |
| 	/* lock MMU to prevent existing jobs on GPU from executing while the AS is
 | |
| 	 * not yet disabled
 | |
| 	 */
 | |
| 	lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
 | |
| 	if (lock_err)
 | |
| 		dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
 | |
| 			kctx->id);
 | |
| 
 | |
| 	/* Issue the flush command only when L2 cache is in stable power on state.
 | |
| 	 * Any other state for L2 cache implies that shader cores are powered off,
 | |
| 	 * which in turn implies there is no execution happening on the GPU.
 | |
| 	 */
 | |
| 	if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
 | |
| 		flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
 | |
| 								GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
 | |
| 		if (flush_err)
 | |
| 			dev_err(kbdev->dev,
 | |
| 				"Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
 | |
| 				kctx->as_nr, kctx->tgid, kctx->id);
 | |
| 	}
 | |
| 	kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
 | |
| 
 | |
| 	if (!lock_err) {
 | |
| 		/* unlock the MMU to allow it to resume */
 | |
| 		lock_err =
 | |
| 			kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
 | |
| 		if (lock_err)
 | |
| 			dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
 | |
| 				kctx->tgid, kctx->id);
 | |
| 	}
 | |
| #else
 | |
| 	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
 | |
| 
 | |
| 	CSTD_UNUSED(lock_err);
 | |
| 
 | |
| 	/*
 | |
| 	 * The address space is being disabled, drain all knowledge of it out
 | |
| 	 * from the caches as pages and page tables might be freed after this.
 | |
| 	 *
 | |
| 	 * The job scheduler code will already be holding the locks and context
 | |
| 	 * so just do the flush.
 | |
| 	 */
 | |
| 	flush_err = kbase_mmu_hw_do_flush(kbdev, &kbdev->as[kctx->as_nr], &op_param);
 | |
| 	if (flush_err) {
 | |
| 		dev_err(kbdev->dev,
 | |
| 			"Flush for GPU page table update did not complete to disable AS %d for ctx %d_%d",
 | |
| 			kctx->as_nr, kctx->tgid, kctx->id);
 | |
| 		/* GPU reset would have been triggered by the flush function */
 | |
| 	}
 | |
| 
 | |
| 	kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
 | |
| 
 | |
| 	/*
 | |
| 	 * JM GPUs have some L1 read-only caches that need to be invalidated
 | |
| 	 * with START_FLUSH configuration. Purge the MMU disabled kctx from
 | |
| 	 * the slot_rb tracking field so such invalidation is performed when
 | |
| 	 * a new katom is executed on the affected slots.
 | |
| 	 */
 | |
| 	kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
 | |
| #endif
 | |
| }
 | |
| KBASE_EXPORT_TEST_API(kbase_mmu_disable);
 | |
| 
 | |
| static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
 | |
| 						  struct kbase_mmu_table *mmut, phys_addr_t *pgds,
 | |
| 						  u64 vpfn, int level,
 | |
| 						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
 | |
| 						  int as_nr)
 | |
| {
 | |
| 	phys_addr_t current_pgd = pgds[level];
 | |
| 	struct page *p = phys_to_page(current_pgd);
 | |
| 	u64 *current_page = kmap_pgd(p, current_pgd);
 | |
| 	unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page);
 | |
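| 	/* Each MMU level translates 9 bits of the VPFN: level 3 uses bits [8:0],
 | |
| 	 * level 2 bits [17:9], level 1 bits [26:18] and level 0 bits [35:27].
 | |
| 	 */
 | |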
| 	unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FFU;
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 
 | |
| 	/* We need to track every level that needs updating */
 | |
| 	if (dirty_pgds)
 | |
| 		*dirty_pgds |= 1ULL << level;
 | |
| 
 | |
| 	kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
 | |
| 	if (current_valid_entries == 1 && level != MIDGARD_MMU_LEVEL(0)) {
 | |
| 		kbdev->mmu_mode->set_num_valid_entries(current_page, 0);
 | |
| 
 | |
| 		kunmap_pgd(p, current_page);
 | |
| 
 | |
| 		kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1, flush_op,
 | |
| 						      dirty_pgds, as_nr);
 | |
| 
 | |
| 		/* Check if fine grained GPU cache maintenance is being used */
 | |
| 		if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
 | |
| 			/* Ensure the invalidated PTE is visible in memory right away */
 | |
| 			kbase_mmu_sync_pgd_cpu(kbdev,
 | |
| 					       pgd_dma_addr(p, current_pgd) + (index * sizeof(u64)),
 | |
| 					       sizeof(u64));
 | |
| 			/* Invalidate the GPU cache for the whole PGD page and not just for
 | |
| 			 * the cacheline containing the invalidated PTE, as the PGD page is
 | |
| 			 * going to be freed. There is an extremely remote possibility that
 | |
| 			 * other cachelines (containing all invalid PTEs) of PGD page are
 | |
| 			 * also present in the GPU cache.
 | |
| 			 */
 | |
| 			kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd, 512 * sizeof(u64),
 | |
| 					       KBASE_MMU_OP_FLUSH_PT);
 | |
| 		}
 | |
| 
 | |
| 		kbase_mmu_add_to_free_pgds_list(mmut, current_pgd);
 | |
| 	} else {
 | |
| 		current_valid_entries--;
 | |
| 
 | |
| 		kbdev->mmu_mode->set_num_valid_entries(current_page, current_valid_entries);
 | |
| 
 | |
| 		kunmap_pgd(p, current_page);
 | |
| 
 | |
| 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
 | |
| 				   pgd_dma_addr(p, current_pgd) + (index * sizeof(u64)),
 | |
| 				   sizeof(u64), flush_op);
 | |
| 
 | |
| 		/* When fine grained GPU cache maintenance is used then invalidate the MMU caches
 | |
| 		 * now as the topmost level PGD entry, affected by the teardown operation, has
 | |
| 		 * been invalidated (both in memory as well as in GPU L2 cache). This is to avoid
 | |
| 		 * the possibility of invalid ATEs being reloaded into the GPU L2 cache whilst the
 | |
| 		 * teardown is happening.
 | |
| 		 */
 | |
| 		if (flush_op == KBASE_MMU_OP_FLUSH_PT)
 | |
| 			mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, 1, level, as_nr);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
 | |
|  *
 | |
|  * @kbdev:         Pointer to kbase device.
 | |
|  * @kctx:          Pointer to kbase context.
 | |
|  * @as_nr:         Address space number, for GPU cache maintenance operations
 | |
|  *                 that happen outside a specific kbase context.
 | |
|  * @phys:          Array of physical pages to flush.
 | |
|  * @phys_page_nr:  Number of physical pages to flush.
 | |
|  * @op_param:      Non-NULL pointer to struct containing information about the flush
 | |
|  *                 operation to perform.
 | |
|  *
 | |
|  * This function will do one of three things:
 | |
|  * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
 | |
|  *    individual pages that were unmapped, if the feature is supported on the GPU.
 | |
|  * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if the
 | |
|  *    feature is supported on the GPU, or
 | |
|  * 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
 | |
|  *
 | |
|  * When performing a partial GPU cache flush, the number of physical
 | |
|  * pages does not have to be identical to the number of virtual pages on the MMU,
 | |
|  * to support a single physical address flush for an aliased page.
 | |
|  */
 | |
| static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
 | |
| 						struct kbase_context *kctx, int as_nr,
 | |
| 						struct tagged_addr *phys, size_t phys_page_nr,
 | |
| 						struct kbase_mmu_hw_op_param *op_param)
 | |
| {
 | |
| 	if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
 | |
| 		/* Full cache flush through the MMU_COMMAND */
 | |
| 		mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
 | |
| 	} else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
 | |
| 		/* Full cache flush through the GPU_CONTROL */
 | |
| 		mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param);
 | |
| 	}
 | |
| #if MALI_USE_CSF
 | |
| 	else {
 | |
| 		/* Partial GPU cache flush of the pages that were unmapped */
 | |
| 		unsigned long irq_flags;
 | |
| 		unsigned int i;
 | |
| 		bool flush_done = false;
 | |
| 
 | |
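| 		/* hwaccess_lock is taken and released for each page rather than across
 | |
| 		 * the whole loop; the loop stops early if the GPU is no longer ready or
 | |
| 		 * the context has lost its address space.
 | |
| 		 */
 | |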
| 		for (i = 0; !flush_done && i < phys_page_nr; i++) {
 | |
| 			spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
 | |
| 			if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0))
 | |
| 				mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
 | |
| 						   KBASE_MMU_OP_FLUSH_MEM);
 | |
| 			else
 | |
| 				flush_done = true;
 | |
| 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 | |
| 		}
 | |
| 	}
 | |
| #else
 | |
| 	CSTD_UNUSED(phys);
 | |
| 	CSTD_UNUSED(phys_page_nr);
 | |
| #endif
 | |
| }
 | |
| 
 | |
| static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 					u64 vpfn, size_t nr, u64 *dirty_pgds,
 | |
| 					struct list_head *free_pgds_list,
 | |
| 					enum kbase_mmu_op_type flush_op, int as_nr)
 | |
| {
 | |
| 	struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
 | |
| 
 | |
| 	CSTD_UNUSED(free_pgds_list);
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 	kbase_mmu_reset_free_pgds_list(mmut);
 | |
| 	/* Convert to GPU_PAGE_SIZE units. */
 | |
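| 	/* When the CPU page size is larger than the GPU page size, each CPU page
 | |
| 	 * spans GPU_PAGES_PER_CPU_PAGE GPU pages, so the VPFN and page count are
 | |
| 	 * scaled to operate on GPU-sized pages from here on.
 | |
| 	 */
 | |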
| 	vpfn *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 	nr *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 
 | |
| 	while (nr) {
 | |
| 		unsigned int index = vpfn & 0x1FF;
 | |
| 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
 | |
| 		unsigned int pcount;
 | |
| 		int level;
 | |
| 		u64 *page;
 | |
| 		phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
 | |
| 		register unsigned int num_of_valid_entries;
 | |
| 		phys_addr_t pgd = mmut->pgd;
 | |
| 		struct page *p = phys_to_page(pgd);
 | |
| 
 | |
| 		count = MIN(nr, count);
 | |
| 
 | |
| 		/* need to check if this is a 2MB page or a small page */
 | |
| 		for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
 | |
| 			phys_addr_t next_pgd;
 | |
| 
 | |
| 			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
 | |
| 			page = kmap_pgd(p, pgd);
 | |
| 			if (mmu_mode->ate_is_valid(page[index], level))
 | |
| 				break; /* keep the mapping */
 | |
| 			else if (!mmu_mode->pte_is_valid(page[index], level)) {
 | |
| 				dev_warn(kbdev->dev, "Invalid PTE found @ level %d for VA %llx",
 | |
| 					 level, vpfn << PAGE_SHIFT);
 | |
| 				/* nothing here, advance to the next PTE of the current level */
 | |
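| 				/* One entry at this level spans 1 << ((3 - level) * 9) GPU pages;
 | |
| 				 * subtracting (vpfn & (count - 1)) aligns the skip to the next
 | |
| 				 * entry boundary, e.g. at level 2 a vpfn of 0x205 skips
 | |
| 				 * 0x200 - 0x5 = 507 pages to reach the entry covering 0x400.
 | |
| 				 */
 | |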
| 				count = (1 << ((3 - level) * 9));
 | |
| 				count -= (vpfn & (count - 1));
 | |
| 				count = MIN(nr, count);
 | |
| 				goto next;
 | |
| 			}
 | |
| 			next_pgd = mmu_mode->pte_to_phy_addr(
 | |
| 				kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
 | |
| 					kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index]));
 | |
| 			kunmap_pgd(p, page);
 | |
| 			pgds[level] = pgd;
 | |
| 			pgd = next_pgd;
 | |
| 			p = phys_to_page(pgd);
 | |
| 		}
 | |
| 
 | |
| 		switch (level) {
 | |
| 		case MIDGARD_MMU_LEVEL(0):
 | |
| 		case MIDGARD_MMU_LEVEL(1):
 | |
| 			dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__,
 | |
| 				 level);
 | |
| 			kunmap_pgd(p, page);
 | |
| 			goto out;
 | |
| 		case MIDGARD_MMU_LEVEL(2):
 | |
| 			/* can only teardown if count >= 512 */
 | |
| 			if (count >= 512) {
 | |
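| 				/* A single level 2 ATE covers the full 512-page region, so
 | |
| 				 * only one entry needs to be invalidated.
 | |
| 				 */
 | |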
| 				pcount = 1;
 | |
| 			} else {
 | |
| 				dev_warn(
 | |
| 					kbdev->dev,
 | |
| 					"%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down",
 | |
| 					__func__, count);
 | |
| 				pcount = 0;
 | |
| 			}
 | |
| 			break;
 | |
| 		case MIDGARD_MMU_BOTTOMLEVEL:
 | |
| 			/* page count is the same as the logical count */
 | |
| 			pcount = count;
 | |
| 			break;
 | |
| 		default:
 | |
| 			dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__);
 | |
| 			vpfn += count;
 | |
| 			nr -= count;
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		if (pcount > 0)
 | |
| 			*dirty_pgds |= 1ULL << level;
 | |
| 
 | |
| 		num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
 | |
| 		if (WARN_ON_ONCE(num_of_valid_entries < pcount))
 | |
| 			num_of_valid_entries = 0;
 | |
| 		else
 | |
| 			num_of_valid_entries -= pcount;
 | |
| 
 | |
| 		/* Invalidate the entries being torn down */
 | |
| 		mmu_mode->entries_invalidate(&page[index], pcount);
 | |
| 
 | |
| 		if (!num_of_valid_entries) {
 | |
| 			mmu_mode->set_num_valid_entries(page, 0);
 | |
| 
 | |
| 			kunmap_pgd(p, page);
 | |
| 
 | |
| 			/* To avoid the invalid ATEs from the PGD page (that is going to be freed)
 | |
| 			 * from getting reloaded into the GPU L2 cache whilst the teardown is
 | |
| 			 * happening, the fine grained GPU L2 cache maintenance is done in the top
 | |
| 			 * to bottom level PGD order. MMU cache invalidation is done after
 | |
| 			 * invalidating the entry of the topmost level PGD, affected by the teardown.
 | |
| 			 */
 | |
| 			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level - 1,
 | |
| 							      flush_op, dirty_pgds, as_nr);
 | |
| 
 | |
| 			/* Check if fine grained GPU cache maintenance is being used */
 | |
| 			if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
 | |
| 				/* Ensure the invalidated ATEs are visible in memory right away */
 | |
| 				kbase_mmu_sync_pgd_cpu(kbdev,
 | |
| 						       pgd_dma_addr(p, pgd) + (index * sizeof(u64)),
 | |
| 						       pcount * sizeof(u64));
 | |
| 				/* Invalidate the GPU cache for the whole PGD page and not just for
 | |
| 				 * the cachelines containing the invalidated ATEs, as the PGD page
 | |
| 				 * is going to be freed. There is an extremely remote possibility
 | |
| 				 * that other cachelines (containing all invalid ATEs) of PGD page
 | |
| 				 * are also present in the GPU cache.
 | |
| 				 */
 | |
| 				kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64),
 | |
| 						       KBASE_MMU_OP_FLUSH_PT);
 | |
| 			}
 | |
| 
 | |
| 			kbase_mmu_add_to_free_pgds_list(mmut, pgd);
 | |
| 
 | |
| 			vpfn += count;
 | |
| 			nr -= count;
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
 | |
| 
 | |
| 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
 | |
| 				   pgd_dma_addr(p, pgd) + (index * sizeof(u64)),
 | |
| 				   pcount * sizeof(u64), flush_op);
 | |
| 
 | |
| 		/* When fine grained GPU cache maintenance is used then invalidation of MMU cache
 | |
| 		 * is done inline for every bottom level PGD touched in the teardown.
 | |
| 		 */
 | |
| 		if (flush_op == KBASE_MMU_OP_FLUSH_PT)
 | |
| 			mmu_invalidate_on_teardown(kbdev, mmut->kctx, vpfn, pcount, level, as_nr);
 | |
| next:
 | |
| 		kunmap_pgd(p, page);
 | |
| 		vpfn += count;
 | |
| 		nr -= count;
 | |
| 	}
 | |
| out:
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
 | |
|  *
 | |
|  * @kbdev:    Pointer to kbase device.
 | |
|  * @mmut:     Pointer to GPU MMU page table.
 | |
|  * @vpfn:     Start page frame number (in PAGE_SIZE units) of the GPU virtual pages to unmap.
 | |
|  * @phys:     Array of physical pages currently mapped to the virtual
 | |
|  *            pages to unmap, or NULL. This is used for GPU cache maintenance
 | |
|  *            and page migration support.
 | |
|  * @nr_phys_pages: Number of physical pages (in PAGE_SIZE units) to flush.
 | |
|  * @nr_virt_pages: Number of virtual pages (in PAGE_SIZE units) whose PTEs should be destroyed.
 | |
|  * @as_nr:    Address space number, for GPU cache maintenance operations
 | |
|  *            that happen outside a specific kbase context.
 | |
|  * @ignore_page_migration: Whether page migration metadata should be ignored.
 | |
|  *
 | |
|  * We actually discard the ATE and free the page table pages if no valid entries
 | |
|  * exist in the PGD.
 | |
|  *
 | |
|  * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
 | |
|  * currently scheduled into the runpool, and so potentially uses a lot of locks.
 | |
|  * These locks must be taken in the correct order with respect to others
 | |
|  * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
 | |
|  * information.
 | |
|  *
 | |
|  * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
 | |
|  * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
 | |
|  * GPU cache maintenance will be done as usual; that is, invalidating the GPU caches as a whole
 | |
|  * instead of specific physical address ranges.
 | |
|  *
 | |
|  * Return: 0 on success, otherwise an error code.
 | |
|  */
 | |
| static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
 | |
| 			      struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
 | |
| 			      int as_nr, bool ignore_page_migration)
 | |
| {
 | |
| 	u64 start_vpfn = vpfn;
 | |
| 	enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
 | |
| 	struct kbase_mmu_hw_op_param op_param;
 | |
| 	int err = -EFAULT;
 | |
| 	u64 dirty_pgds = 0;
 | |
| 	LIST_HEAD(free_pgds_list);
 | |
| 
 | |
| 	/* Calls to this function are inherently asynchronous, with respect to
 | |
| 	 * MMU operations.
 | |
| 	 */
 | |
| 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
 | |
| 
 | |
| 	/* This function performs two operations: MMU maintenance and flushing
 | |
| 	 * the caches. To ensure internal consistency between the caches and the
 | |
| 	 * MMU, it does not make sense to be able to flush only the physical pages
 | |
| 	 * from the cache and keep the PTE, nor does it make sense to use this
 | |
| 	 * function to remove a PTE and keep the physical pages in the cache.
 | |
| 	 *
 | |
| 	 * However, we have legitimate cases where we can try to tear down a mapping
 | |
| 	 * with zero virtual and zero physical pages, so we must have the following
 | |
| 	 * behaviour:
 | |
| 	 *  - if either the physical or the virtual page count is zero, return early
 | |
| 	 *  - if there are fewer virtual pages than physical pages, return -EINVAL
 | |
| 	 */
 | |
| 	if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
 | |
| 		return 0;
 | |
| 
 | |
| 	if (unlikely(nr_virt_pages < nr_phys_pages))
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	/* MMU cache flush strategy depends on the number of pages to unmap. In both cases
 | |
| 	 * the operation is invalidate but the granularity of cache maintenance may change
 | |
| 	 * according to the situation.
 | |
| 	 *
 | |
| 	 * If GPU control command operations are present and the number of pages is "small",
 | |
| 	 * then the optimal strategy is flushing on the physical address range of the pages
 | |
| 	 * which are affected by the operation. That implies both the PGDs which are modified
 | |
| 	 * or removed from the page table and the physical pages which are freed from memory.
 | |
| 	 *
 | |
| 	 * Otherwise, there's no alternative to invalidating the whole GPU cache.
 | |
| 	 */
 | |
| 	if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
 | |
| 	    nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
 | |
| 		flush_op = KBASE_MMU_OP_FLUSH_PT;
 | |
| 
 | |
| 	mutex_lock(&mmut->mmu_lock);
 | |
| 
 | |
| 	err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
 | |
| 					   &free_pgds_list, flush_op, as_nr);
 | |
| 
 | |
| 	/* Set up MMU operation parameters. See above about MMU cache flush strategy. */
 | |
| 	op_param = (struct kbase_mmu_hw_op_param){
 | |
| 		.vpfn = start_vpfn,
 | |
| 		.nr = nr_virt_pages,
 | |
| 		.mmu_sync_info = mmu_sync_info,
 | |
| 		.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
 | |
| 		.op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
 | |
| 								  KBASE_MMU_OP_FLUSH_MEM,
 | |
| 		.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
 | |
| 	};
 | |
| 	mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
 | |
| 					    &op_param);
 | |
| 
 | |
| 	/* If page migration is enabled: the status of all physical pages involved
 | |
| 	 * shall be updated, unless they are not movable. Their status shall be
 | |
| 	 * updated before releasing the lock to protect against concurrent
 | |
| 	 * requests to migrate the pages, if they have been isolated.
 | |
| 	 */
 | |
| 	if (kbase_is_page_migration_enabled() && phys && !ignore_page_migration)
 | |
| 		kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
 | |
| 
 | |
| 	kbase_mmu_free_pgds_list(kbdev, mmut);
 | |
| 
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
 | |
| 			     struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
 | |
| 			     int as_nr)
 | |
| {
 | |
| 	return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
 | |
| 				  false);
 | |
| }
 | |
| KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
 | |
| 
 | |
| int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 				      u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
 | |
| 				      size_t nr_virt_pages, int as_nr)
 | |
| {
 | |
| 	return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
 | |
| 				  true);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU
 | |
|  *                                     page table entries
 | |
|  *
 | |
|  * @kbdev: Pointer to kbase device.
 | |
|  * @mmut:  The involved MMU table
 | |
|  * @vpfn:  Virtual PFN (Page Frame Number), in PAGE_SIZE units, of the first page to update
 | |
|  * @phys:  Pointer to the array of tagged physical addresses of the physical
 | |
|  *         pages that are pointed to by the page table entries (that need to
 | |
|  *         be updated). The pointer should be within the reg->gpu_alloc->pages
 | |
|  *         array.
 | |
|  * @nr:    Number of pages (in PAGE_SIZE units) to update
 | |
|  * @flags: Flags
 | |
|  * @group_id: The physical memory group in which the page was allocated.
 | |
|  *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
 | |
|  * @dirty_pgds: Flags to track every level where a PGD has been updated.
 | |
|  *
 | |
|  * This will update page table entries that already exist on the GPU based on
 | |
|  * new flags and replace any existing phy pages that are passed (the PGD pages
 | |
|  * remain unchanged). It is used in response to changes of phys as well
 | |
|  * as changes to the memory attributes.
 | |
|  *
 | |
|  * The caller is responsible for validating the memory attributes.
 | |
|  *
 | |
|  * Return: 0 if the attributes data in page table entries were updated
 | |
|  *         successfully, otherwise an error code.
 | |
|  */
 | |
| static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 					   u64 vpfn, struct tagged_addr *phys, size_t nr,
 | |
| 					   unsigned long flags, int const group_id, u64 *dirty_pgds)
 | |
| {
 | |
| 	phys_addr_t pgd;
 | |
| 	u64 *pgd_page;
 | |
| 	int err;
 | |
| 
 | |
| 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 | |
| 
 | |
| 	/* Early out if there is nothing to do */
 | |
| 	if (nr == 0)
 | |
| 		return 0;
 | |
| 
 | |
| 	/* Convert to GPU_PAGE_SIZE units. */
 | |
| 	vpfn *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 	nr *= GPU_PAGES_PER_CPU_PAGE;
 | |
| 	mutex_lock(&mmut->mmu_lock);
 | |
| 
 | |
| 	while (nr) {
 | |
| 		unsigned int i;
 | |
| 		unsigned int index = vpfn & 0x1FF;
 | |
| 		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
 | |
| 		struct page *p;
 | |
| 		register unsigned int num_of_valid_entries;
 | |
| 		int cur_level = MIDGARD_MMU_BOTTOMLEVEL;
 | |
| 
 | |
| 		if (count > nr)
 | |
| 			count = nr;
 | |
| 
 | |
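| 		/* A 2MB (huge) physical page can be mapped by a single level 2 ATE; when
 | |
| 		 * the first page of this batch is part of such a mapping, the update is
 | |
| 		 * applied at level 2 rather than at the bottom level.
 | |
| 		 */
 | |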
| 		if (is_huge(*phys) &&
 | |
| 		    (index == (index_in_large_page(*phys) * GPU_PAGES_PER_CPU_PAGE)))
 | |
| 			cur_level = MIDGARD_MMU_LEVEL(2);
 | |
| 
 | |
| 		err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd);
 | |
| 		if (WARN_ON(err))
 | |
| 			goto fail_unlock;
 | |
| 
 | |
| 		p = pfn_to_page(PFN_DOWN(pgd));
 | |
| 		pgd_page = kmap_pgd(p, pgd);
 | |
| 		if (!pgd_page) {
 | |
| 			dev_warn(kbdev->dev, "kmap failure on update_pages");
 | |
| 			err = -ENOMEM;
 | |
| 			goto fail_unlock;
 | |
| 		}
 | |
| 
 | |
| 		num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page);
 | |
| 
 | |
| 		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
 | |
| 			unsigned int level_index = (vpfn >> 9) & 0x1FFU;
 | |
| 			struct tagged_addr *target_phys = phys - index_in_large_page(*phys);
 | |
| 
 | |
| #ifdef CONFIG_MALI_BIFROST_DEBUG
 | |
| 			WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(pgd_page[level_index],
 | |
| 								    MIDGARD_MMU_LEVEL(2)));
 | |
| #endif
 | |
| 			pgd_page[level_index] = kbase_mmu_create_ate(
 | |
| 				kbdev, *target_phys, flags, MIDGARD_MMU_LEVEL(2), group_id);
 | |
| 			kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)),
 | |
| 					   pgd_dma_addr(p, pgd) + (level_index * sizeof(u64)),
 | |
| 					   sizeof(u64), KBASE_MMU_OP_NONE);
 | |
| 		} else {
 | |
| 			for (i = 0; i < count; i += GPU_PAGES_PER_CPU_PAGE) {
 | |
| 				phys_addr_t base_phys_address =
 | |
| 					as_phys_addr_t(phys[i / GPU_PAGES_PER_CPU_PAGE]);
 | |
| 				unsigned int j;
 | |
| 
 | |
| 				for (j = 0; j < GPU_PAGES_PER_CPU_PAGE; j++) {
 | |
| 					phys_addr_t page_address =
 | |
| 						base_phys_address + (j * GPU_PAGE_SIZE);
 | |
| #ifdef CONFIG_MALI_BIFROST_DEBUG
 | |
| 					WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
 | |
| 						pgd_page[index + i + j], MIDGARD_MMU_BOTTOMLEVEL));
 | |
| #endif
 | |
| 					pgd_page[index + i + j] = kbase_mmu_create_ate(
 | |
| 						kbdev, as_tagged(page_address), flags,
 | |
| 						MIDGARD_MMU_BOTTOMLEVEL, group_id);
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			/* MMU cache flush strategy is NONE because GPU cache maintenance
 | |
| 			 * will be done by the caller.
 | |
| 			 */
 | |
| 			kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
 | |
| 					   pgd_dma_addr(p, pgd) + (index * sizeof(u64)),
 | |
| 					   count * sizeof(u64), KBASE_MMU_OP_NONE);
 | |
| 		}
 | |
| 
 | |
| 		kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
 | |
| 
 | |
| 		if (dirty_pgds && count > 0)
 | |
| 			*dirty_pgds |= 1ULL << cur_level;
 | |
| 
 | |
| 		phys += (count / GPU_PAGES_PER_CPU_PAGE);
 | |
| 		vpfn += count;
 | |
| 		nr -= count;
 | |
| 
 | |
| 		kunmap_pgd(p, pgd_page);
 | |
| 	}
 | |
| 
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 	return 0;
 | |
| 
 | |
| fail_unlock:
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx,
 | |
| 					 u64 vpfn, struct tagged_addr *phys, size_t nr,
 | |
| 					 unsigned long flags, int const group_id)
 | |
| {
 | |
| 	int err;
 | |
| 	struct kbase_mmu_hw_op_param op_param;
 | |
| 	u64 dirty_pgds = 0;
 | |
| 	struct kbase_mmu_table *mmut;
 | |
| 	/* Calls to this function are inherently asynchronous, with respect to
 | |
| 	 * MMU operations.
 | |
| 	 */
 | |
| 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
 | |
| 	int as_nr;
 | |
| 
 | |
| #if !MALI_USE_CSF
 | |
| 	if (unlikely(kctx == NULL))
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	as_nr = kctx->as_nr;
 | |
| 	mmut = &kctx->mmu;
 | |
| #else
 | |
| 	if (kctx) {
 | |
| 		mmut = &kctx->mmu;
 | |
| 		as_nr = kctx->as_nr;
 | |
| 	} else {
 | |
| 		mmut = &kbdev->csf.mcu_mmu;
 | |
| 		as_nr = MCU_AS_NR;
 | |
| 	}
 | |
| #endif
 | |
| 
 | |
| 	err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
 | |
| 					      &dirty_pgds);
 | |
| 
 | |
| 	op_param = (const struct kbase_mmu_hw_op_param){
 | |
| 		.vpfn = vpfn,
 | |
| 		.nr = nr,
 | |
| 		.op = KBASE_MMU_OP_FLUSH_MEM,
 | |
| 		.kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
 | |
| 		.mmu_sync_info = mmu_sync_info,
 | |
| 		.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
 | |
| 	};
 | |
| 
 | |
| 	if (mmu_flush_cache_on_gpu_ctrl(kbdev))
 | |
| 		mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param);
 | |
| 	else
 | |
| 		mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param);
 | |
| 
 | |
| 	return err;
 | |
| }
 | |
| 
 | |
| int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys,
 | |
| 			   size_t nr, unsigned long flags, int const group_id)
 | |
| {
 | |
| 	if (unlikely(kctx == NULL))
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id);
 | |
| }
 | |
| 
 | |
| #if MALI_USE_CSF
 | |
| int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
 | |
| 				   size_t nr, unsigned long flags, int const group_id)
 | |
| {
 | |
| 	return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id);
 | |
| }
 | |
| #endif /* MALI_USE_CSF */
 | |
| 
 | |
| static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev)
 | |
| {
 | |
| 	lockdep_assert_held(&kbdev->hwaccess_lock);
 | |
| 
 | |
| 	WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress);
 | |
| 	kbdev->mmu_page_migrate_in_progress = true;
 | |
| }
 | |
| 
 | |
| static void mmu_page_migration_transaction_end(struct kbase_device *kbdev)
 | |
| {
 | |
| 	lockdep_assert_held(&kbdev->hwaccess_lock);
 | |
| 	WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress);
 | |
| 	kbdev->mmu_page_migrate_in_progress = false;
 | |
| 	/* Invoke the PM state machine, as the MMU page migration session
 | |
| 	 * may have deferred a transition in L2 state machine.
 | |
| 	 */
 | |
| 	kbase_pm_update_state(kbdev);
 | |
| }
 | |
| 
 | |
| int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys,
 | |
| 			   dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level)
 | |
| {
 | |
| 	struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys));
 | |
| 	struct kbase_mmu_hw_op_param op_param;
 | |
| 	struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
 | |
| 						     page_md->data.mapped.mmut :
 | |
| 						     page_md->data.pt_mapped.mmut;
 | |
| 	struct kbase_device *kbdev;
 | |
| 	phys_addr_t pgd;
 | |
| 	u64 *old_page, *new_page, *pgd_page, *target, vpfn;
 | |
| 	unsigned int index;
 | |
| 	int check_state, ret = 0;
 | |
| 	unsigned long hwaccess_flags = 0;
 | |
| 	unsigned int num_of_valid_entries;
 | |
| 	u8 vmap_count = 0;
 | |
| 	u8 pgd_entries_to_sync = (level == MIDGARD_MMU_BOTTOMLEVEL) ? GPU_PAGES_PER_CPU_PAGE : 1;
 | |
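| 	/* A bottom level mapping of one CPU page occupies GPU_PAGES_PER_CPU_PAGE
 | |
| 	 * consecutive ATEs, all of which are rewritten below, whereas a migrated
 | |
| 	 * PGD is referenced by a single PTE in its parent.
 | |
| 	 */
 | |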
| 
 | |
| 	/* If page migration support is not compiled in, return with fault */
 | |
| 	if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
 | |
| 		return -EINVAL;
 | |
| 	/* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param,
 | |
| 	 * here we skip the no kctx case, which is only used with MCU's mmut.
 | |
| 	 */
 | |
| 	if (!mmut->kctx)
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	if (level > MIDGARD_MMU_BOTTOMLEVEL)
 | |
| 		return -EINVAL;
 | |
| 	else if (level == MIDGARD_MMU_BOTTOMLEVEL)
 | |
| 		vpfn = page_md->data.mapped.vpfn;
 | |
| 	else
 | |
| 		vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level);
 | |
| 
 | |
| 	kbdev = mmut->kctx->kbdev;
 | |
| 	index = (vpfn >> ((3 - level) * 9)) & 0x1FFU;
 | |
| 
 | |
| 	/* Create all mappings before copying content.
 | |
| 	 * This is done as early as possible because it is the only operation that may
 | |
| 	 * fail. It is possible to do this before taking any locks because the
 | |
| 	 * pages to migrate are not going to change and even the parent PGD is not
 | |
| 	 * going to be affected by any other concurrent operation, since the page
 | |
| 	 * has been isolated before migration and therefore it cannot disappear in
 | |
| 	 * the middle of this function.
 | |
| 	 */
 | |
| 	old_page = kbase_kmap(as_page(old_phys));
 | |
| 	if (!old_page) {
 | |
| 		dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__);
 | |
| 		ret = -EINVAL;
 | |
| 		goto old_page_map_error;
 | |
| 	}
 | |
| 
 | |
| 	new_page = kbase_kmap(as_page(new_phys));
 | |
| 	if (!new_page) {
 | |
| 		dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__);
 | |
| 		ret = -EINVAL;
 | |
| 		goto new_page_map_error;
 | |
| 	}
 | |
| 
 | |
| 	/* GPU cache maintenance affects both memory content and page table,
 | |
| 	 * but at two different stages. A single virtual memory page is affected
 | |
| 	 * by the migration.
 | |
| 	 *
 | |
| 	 * Notice that the MMU maintenance is done in the following steps:
 | |
| 	 *
 | |
| 	 * 1) The MMU region is locked without performing any other operation.
 | |
| 	 *    This lock must cover the entire migration process, in order to
 | |
| 	 *    prevent any GPU access to the virtual page whose physical page
 | |
| 	 *    is being migrated.
 | |
| 	 * 2) Immediately after locking: the MMU region content is flushed via
 | |
| 	 *    GPU control while the lock is taken and without unlocking.
 | |
| 	 *    The region must stay locked for the duration of the whole page
 | |
| 	 *    migration procedure.
 | |
| 	 *    This is necessary to make sure that pending writes to the old page
 | |
| 	 *    are finalized before copying content to the new page.
 | |
| 	 * 3) Before unlocking: changes to the page table are flushed.
 | |
| 	 *    Finer-grained GPU control operations are used if possible, otherwise
 | |
| 	 *    the whole GPU cache shall be flushed again.
 | |
| 	 *    This is necessary to make sure that the GPU accesses the new page
 | |
| 	 *    after migration.
 | |
| 	 * 4) The MMU region is unlocked.
 | |
| 	 */
 | |
| #define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1))
 | |
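| 	/* PGD_VPFN_MASK(level) aligns the VPFN down to the start of the region
 | |
| 	 * covered at that level and op_param.nr is sized to match, so the MMU lock
 | |
| 	 * covers the single page being migrated at the bottom level, or every page
 | |
| 	 * reachable through a migrated PGD at the higher levels.
 | |
| 	 */
 | |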
| 	op_param.mmu_sync_info = CALLER_MMU_ASYNC;
 | |
| 	op_param.kctx_id = mmut->kctx->id;
 | |
| 	op_param.vpfn = (vpfn / GPU_PAGES_PER_CPU_PAGE) & PGD_VPFN_MASK(level);
 | |
| 	op_param.nr = 1U << ((3 - level) * 9);
 | |
| 	op_param.op = KBASE_MMU_OP_FLUSH_PT;
 | |
| 	/* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed to be a PGD page migration */
 | |
| 	op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
 | |
| 						   pgd_level_to_skip_flush(1ULL << level) :
 | |
| 						   pgd_level_to_skip_flush(3ULL << level);
 | |
| 
 | |
| 	mutex_lock(&mmut->mmu_lock);
 | |
| 
 | |
| 	/* The state was evaluated before entering this function, but it could
 | |
| 	 * have changed before the mmu_lock was taken. However, the state
 | |
| 	 * transitions which are possible at this point are only two, and in both
 | |
| 	 * cases it is a stable state progressing to a "free in progress" state.
 | |
| 	 *
 | |
| 	 * After taking the mmu_lock the state can no longer change: read it again
 | |
| 	 * and make sure that it hasn't changed before continuing.
 | |
| 	 */
 | |
| 	spin_lock(&page_md->migrate_lock);
 | |
| 	check_state = PAGE_STATUS_GET(page_md->status);
 | |
| 	if (level == MIDGARD_MMU_BOTTOMLEVEL)
 | |
| 		vmap_count = page_md->vmap_count;
 | |
| 	spin_unlock(&page_md->migrate_lock);
 | |
| 
 | |
| 	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
 | |
| 		if (check_state != ALLOCATED_MAPPED) {
 | |
| 			dev_dbg(kbdev->dev,
 | |
| 				"%s: state changed to %d (was %d), abort page migration", __func__,
 | |
| 				check_state, ALLOCATED_MAPPED);
 | |
| 			ret = -EAGAIN;
 | |
| 			goto page_state_change_out;
 | |
| 		} else if (vmap_count > 0) {
 | |
| 			dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration",
 | |
| 				__func__);
 | |
| 			ret = -EAGAIN;
 | |
| 			goto page_state_change_out;
 | |
| 		}
 | |
| 	} else {
 | |
| 		if (check_state != PT_MAPPED) {
 | |
| 			dev_dbg(kbdev->dev,
 | |
| 				"%s: state changed to %d (was %d), abort PGD page migration",
 | |
| 				__func__, check_state, PT_MAPPED);
 | |
| 			WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS);
 | |
| 			ret = -EAGAIN;
 | |
| 			goto page_state_change_out;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd);
 | |
| 	if (ret) {
 | |
| 		dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
 | |
| 		goto get_pgd_at_level_error;
 | |
| 	}
 | |
| 
 | |
| 	pgd_page = kmap_pgd(phys_to_page(pgd), pgd);
 | |
| 	if (!pgd_page) {
 | |
| 		dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__);
 | |
| 		ret = -EINVAL;
 | |
| 		goto pgd_page_map_error;
 | |
| 	}
 | |
| 
 | |
| 	mutex_lock(&kbdev->mmu_hw_mutex);
 | |
| 
 | |
| 	/* Lock MMU region and flush GPU cache by using GPU control,
 | |
| 	 * in order to keep MMU region locked.
 | |
| 	 */
 | |
| 	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 	if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) {
 | |
| 		/* Defer the migration as L2 is in a transitional phase */
 | |
| 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 		mutex_unlock(&kbdev->mmu_hw_mutex);
 | |
| 		dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__);
 | |
| 		ret = -EAGAIN;
 | |
| 		goto l2_state_defer_out;
 | |
| 	}
 | |
| 	/* Prevent transitional phases in L2 by starting the transaction */
 | |
| 	mmu_page_migration_transaction_begin(kbdev);
 | |
| 	if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
 | |
| 		int as_nr = mmut->kctx->as_nr;
 | |
| 		struct kbase_as *as = &kbdev->as[as_nr];
 | |
| 
 | |
| 		ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param);
 | |
| 		if (!ret) {
 | |
| #if MALI_USE_CSF
 | |
| 			if (mmu_flush_cache_on_gpu_ctrl(kbdev))
 | |
| 				ret = kbase_gpu_cache_flush_pa_range_and_busy_wait(
 | |
| 					kbdev, as_phys_addr_t(old_phys), PAGE_SIZE,
 | |
| 					GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC);
 | |
| 			else
 | |
| #endif
 | |
| 				ret = kbase_gpu_cache_flush_and_busy_wait(
 | |
| 					kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
 | |
| 		}
 | |
| 		if (ret)
 | |
| 			mmu_page_migration_transaction_end(kbdev);
 | |
| 	}
 | |
| 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 
 | |
| 	if (ret < 0) {
 | |
| 		mutex_unlock(&kbdev->mmu_hw_mutex);
 | |
| 		dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
 | |
| 		goto undo_mappings;
 | |
| 	}
 | |
| 
 | |
| 	/* Copy memory content.
 | |
| 	 *
 | |
| 	 * It is necessary to claim the ownership of the DMA buffer for the old
 | |
| 	 * page before performing the copy, to make sure of reading a consistent
 | |
| 	 * version of its content, before copying. After the copy, ownership of
 | |
| 	 * the DMA buffer for the new page is given to the GPU in order to make
 | |
| 	 * the content visible to potential GPU access that may happen as soon as
 | |
| 	 * this function releases the lock on the MMU region.
 | |
| 	 */
 | |
| 	dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 | |
| 	memcpy(new_page, old_page, PAGE_SIZE);
 | |
| 	dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 | |
| 
 | |
| 	/* Remap GPU virtual page.
 | |
| 	 *
 | |
| 	 * This code rests on the assumption that page migration is only enabled
 | |
| 	 * for small pages, which necessarily live in the bottom level of the MMU
 | |
| 	 * page table. For this reason, the PGD level tells us unequivocally
 | |
| 	 * whether the page being migrated is a "content page" or another PGD
 | |
| 	 * of the page table:
 | |
| 	 *
 | |
| 	 * - Bottom level implies ATE (Address Translation Entry)
 | |
| 	 * - Any other level implies PTE (Page Table Entry)
 | |
| 	 *
 | |
| 	 * The current implementation doesn't handle the case of a level 0 PGD,
 | |
| 	 * that is: the root PGD of the page table.
 | |
| 	 */
 | |
| 	target = &pgd_page[index];
 | |
| 
 | |
| 	/* Certain entries of a page table page encode the count of valid entries
 | |
| 	 * present in that page. So we need to save & restore the count information
 | |
| 	 * when updating the PTE/ATE to point to the new page.
 | |
| 	 */
 | |
| 	num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page);
 | |
| 
 | |
| 	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
 | |
| 		phys_addr_t base_phys_address = as_phys_addr_t(new_phys);
 | |
| 		unsigned int i;
 | |
| 
 | |
| 		for (i = 0; i < GPU_PAGES_PER_CPU_PAGE; i++) {
 | |
| 			phys_addr_t page_address = base_phys_address + (i * GPU_PAGE_SIZE);
 | |
| 
 | |
| 			WARN_ON_ONCE((*target & 1UL) == 0);
 | |
| 			*target = kbase_mmu_create_ate(
 | |
| 				kbdev, as_tagged(page_address), page_md->data.mapped.reg->flags,
 | |
| 				level, page_md->data.mapped.reg->gpu_alloc->group_id);
 | |
| 			target++;
 | |
| 		}
 | |
| 	} else {
 | |
| 		u64 managed_pte;
 | |
| 
 | |
| #ifdef CONFIG_MALI_BIFROST_DEBUG
 | |
| 		/* The PTE should be pointing to the page being migrated */
 | |
| 		WARN_ON_ONCE(
 | |
| 			as_phys_addr_t(old_phys) !=
 | |
| 			kbdev->mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
 | |
| 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index])));
 | |
| #endif
 | |
| 		kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys));
 | |
| 		*target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev,
 | |
| 								 MGM_DEFAULT_PTE_GROUP,
 | |
| 								 PBHA_ID_DEFAULT, PTE_FLAGS_NONE,
 | |
| 								 level, managed_pte);
 | |
| 	}
 | |
| 
 | |
| 	kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
 | |
| 
 | |
| 	/* This function always updates a single entry inside an existing PGD when
 | |
| 	 * level != MIDGARD_MMU_BOTTOMLEVEL, and would update more than one entry for
 | |
| 	 * MIDGARD_MMU_BOTTOMLEVEL PGD when PAGE_SIZE is not 4K, therefore cache
 | |
| 	 * maintenance is necessary.
 | |
| 	 */
 | |
| 	kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
 | |
| 			   pgd_dma_addr(phys_to_page(pgd), pgd) + (index * sizeof(u64)),
 | |
| 			   pgd_entries_to_sync * sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
 | |
| 
 | |
| 	/* Unlock MMU region.
 | |
| 	 *
 | |
| 	 * For GPUs without FLUSH_PA_RANGE support, the GPU caches were completely
 | |
| 	 * cleaned and invalidated after locking the virtual address range affected
 | |
| 	 * by the migration. As long as the lock is in place, GPU access to the
 | |
| 	 * locked range would remain blocked. So there is no need to clean and
 | |
| 	 * invalidate the GPU caches again after copying the contents of the old
 | |
| 	 * page and updating the page table entry to point to the new page.
 | |
| 	 *
 | |
| 	 * For GPUs with FLUSH_PA_RANGE support, the contents of the old page would
 | |
| 	 * have been evicted from the GPU caches after locking the virtual address
 | |
| 	 * range. The page table entry contents also would have been invalidated
 | |
| 	 * from the GPU's L2 cache by kbase_mmu_sync_pgd() after the page table
 | |
| 	 * update.
 | |
| 	 *
 | |
| 	 * If kbase_mmu_hw_do_unlock_no_addr() fails, GPU reset will be triggered which
 | |
| 	 * would remove the MMU lock and so there is no need to rollback page migration
 | |
| 	 * and the failure can be ignored.
 | |
| 	 */
 | |
| 	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 	if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
 | |
| 		int as_nr = mmut->kctx->as_nr;
 | |
| 		struct kbase_as *as = &kbdev->as[as_nr];
 | |
| 		int local_ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
 | |
| 
 | |
| 		CSTD_UNUSED(local_ret);
 | |
| 	}
 | |
| 
 | |
| 	/* Release the transition prevention in L2 by ending the transaction */
 | |
| 	mmu_page_migration_transaction_end(kbdev);
 | |
| 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
 | |
| 	/* Releasing locks before checking the migration transaction error state */
 | |
| 	mutex_unlock(&kbdev->mmu_hw_mutex);
 | |
| 
 | |
| 	/* Undertaking metadata transfer, while we are holding the mmu_lock */
 | |
| 	spin_lock(&page_md->migrate_lock);
 | |
| 	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
 | |
| 		enum kbase_page_status page_status = PAGE_STATUS_GET(page_md->status);
 | |
| 
 | |
| 		if (page_status == ALLOCATED_MAPPED) {
 | |
| 			/* Replace page in array of pages of the physical allocation. */
 | |
| 			size_t page_array_index =
 | |
| 				div_u64(page_md->data.mapped.vpfn, GPU_PAGES_PER_CPU_PAGE) -
 | |
| 				page_md->data.mapped.reg->start_pfn;
 | |
| 
 | |
| 			page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys;
 | |
| 		} else if (page_status == NOT_MOVABLE) {
 | |
| 			dev_dbg(kbdev->dev,
 | |
| 				"%s: migration completed and page has become NOT_MOVABLE.",
 | |
| 				__func__);
 | |
| 		} else {
 | |
| 			dev_WARN(kbdev->dev,
 | |
| 				 "%s: migration completed but page has moved to status %d.",
 | |
| 				 __func__, page_status);
 | |
| 		}
 | |
| 	}
 | |
| 	/* Update the new page dma_addr with the transferred metadata from the old_page */
 | |
| 	page_md->dma_addr = new_dma_addr;
 | |
| 	page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
 | |
| 	spin_unlock(&page_md->migrate_lock);
 | |
| 	set_page_private(as_page(new_phys), (unsigned long)page_md);
 | |
| 	/* Old page metadata pointer cleared as it is now owned by the new page */
 | |
| 	set_page_private(as_page(old_phys), 0);
 | |
| 
 | |
| l2_state_defer_out:
 | |
| 	kunmap_pgd(phys_to_page(pgd), pgd_page);
 | |
| pgd_page_map_error:
 | |
| get_pgd_at_level_error:
 | |
| page_state_change_out:
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 
 | |
| 	kbase_kunmap(as_page(new_phys), new_page);
 | |
| new_page_map_error:
 | |
| 	kbase_kunmap(as_page(old_phys), old_page);
 | |
| old_page_map_error:
 | |
| 	return ret;
 | |
| 
 | |
| undo_mappings:
 | |
| 	/* Unlock the MMU table and undo mappings. */
 | |
| 	mutex_unlock(&mmut->mmu_lock);
 | |
| 	kunmap_pgd(phys_to_page(pgd), pgd_page);
 | |
| 	kbase_kunmap(as_page(new_phys), new_page);
 | |
| 	kbase_kunmap(as_page(old_phys), old_page);
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| 
 | |
| static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
 | |
| 			       phys_addr_t pgd, int level)
 | |
| {
 | |
| 	u64 *pgd_page;
 | |
| 	int i;
 | |
| 	struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
 | |
| 	struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
 | |
| 	u64 *pgd_page_buffer = NULL;
 | |
| 	struct page *p = phys_to_page(pgd);
 | |
| 
 | |
| 	lockdep_assert_held(&mmut->mmu_lock);
 | |
| 
 | |
| 	pgd_page = kmap_atomic_pgd(p, pgd);
 | |
| 	/* kmap_atomic should NEVER fail. */
 | |
| 	if (WARN_ON_ONCE(pgd_page == NULL))
 | |
| 		return;
 | |
| 	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
 | |
| 		/* Copy the page to our preallocated buffer so that we can minimize
 | |
| 		 * kmap_atomic usage
 | |
| 		 */
 | |
| 		pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
 | |
| 		memcpy(pgd_page_buffer, pgd_page, GPU_PAGE_SIZE);
 | |
| 	}
 | |
| 
 | |
| 	/* When page migration is enabled, kbase_region_tracker_term() would ensure
 | |
| 	 * there are no pages left mapped on the GPU for a context. Hence the count
 | |
| 	 * of valid entries is expected to be zero here.
 | |
| 	 */
 | |
| 	if (kbase_is_page_migration_enabled() && mmut->kctx)
 | |
| 		WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page));
 | |
| 	/* Invalidate page after copying */
 | |
| 	mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
 | |
| 	kunmap_atomic_pgd(pgd_page);
 | |
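| 	/* From here on the entries are read from the scratch copy: the live PGD has
 | |
| 	 * already been invalidated and unmapped. At the bottom level no copy was
 | |
| 	 * made, but the buffer is not dereferenced in that case either.
 | |
| 	 */
 | |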
| 	pgd_page = pgd_page_buffer;
 | |
| 
 | |
| 	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
 | |
| 		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
 | |
| 			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
 | |
| 				phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr(
 | |
| 					mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev,
 | |
| 									     MGM_DEFAULT_PTE_GROUP,
 | |
| 									     level, pgd_page[i]));
 | |
| 
 | |
| 				mmu_teardown_level(kbdev, mmut, target_pgd, level + 1);
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	kbase_mmu_free_pgd(kbdev, mmut, pgd);
 | |
| }
 | |
| 
 | |
| static void kbase_mmu_mark_non_movable(struct kbase_device *const kbdev, struct page *page)
 | |
| {
 | |
| 	struct kbase_page_metadata *page_md;
 | |
| 
 | |
| 	if (!kbase_is_page_migration_enabled())
 | |
| 		return;
 | |
| 
 | |
| 	/* Composite large pages are excluded from migration; trigger a warning if a
 | |
| 	 * development change wrongly leads to one being passed here.
 | |
| 	 */
 | |
| 	if (is_huge_head(as_tagged(page_to_phys(page))) ||
 | |
| 	    is_partial(as_tagged(page_to_phys(page))))
 | |
| 		dev_WARN(kbdev->dev, "%s: migration on large-page attempted.", __func__);
 | |
| 
 | |
| 	page_md = kbase_page_private(page);
 | |
| 
 | |
| 	spin_lock(&page_md->migrate_lock);
 | |
| 	page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
 | |
| 
 | |
| 	if (IS_PAGE_MOVABLE(page_md->status))
 | |
| 		page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
 | |
| 
 | |
| 	spin_unlock(&page_md->migrate_lock);
 | |
| }
 | |
| 
 | |
int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut,
		   struct kbase_context *const kctx, int const group_id)
{
	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || WARN_ON(group_id < 0))
		return -EINVAL;

	compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)),
			   "List of free PGDs may not be large enough.");
	compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL,
			   "Array of MMU levels is not large enough.");

	mmut->group_id = group_id;
	mutex_init(&mmut->mmu_lock);
	mmut->kctx = kctx;
	mmut->pgd = KBASE_INVALID_PHYSICAL_ADDRESS;

#if GPU_PAGES_PER_CPU_PAGE > 1
	INIT_LIST_HEAD(&mmut->pgd_pages_list);
#endif

	/* We allocate pages into the kbdev memory pool, then
	 * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
	 * avoid allocations from the kernel happening with the lock held.
	 */
	while (mmut->pgd == KBASE_INVALID_PHYSICAL_ADDRESS) {
		int err;

		err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
					  MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
		if (err) {
			kbase_mmu_term(kbdev, mmut);
			return -ENOMEM;
		}

		mutex_lock(&mmut->mmu_lock);
		mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
		mutex_unlock(&mmut->mmu_lock);
	}

	kbase_mmu_mark_non_movable(kbdev, pfn_to_page(PFN_DOWN(mmut->pgd)));
	return 0;
}

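/*
 * Tear down an MMU page table structure: recursively free every PGD still
 * reachable from the top level and destroy the lock. The context (if any)
 * must already have been scheduled out of its address space so that GPU
 * caches/TLBs no longer reference the tables being freed.
 */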
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
	WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
	     "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
	     mmut->kctx->tgid, mmut->kctx->id);

	if (mmut->pgd != KBASE_INVALID_PHYSICAL_ADDRESS) {
		mutex_lock(&mmut->mmu_lock);
		mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL);
		mutex_unlock(&mmut->mmu_lock);

		if (mmut->kctx)
			KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
	}

	mutex_destroy(&mmut->mmu_lock);
}

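/* Per-address-space termination: destroy the fault workqueue for AS @i. */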
void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i)
{
	destroy_workqueue(kbdev->as[i].pf_wq);
}

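/*
 * Flush GPU caches for a physical address range. Only effective on CSF GPUs
 * that support flushing by physical range via the GPU control interface, and
 * only while the GPU is powered and the context (if given) is assigned to an
 * address space; in all other cases the call is a no-op.
 */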
void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
			      phys_addr_t phys, size_t size, enum kbase_mmu_op_type flush_op)
{
#if MALI_USE_CSF
	unsigned long irq_flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
	if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
	    kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0))
		mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
#else
	CSTD_UNUSED(kbdev);
	CSTD_UNUSED(kctx);
	CSTD_UNUSED(phys);
	CSTD_UNUSED(size);
	CSTD_UNUSED(flush_op);
#endif
}

#ifdef CONFIG_MALI_VECTOR_DUMP
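/**
 * kbasep_mmu_dump_level() - Dump one level of the page table into a buffer
 *
 * @kctx:      The kbase context whose page tables are being dumped.
 * @pgd:       Physical address of the PGD to dump at this level.
 * @level:     The MMU level of @pgd.
 * @buffer:    In/out pointer to the current write position in the dump buffer.
 * @size_left: In/out count of bytes still available in the dump buffer.
 *
 * Writes the PGD's physical address (tagged with @level in its low bits)
 * followed by the KBASE_MMU_PAGE_ENTRIES entries of the table, then recurses
 * into every valid child PGD.
 *
 * Return: The number of bytes needed for the full dump of this subtree, or 0
 *         if a PGD page could not be mapped. The buffer is only written while
 *         @size_left is large enough, but the returned size always reflects
 *         the complete subtree.
 */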
static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level,
				    char **const buffer, size_t *size_left)
{
	phys_addr_t target_pgd;
	u64 *pgd_page;
	int i;
	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
	size_t dump_size;
	struct kbase_device *kbdev;
	struct kbase_mmu_mode const *mmu_mode;
	struct page *p;

	if (WARN_ON(kctx == NULL))
		return 0;
	lockdep_assert_held(&kctx->mmu.mmu_lock);

	kbdev = kctx->kbdev;
	mmu_mode = kbdev->mmu_mode;

	p = pfn_to_page(PFN_DOWN(pgd));
	pgd_page = kmap_pgd(p, pgd);
	if (!pgd_page) {
		dev_warn(kbdev->dev, "%s: kmap failure", __func__);
		return 0;
	}

	if (*size_left >= size) {
		/* A modified physical address that contains
		 * the page table level
		 */
		u64 m_pgd = pgd | (u64)level;

		/* Put the modified physical address in the output buffer */
		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
		*buffer += sizeof(m_pgd);

		/* Followed by the page table itself */
		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;

		*size_left -= size;
	}

	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
				target_pgd = mmu_mode->pte_to_phy_addr(
					kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
						kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level,
						pgd_page[i]));

				dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1,
								  buffer, size_left);
				if (!dump_size) {
					kunmap_pgd(p, pgd_page);
					return 0;
				}
				size += dump_size;
			}
		}
	}

	kunmap_pgd(p, pgd_page);

	return size;
}

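/*
 * Dump the context's page tables into a vmalloc'd buffer of @nr_pages pages.
 *
 * Buffer layout: three u64 config words (transtab, memattr, transcfg from the
 * address space setup), followed by the per-level records produced by
 * kbasep_mmu_dump_level(), terminated by a u64 end marker of 0xFF. Returns
 * NULL if the allocation fails or the dump does not fit in the buffer.
 */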
void *kbase_mmu_dump(struct kbase_context *kctx, size_t nr_pages)
{
	void *kaddr;
	size_t size_left;

	KBASE_DEBUG_ASSERT(kctx);

	if (nr_pages == 0) {
		/* can't dump into a zero-sized buffer, early out */
		return NULL;
	}

	size_left = nr_pages * PAGE_SIZE;

	if (WARN_ON(size_left == 0))
		return NULL;
	kaddr = vmalloc_user(size_left);

	mutex_lock(&kctx->mmu.mmu_lock);

	if (kaddr) {
		u64 end_marker = 0xFFULL;
		char *buffer;
		char *mmu_dump_buffer;
		u64 config[3];
		size_t dump_size, size = 0;
		struct kbase_mmu_setup as_setup;

		buffer = (char *)kaddr;
		mmu_dump_buffer = buffer;

		kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, &as_setup);
		config[0] = as_setup.transtab;
		config[1] = as_setup.memattr;
		config[2] = as_setup.transcfg;
		memcpy(buffer, &config, sizeof(config));
		mmu_dump_buffer += sizeof(config);
		size_left -= sizeof(config);
		size += sizeof(config);

		dump_size = kbasep_mmu_dump_level(kctx, kctx->mmu.pgd, MIDGARD_MMU_TOPLEVEL,
						  &mmu_dump_buffer, &size_left);

		if (!dump_size)
			goto fail_free;

		size += dump_size;

		/* Add on the size for the end marker */
		size += sizeof(u64);

		if (size > (nr_pages * PAGE_SIZE)) {
			/* The buffer isn't big enough - free the memory and
			 * return failure
			 */
			goto fail_free;
		}

		/* Add the end marker */
		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
	}

	mutex_unlock(&kctx->mmu.mmu_lock);
	return kaddr;

fail_free:
	vfree(kaddr);
	mutex_unlock(&kctx->mmu.mmu_lock);
	return NULL;
}
KBASE_EXPORT_TEST_API(kbase_mmu_dump);
#endif /* CONFIG_MALI_VECTOR_DUMP */

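/*
 * Workqueue handler for GPU bus faults. Runs after any pending page fault
 * work for the same address space has completed, reports the fault (killing
 * the faulting context's work on the GPU) and then drops the context
 * reference and the pending-fault count taken when the fault was flagged.
 */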
void kbase_mmu_bus_fault_worker(struct work_struct *data)
{
	struct kbase_as *faulting_as;
	unsigned int as_no;
	struct kbase_context *kctx;
	struct kbase_device *kbdev;
	struct kbase_fault *fault;

	faulting_as = container_of(data, struct kbase_as, work_busfault);
	fault = &faulting_as->bf_data;

	/* Ensure that any pending page fault worker has completed */
	flush_work(&faulting_as->work_pagefault);

	as_no = faulting_as->number;

	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);

	/* Grab the context; it was already refcounted in kbase_mmu_interrupt()
	 * when the bus fault was flagged, so it cannot be scheduled out of
	 * this AS until we explicitly release it.
	 */
	kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no);
	if (!kctx) {
		atomic_dec(&kbdev->faults_pending);
		return;
	}

	/* check if we still have GPU */
	if (unlikely(kbase_is_gpu_removed(kbdev))) {
		dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
		release_ctx(kbdev, kctx);
		atomic_dec(&kbdev->faults_pending);
		return;
	}

	if (unlikely(fault->protected_mode)) {
		kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault);
		kbase_mmu_hw_clear_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
		release_ctx(kbdev, kctx);
		atomic_dec(&kbdev->faults_pending);
		return;
	}

#if MALI_USE_CSF
	/* Before the GPU is powered off, we wait for in-flight MMU fault work
	 * items to complete, so the GPU is expected to remain powered up while
	 * the bus fault is being handled.
	 */
	kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
#else
	/* NOTE: If the GPU is already powered off for suspend,
	 * we don't need to switch to unmapped
	 */
	if (!kbase_pm_context_active_handle_suspend(kbdev,
						    KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
		kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
		kbase_pm_context_idle(kbdev);
	}
#endif

	release_ctx(kbdev, kctx);

	atomic_dec(&kbdev->faults_pending);
}

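/*
 * Flush the fault workqueue of every hardware address space, ensuring that
 * all queued MMU fault work has completed before the caller proceeds.
 */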
void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
{
	int i;

	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
		struct kbase_as *as = &kbdev->as[i];

		flush_workqueue(as->pf_wq);
	}
}