diff --git a/media_driver/linux/ult/libdrm_mock/mos_bufmgr_api_mock.c b/media_driver/linux/ult/libdrm_mock/mos_bufmgr_api_mock.c index b75c3ed3ea1..7c7d0ac0a16 100644 --- a/media_driver/linux/ult/libdrm_mock/mos_bufmgr_api_mock.c +++ b/media_driver/linux/ult/libdrm_mock/mos_bufmgr_api_mock.c @@ -48,16 +48,16 @@ struct mos_linux_bo * mos_bo_alloc(struct mos_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, int mem_type) + unsigned long size, unsigned int alignment, int mem_type, unsigned int pat_index, bool cpu_cacheable) { - return bufmgr->bo_alloc(bufmgr, name, size, alignment, mem_type); + return bufmgr->bo_alloc(bufmgr, name, size, alignment, mem_type, pat_index, cpu_cacheable); } struct mos_linux_bo * mos_bo_alloc_for_render(struct mos_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, int mem_type) + unsigned long size, unsigned int alignment, int mem_type, unsigned int pat_index, bool cpu_cacheable) { - return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment, mem_type); + return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment, mem_type, pat_index, cpu_cacheable); } struct mos_linux_bo * @@ -77,10 +77,10 @@ mos_bo_alloc_userptr(struct mos_bufmgr *bufmgr, struct mos_linux_bo * mos_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, int x, int y, int cpp, uint32_t *tiling_mode, - unsigned long *pitch, unsigned long flags, int mem_type) + unsigned long *pitch, unsigned long flags, int mem_type, unsigned int pat_index, bool cpu_cacheable) { return bufmgr->bo_alloc_tiled(bufmgr, name, x, y, cpp, - tiling_mode, pitch, flags, mem_type); + tiling_mode, pitch, flags, mem_type, pat_index, cpu_cacheable); } void diff --git a/media_driver/linux/ult/libdrm_mock/mos_bufmgr_mock.c b/media_driver/linux/ult/libdrm_mock/mos_bufmgr_mock.c index 4b9d960752f..a8d471c3b5c 100644 --- a/media_driver/linux/ult/libdrm_mock/mos_bufmgr_mock.c +++ b/media_driver/linux/ult/libdrm_mock/mos_bufmgr_mock.c @@ 
-879,8 +879,12 @@ mos_gem_bo_alloc_internal(struct mos_bufmgr *bufmgr, uint32_t tiling_mode, unsigned long stride, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { + MOS_UNUSED(pat_index); + MOS_UNUSED(cpu_cacheable); struct mos_bufmgr_gem *bufmgr_gem = (struct mos_bufmgr_gem *) bufmgr; struct mos_bo_gem *bo_gem; unsigned int page_size = getpagesize(); @@ -1049,13 +1053,17 @@ mos_gem_bo_alloc_for_render(struct mos_bufmgr *bufmgr, const char *name, unsigned long size, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { return mos_gem_bo_alloc_internal(bufmgr, name, size, I915_TILING_NONE, 0, BO_ALLOC_FOR_RENDER, alignment, - mem_type); + mem_type, + pat_index, + cpu_cacheable); } static struct mos_linux_bo * @@ -1063,17 +1071,19 @@ mos_gem_bo_alloc(struct mos_bufmgr *bufmgr, const char *name, unsigned long size, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { return mos_gem_bo_alloc_internal(bufmgr, name, size, 0, - I915_TILING_NONE, 0, 0, mem_type); + I915_TILING_NONE, 0, 0, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * mos_gem_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, int x, int y, int cpp, uint32_t *tiling_mode, unsigned long *pitch, unsigned long flags, - int mem_type) + int mem_type, unsigned int pat_index, bool cpu_cacheable) { struct mos_bufmgr_gem *bufmgr_gem = (struct mos_bufmgr_gem *)bufmgr; unsigned long size, stride; @@ -1116,7 +1126,7 @@ mos_gem_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, if (tiling == I915_TILING_NONE) stride = 0; return mos_gem_bo_alloc_internal(bufmgr, name, size, flags, - tiling, stride, 0, mem_type); + tiling, stride, 0, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * diff --git a/media_softlet/agnostic/common/os/mos_interface.h b/media_softlet/agnostic/common/os/mos_interface.h index 
7d067da858d..7b4f9eafdab 100644 --- a/media_softlet/agnostic/common/os/mos_interface.h +++ b/media_softlet/agnostic/common/os/mos_interface.h @@ -308,6 +308,21 @@ class MosInterface static GMM_CLIENT_CONTEXT *GetGmmClientContext( MOS_STREAM_HANDLE streamState); + //! + //! \brief Get PAT index from gmm + //! + //! \param [in] gmmClient + //! GMM client context + //! \param [in] gmmResourceInfo + //! gmm resource info + //! + //! \return unsigned int + //! Pat index + //! + static unsigned int GetPATIndexFromGmm( + GMM_CLIENT_CONTEXT *gmmClient, + GMM_RESOURCE_INFO *gmmResourceInfo); + //! //! \brief Get current Gpu context priority //! \details Get current Gpu context priority diff --git a/media_softlet/linux/common/ddi/media_libva_util_next.cpp b/media_softlet/linux/common/ddi/media_libva_util_next.cpp index 5b14516a00f..e3c49eef854 100644 --- a/media_softlet/linux/common/ddi/media_libva_util_next.cpp +++ b/media_softlet/linux/common/ddi/media_libva_util_next.cpp @@ -734,16 +734,19 @@ VAStatus MediaLibvaUtilNext::CreateInternalSurface( params.memType = MemoryPolicyManager::UpdateMemoryPolicy(&memPolicyPar); + unsigned int patIndex = MosInterface::GetPATIndexFromGmm(mediaDrvCtx->pGmmClientContext, gmmResourceInfo); + bool isCpuCacheable = gmmResourceInfo->GetResFlags().Info.Cacheable; + if ( params.tileFormat == I915_TILING_NONE ) { - bo = mos_bo_alloc(mediaDrvCtx->pDrmBufMgr, "MEDIA", gmmSize, 4096, params.memType); + bo = mos_bo_alloc(mediaDrvCtx->pDrmBufMgr, "MEDIA", gmmSize, 4096, params.memType, patIndex, isCpuCacheable); params.pitch = gmmPitch; } else { unsigned long ulPitch = 0; bo = mos_bo_alloc_tiled(mediaDrvCtx->pDrmBufMgr, "MEDIA", gmmPitch, (gmmSize + gmmPitch -1)/gmmPitch, 1, &params.tileFormat, - (unsigned long *)&ulPitch, 0, params.memType); + (unsigned long *)&ulPitch, 0, params.memType, patIndex, isCpuCacheable); params.pitch = ulPitch; } @@ -1658,8 +1661,11 @@ VAStatus MediaLibvaUtilNext::Allocate2DBuffer( mem_type = 
MemoryPolicyManager::UpdateMemoryPolicy(&memPolicyPar); + unsigned int patIndex = MosInterface::GetPATIndexFromGmm(mediaBuffer->pMediaCtx->pGmmClientContext, gmmResourceInfo); + bool isCpuCacheable = gmmResourceInfo->GetResFlags().Info.Cacheable; + MOS_LINUX_BO *bo; - bo = mos_bo_alloc(bufmgr, "Media 2D Buffer", gmmSize, 4096, mem_type); + bo = mos_bo_alloc(bufmgr, "Media 2D Buffer", gmmSize, 4096, mem_type, patIndex, isCpuCacheable); mediaBuffer->bMapped = false; if (bo) @@ -1731,7 +1737,11 @@ VAStatus MediaLibvaUtilNext::AllocateBuffer( memPolicyPar.preferredMemType = mediaBuffer->bUseSysGfxMem ? MOS_MEMPOOL_SYSTEMMEMORY : 0; mem_type = MemoryPolicyManager::UpdateMemoryPolicy(&memPolicyPar); - MOS_LINUX_BO *bo = mos_bo_alloc(bufmgr, "Media Buffer", size, 4096, mem_type); + + unsigned int patIndex = MosInterface::GetPATIndexFromGmm(mediaBuffer->pMediaCtx->pGmmClientContext, mediaBuffer->pGmmResourceInfo); + bool isCpuCacheable = mediaBuffer->pGmmResourceInfo->GetResFlags().Info.Cacheable; + + MOS_LINUX_BO *bo = mos_bo_alloc(bufmgr, "Media Buffer", size, 4096, mem_type, patIndex, isCpuCacheable); mediaBuffer->bMapped = false; if (bo) { diff --git a/media_softlet/linux/common/os/i915/include/mos_bufmgr.h b/media_softlet/linux/common/os/i915/include/mos_bufmgr.h index fbecca36485..9f2fd36ad8b 100644 --- a/media_softlet/linux/common/os/i915/include/mos_bufmgr.h +++ b/media_softlet/linux/common/os/i915/include/mos_bufmgr.h @@ -170,13 +170,17 @@ struct mos_aub_annotation { #define BO_ALLOC_FOR_RENDER (1<<0) +#define PAT_INDEX_INVALID ((uint32_t)-1) + struct mos_linux_bo *mos_bo_alloc(struct mos_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, int mem_type); + unsigned long size, unsigned int alignment, int mem_type, unsigned int pat_index = PAT_INDEX_INVALID, bool cpu_cacheable = true); struct mos_linux_bo *mos_bo_alloc_for_render(struct mos_bufmgr *bufmgr, const char *name, unsigned long size, unsigned int alignment, - int mem_type); + 
int mem_type, + unsigned int pat_index = PAT_INDEX_INVALID, + bool cpu_cacheable = true); struct mos_linux_bo *mos_bo_alloc_userptr(struct mos_bufmgr *bufmgr, const char *name, void *addr, uint32_t tiling_mode, @@ -188,7 +192,9 @@ struct mos_linux_bo *mos_bo_alloc_tiled(struct mos_bufmgr *bufmgr, uint32_t *tiling_mode, unsigned long *pitch, unsigned long flags, - int mem_type); + int mem_type, + unsigned int pat_index = PAT_INDEX_INVALID, + bool cpu_cacheable = true); void mos_bo_reference(struct mos_linux_bo *bo); void mos_bo_unreference(struct mos_linux_bo *bo); int mos_bo_map(struct mos_linux_bo *bo, int write_enable); diff --git a/media_softlet/linux/common/os/i915/include/mos_bufmgr_priv.h b/media_softlet/linux/common/os/i915/include/mos_bufmgr_priv.h index 873d78373e3..dcbf71025c1 100644 --- a/media_softlet/linux/common/os/i915/include/mos_bufmgr_priv.h +++ b/media_softlet/linux/common/os/i915/include/mos_bufmgr_priv.h @@ -48,7 +48,7 @@ struct mos_bufmgr { * using bo_map() or drm_intel_gem_bo_map_gtt() to be used by the CPU. 
*/ struct mos_linux_bo *(*bo_alloc) (struct mos_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, int mem_type); + unsigned long size, unsigned int alignment, int mem_type, unsigned int pat_index, bool cpu_cacheable); /** * Allocate a buffer object, hinting that it will be used as a @@ -60,7 +60,9 @@ struct mos_bufmgr { const char *name, unsigned long size, unsigned int alignment, - int mem_type); + int mem_type, + unsigned int pat_index, + bool cpu_cacheable); /** * Allocate a buffer object from an existing user accessible @@ -95,7 +97,9 @@ struct mos_bufmgr { uint32_t *tiling_mode, unsigned long *pitch, unsigned long flags, - int mem_type); + int mem_type, + unsigned int pat_index, + bool cpu_cacheable); /** Takes a reference on a buffer object */ void (*bo_reference) (struct mos_linux_bo *bo); diff --git a/media_softlet/linux/common/os/i915/include/uapi/README b/media_softlet/linux/common/os/i915/include/uapi/README index 20c0ec4fbdf..3d675095a1b 100644 --- a/media_softlet/linux/common/os/i915/include/uapi/README +++ b/media_softlet/linux/common/os/i915/include/uapi/README @@ -13,45 +13,55 @@ $ make headers_install INSTALL_HDR_PATH=/path/to/install The last update was done at the following kernel commit: -commit 1cc064dce4ed0ff111b6d6cb06b3cccf1cba29f5 -Author: Umesh Nerlige Ramappa -Date: Thu Mar 23 15:58:59 2023 -0700 - - drm/i915/perf: Add support for OA media units - - MTL introduces additional OA units dedicated to media use cases. Add - support for programming these OA units by passing the media engine class - and instance parameters. 
- - UMD specific changes for GPUvis support: - https://patchwork.freedesktop.org/patch/522827/?series=114023 - https://patchwork.freedesktop.org/patch/522822/?series=114023 - https://patchwork.freedesktop.org/patch/522826/?series=114023 - https://patchwork.freedesktop.org/patch/522828/?series=114023 - https://patchwork.freedesktop.org/patch/522816/?series=114023 - https://patchwork.freedesktop.org/patch/522825/?series=114023 - - v2: (Ashutosh) - - check for IP_VER(12, 70) instead of MTL - - remove PERF_GROUP_OAG comment in mtl_oa_base - - remove oa_buffer.group - - use engine->oa_group->type in engine_supports_oa_format - - remove fw_domains and use FORCEWAKE_ALL - - remove MPES/MPEC comment - - s/xehp/mtl/ in b counter validation function name - - remove engine_supports_oa in __oa_engine_group - - remove warn_ON from __oam_engine_group - - refactor oa_init_groups and oa_init_regs - - assign g->type correctly - - use enum oa_type definition - - v3: (Ashutosh) - - Drop oa_unit_functional as engine_supports_oa is enough - - v4: - - s/DRM_DEBUG/drm_dbg/ - - Signed-off-by: Umesh Nerlige Ramappa - Reviewed-by: Ashutosh Dixit - Link: https://patchwork.freedesktop.org/patch/msgid/20230323225901.3743681-10-umesh.nerlige.ramappa@intel.com +commit 81b1b599dfd71c958418dad586fa72c8d30d1065 +Author: Fei Yang +Date: Tue Jun 6 12:00:42 2023 +0200 + + drm/i915: Allow user to set cache at BO creation + + To comply with the design that buffer objects shall have immutable + cache setting through out their life cycle, {set, get}_caching ioctl's + are no longer supported from MTL onward. With that change caching + policy can only be set at object creation time. The current code + applies a default (platform dependent) cache setting for all objects. + However this is not optimal for performance tuning. The patch extends + the existing gem_create uAPI to let user set PAT index for the object + at creation time. 
+ The new extension is platform independent, so UMD's can switch to using + this extension for older platforms as well, while {set, get}_caching are + still supported on these legacy platforms for compatibility reason. + However, since PAT index was not clearly defined for platforms prior to + GEN12 (TGL), so we are limiting this extension to GEN12+ platforms + only. See ext_set_pat() for the implementation details. + + The documentation related to the PAT/MOCS tables is currently available + for Tiger Lake here: + https://www.intel.com/content/www/us/en/docs/graphics-for-linux/developer-reference/1-0/tiger-lake.html + + The documentation for other platforms is currently being updated. + + BSpec: 45101 + + Mesa support has been submitted in this merge request: + https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878 + + The media driver support has been submitted in this merge request: + https://github.com/intel/media-driver/pull/1680 + + The IGT test related to this change is + igt@gem_create@create-ext-set-pat + + Signed-off-by: Fei Yang + Cc: Chris Wilson + Cc: Matt Roper + Cc: Andi Shyti + Reviewed-by: Andi Shyti + Acked-by: Jordan Justen + Tested-by: Jordan Justen + Acked-by: Carl Zhang + Tested-by: Lihao Gu + Signed-off-by: Andi Shyti + Acked-by: Tvrtko Ursulin + Acked-by: Slawomir Milczarek + Link: https://patchwork.freedesktop.org/patch/msgid/20230606100042.482345-2-andi.shyti@linux.intel.com diff --git a/media_softlet/linux/common/os/i915/include/uapi/i915_drm.h b/media_softlet/linux/common/os/i915/include/uapi/i915_drm.h index c77b55ff6b8..0a5c8144552 100644 --- a/media_softlet/linux/common/os/i915/include/uapi/i915_drm.h +++ b/media_softlet/linux/common/os/i915/include/uapi/i915_drm.h @@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ENGINE_SEMA(class, instance) \ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) -#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) +/* + * Top 4 bits of every 
non-engine counter are GT id. + */ +#define __I915_PMU_GT_SHIFT (60) + +#define ___I915_PMU_OTHER(gt, x) \ + (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \ + ((__u64)(gt) << __I915_PMU_GT_SHIFT)) + +#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x) #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) @@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY +#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0) +#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1) +#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2) +#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3) +#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4) + /* Each region is a minimum of 16k, and there are at most 255 of them. */ #define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use @@ -659,7 +674,8 @@ typedef struct drm_i915_irq_wait { * If the IOCTL is successful, the returned parameter will be set to one of the * following values: * * 0 if HuC firmware load is not complete, - * * 1 if HuC firmware is authenticated and running. + * * 1 if HuC firmware is loaded and fully authenticated, + * * 2 if HuC firmware is loaded and authenticated for clear media only */ #define I915_PARAM_HUC_STATUS 42 @@ -771,6 +787,25 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57 +/* + * Query the status of PXP support in i915. + * + * The query can fail in the following scenarios with the listed error codes: + * -ENODEV = PXP support is not available on the GPU device or in the + * kernel due to missing component drivers or kernel configs. + * + * If the IOCTL is successful, the returned parameter will be set to one of + * the following values: + * 1 = PXP feature is supported and is ready for use. 
+ * 2 = PXP feature is supported but should be ready soon (pending + * initialization of non-i915 system dependencies). + * + * NOTE: When param is supported (positive return values), user space should + * still refer to the GEM PXP context-creation UAPI header specs to be + * aware of possible failure due to system state machine at the time. + */ +#define I915_PARAM_PXP_STATUS 58 + /* Must be kept compact -- no holes and well documented */ /** @@ -1225,7 +1260,7 @@ struct drm_i915_gem_exec_object2 { * * See struct drm_i915_gem_create_ext for the rules when dealing with * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with - * minimum page sizes + * minimum page sizes, like DG2. */ __u64 offset; @@ -2096,6 +2131,21 @@ struct drm_i915_gem_context_param { * * -ENODEV: feature not available * -EPERM: trying to mark a recoverable or not bannable context as protected + * -ENXIO: A dependency such as a component driver or firmware is not yet + * loaded so user space may need to attempt again. Depending on the + * device, this error may be reported if protected context creation is + * attempted very early after kernel start because the internal timeout + * waiting for such dependencies is not guaranteed to be larger than + * required (numbers differ depending on system and kernel config): + * - ADL/RPL: dependencies may take up to 3 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * - MTL: dependencies may take up to 8 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * NOTE: such dependencies happen once, so a subsequent call to create a + * protected context after a prior successful call will not experience + * such timeouts and will not return -ENXIO (unless the driver is reloaded, + * or, depending on the device, resumes from a suspended state). + * -EIO: The firmware did not succeed in creating the protected context. 
*/ #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd /* Must be kept compact -- no holes and well documented */ @@ -2672,9 +2722,11 @@ enum drm_i915_oa_format { I915_OA_FORMAT_A12_B8_C8, I915_OA_FORMAT_A32u40_A4u32_B8_C8, + /* DG2 */ I915_OAR_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_A24u40_A14u32_B8_C8, + /* MTL OAM */ I915_OAM_FORMAT_MPEC8u64_B8_C8, I915_OAM_FORMAT_MPEC8u32_B8_C8, @@ -3540,12 +3592,12 @@ struct drm_i915_gem_create_ext { * * The (page-aligned) allocated size for the object will be returned. * - * On some platforms the kernel will always use 64K or larger + * On platforms like DG2/ATS the kernel will always use 64K or larger * pages for I915_MEMORY_CLASS_DEVICE. The kernel also requires a * minimum of 64K GTT alignment for such objects. * * NOTE: Previously the ABI here required a minimum GTT alignment of 2M - * , due to how the hardware implemented 64K GTT page support, + * on DG2/ATS, due to how the hardware implemented 64K GTT page support, * where we had the following complications: * * 1) The entire PDE (which covers a 2MB virtual address range), must @@ -3580,7 +3632,7 @@ struct drm_i915_gem_create_ext { * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only * strictly required on configurations where some subset of the device * memory is directly visible/mappable through the CPU (which we also - * call small BAR). Note that this is quite + * call small BAR), like on some DG2+ systems. Note that this is quite * undesirable, but due to various factors like the client CPU, BIOS etc * it's something we can expect to see in the wild. See * &drm_i915_memory_region_info.probed_cpu_visible_size for how to @@ -3628,9 +3680,13 @@ struct drm_i915_gem_create_ext { * * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see * struct drm_i915_gem_create_ext_protected_content. + * + * For I915_GEM_CREATE_EXT_SET_PAT usage see + * struct drm_i915_gem_create_ext_set_pat. 
*/ #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 +#define I915_GEM_CREATE_EXT_SET_PAT 2 __u64 extensions; }; @@ -3745,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content { __u32 flags; }; +/** + * struct drm_i915_gem_create_ext_set_pat - The + * I915_GEM_CREATE_EXT_SET_PAT extension. + * + * If this extension is provided, the specified caching policy (PAT index) is + * applied to the buffer object. + * + * Below is an example on how to create an object with specific caching policy: + * + * .. code-block:: C + * + * struct drm_i915_gem_create_ext_set_pat set_pat_ext = { + * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT }, + * .pat_index = 0, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = PAGE_SIZE, + * .extensions = (uintptr_t)&set_pat_ext, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + */ +struct drm_i915_gem_create_ext_set_pat { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + /** + * @pat_index: PAT index to be set + * PAT index is a bit field in Page Table Entry to control caching + * behaviors for GPU accesses. 
The definition of PAT index is + * platform dependent and can be found in hardware specifications, + */ + __u32 pat_index; + /** @rsvd: reserved for future use */ + __u32 rsvd; +}; + /* ID of the protected content session managed by i915 when PXP is active */ #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf diff --git a/media_softlet/linux/common/os/i915/mos_bufmgr.c b/media_softlet/linux/common/os/i915/mos_bufmgr.c index 660a4e14827..50c1068ec0d 100644 --- a/media_softlet/linux/common/os/i915/mos_bufmgr.c +++ b/media_softlet/linux/common/os/i915/mos_bufmgr.c @@ -346,6 +346,16 @@ struct mos_bo_gem { * Memory Type on created the surfaces for local/system memory */ int mem_region; + + /** + * PAT Index + */ + unsigned int pat_index; + + /** + * Is cpu cacheable + */ + bool cpu_cacheable; }; struct mos_exec_info { @@ -1007,11 +1017,14 @@ mos_gem_bo_alloc_internal(struct mos_bufmgr *bufmgr, uint32_t tiling_mode, unsigned long stride, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { struct mos_bufmgr_gem *bufmgr_gem = (struct mos_bufmgr_gem *) bufmgr; struct mos_bo_gem *bo_gem; unsigned int page_size = getpagesize(); + static bool support_pat_index = true; int ret; struct mos_gem_bo_bucket *bucket; bool alloc_from_cache; @@ -1034,7 +1047,13 @@ mos_gem_bo_alloc_internal(struct mos_bufmgr *bufmgr, } else { bo_size = bucket->size; } - + if (!support_pat_index) + { + /* For old kernel without pat index support, + * We need to reset pat_index for bo reuse policy + */ + pat_index = PAT_INDEX_INVALID; + } pthread_mutex_lock(&bufmgr_gem->lock); /* Get a buffer out of the cache if available */ retry: @@ -1075,7 +1094,11 @@ mos_gem_bo_alloc_internal(struct mos_bufmgr *bufmgr, bucket); goto retry; } - + if (bo_gem->pat_index != pat_index) + { + mos_gem_bo_free(&bo_gem->bo); + goto retry; + } if (mos_gem_bo_set_tiling_internal(&bo_gem->bo, tiling_mode, stride)) { @@ -1098,8 +1121,10 @@ mos_gem_bo_alloc_internal(struct 
mos_bufmgr *bufmgr, bo_gem->bo.size = bo_size; bo_gem->mem_region = I915_MEMORY_CLASS_SYSTEM; + bo_gem->pat_index = PAT_INDEX_INVALID; + bo_gem->cpu_cacheable = true; - if(bufmgr_gem->has_lmem && + if (bufmgr_gem->has_lmem && (mem_type == MOS_MEMPOOL_VIDEOMEMORY || mem_type == MOS_MEMPOOL_DEVICEMEMORY)) { struct drm_i915_gem_memory_class_instance mem_region; memclear(mem_region); @@ -1124,15 +1149,48 @@ mos_gem_bo_alloc_internal(struct mos_bufmgr *bufmgr, bo_gem->bo.handle = bo_gem->gem_handle; bo_gem->mem_region = I915_MEMORY_CLASS_DEVICE; } - else { - struct drm_i915_gem_create create; - memclear(create); - create.size = bo_size; - ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_CREATE, - &create); - bo_gem->gem_handle = create.handle; - bo_gem->bo.handle = bo_gem->gem_handle; + else + { + ret = -EINVAL; + if (support_pat_index && pat_index != PAT_INDEX_INVALID) + { + struct drm_i915_gem_create_ext_set_pat set_pat_ext; + memclear(set_pat_ext); + set_pat_ext.base.name = I915_GEM_CREATE_EXT_SET_PAT; + set_pat_ext.pat_index = pat_index; + + struct drm_i915_gem_create_ext create; + memclear(create); + create.size = bo_size; + create.extensions = (uintptr_t)(&set_pat_ext); + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_CREATE_EXT, + &create); + bo_gem->gem_handle = create.handle; + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->pat_index = pat_index; + if (ret != 0) + { + /* For old kernel without pat_index support, + * DRM_IOCTL_I915_GEM_CREATE_EXT with unknown + * set_pat_ext extension will return -EINVAL + * support_pat_index need to be set false. 
+ */ + support_pat_index = false; + } + } + if (ret != 0) + { + struct drm_i915_gem_create create; + memclear(create); + create.size = bo_size; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_CREATE, + &create); + bo_gem->gem_handle = create.handle; + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->pat_index = PAT_INDEX_INVALID; + } } if (ret != 0) { free(bo_gem); @@ -1174,6 +1232,11 @@ mos_gem_bo_alloc_internal(struct mos_bufmgr *bufmgr, bo_gem->reusable = true; bo_gem->use_48b_address_range = bufmgr_gem->bufmgr.bo_use_48b_address_range ? true : false; + if (bo_gem->pat_index != PAT_INDEX_INVALID) + { + bo_gem->cpu_cacheable = cpu_cacheable; + } + mos_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); if (bufmgr_gem->use_softpin) @@ -1192,12 +1255,14 @@ mos_gem_bo_alloc_for_render(struct mos_bufmgr *bufmgr, const char *name, unsigned long size, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { return mos_gem_bo_alloc_internal(bufmgr, name, size, I915_TILING_NONE, 0, BO_ALLOC_FOR_RENDER, - alignment, mem_type); + alignment, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * @@ -1205,17 +1270,19 @@ mos_gem_bo_alloc(struct mos_bufmgr *bufmgr, const char *name, unsigned long size, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { return mos_gem_bo_alloc_internal(bufmgr, name, size, 0, - I915_TILING_NONE, 0, 0, mem_type); + I915_TILING_NONE, 0, 0, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * mos_gem_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, int x, int y, int cpp, uint32_t *tiling_mode, unsigned long *pitch, unsigned long flags, - int mem_type) + int mem_type, unsigned int pat_index, bool cpu_cacheable) { struct mos_bufmgr_gem *bufmgr_gem = (struct mos_bufmgr_gem *)bufmgr; unsigned long size, stride; @@ -1259,7 +1326,7 @@ mos_gem_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char 
*name, stride = 0; return mos_gem_bo_alloc_internal(bufmgr, name, size, flags, - tiling, stride, 0, mem_type); + tiling, stride, 0, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * @@ -1304,10 +1371,12 @@ mos_gem_bo_alloc_userptr(struct mos_bufmgr *bufmgr, return nullptr; } - bo_gem->gem_handle = userptr.handle; - bo_gem->bo.handle = bo_gem->gem_handle; - bo_gem->bo.bufmgr = bufmgr; - bo_gem->is_userptr = true; + bo_gem->gem_handle = userptr.handle; + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->is_userptr = true; + bo_gem->pat_index = PAT_INDEX_INVALID; + bo_gem->cpu_cacheable = true; #ifdef __cplusplus bo_gem->bo.virt = addr; #else @@ -1486,6 +1555,8 @@ mos_bufmgr_bo_gem_create_from_name(struct mos_bufmgr *bufmgr, #endif bo_gem->bo.bufmgr = bufmgr; bo_gem->name = name; + bo_gem->pat_index = PAT_INDEX_INVALID; + bo_gem->cpu_cacheable = true; atomic_set(&bo_gem->refcount, 1); bo_gem->validate_index = -1; bo_gem->gem_handle = open_arg.handle; @@ -1761,7 +1832,7 @@ map_wc(struct mos_linux_bo *bo) } else { - mmap_arg.flags = I915_MMAP_OFFSET_WC; + mmap_arg.flags = I915_MMAP_OFFSET_WC; } ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, @@ -1895,6 +1966,11 @@ drm_export int mos_gem_bo_map(struct mos_linux_bo *bo, int write_enable) #endif return 0; } + /* If cpu cacheable is false, it means bo is Non-Coherent. 
*/ + if (!bo_gem->cpu_cacheable) + { + return mos_gem_bo_map_wc(bo); + } pthread_mutex_lock(&bufmgr_gem->lock); @@ -1915,7 +1991,7 @@ drm_export int mos_gem_bo_map(struct mos_linux_bo *bo, int write_enable) } else { - mmap_arg.flags = I915_MMAP_OFFSET_WB; + mmap_arg.flags = I915_MMAP_OFFSET_WB; } ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, @@ -3445,8 +3521,9 @@ mos_gem_bo_create_from_prime(struct mos_bufmgr *bufmgr, int prime_fd, int size) bo_gem->bo.handle = handle; bo_gem->bo.bufmgr = bufmgr; - bo_gem->gem_handle = handle; - + bo_gem->gem_handle = handle; + bo_gem->pat_index = PAT_INDEX_INVALID; + bo_gem->cpu_cacheable = true; atomic_set(&bo_gem->refcount, 1); bo_gem->name = "prime"; diff --git a/media_softlet/linux/common/os/i915/mos_bufmgr_api.c b/media_softlet/linux/common/os/i915/mos_bufmgr_api.c index 1ee613a8cda..516609f63b2 100644 --- a/media_softlet/linux/common/os/i915/mos_bufmgr_api.c +++ b/media_softlet/linux/common/os/i915/mos_bufmgr_api.c @@ -49,7 +49,7 @@ struct mos_linux_bo * mos_bo_alloc(struct mos_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, int mem_type) + unsigned long size, unsigned int alignment, int mem_type, unsigned int pat_index, bool cpu_cacheable) { if(!bufmgr) { @@ -59,7 +59,7 @@ mos_bo_alloc(struct mos_bufmgr *bufmgr, const char *name, if (bufmgr->bo_alloc) { - return bufmgr->bo_alloc(bufmgr, name, size, alignment, mem_type); + return bufmgr->bo_alloc(bufmgr, name, size, alignment, mem_type, pat_index, cpu_cacheable); } else { @@ -70,7 +70,7 @@ mos_bo_alloc(struct mos_bufmgr *bufmgr, const char *name, struct mos_linux_bo * mos_bo_alloc_for_render(struct mos_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, int mem_type) + unsigned long size, unsigned int alignment, int mem_type, unsigned int pat_index, bool cpu_cacheable) { if(!bufmgr) { @@ -80,7 +80,7 @@ mos_bo_alloc_for_render(struct mos_bufmgr *bufmgr, const char *name, if 
(bufmgr->bo_alloc_for_render) { - return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment, mem_type); + return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment, mem_type, pat_index, cpu_cacheable); } else { @@ -119,7 +119,7 @@ struct mos_linux_bo * mos_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, int x, int y, int cpp, uint32_t *tiling_mode, unsigned long *pitch, unsigned long flags, - int mem_type) + int mem_type, unsigned int pat_index, bool cpu_cacheable) { if(!bufmgr) { @@ -130,7 +130,7 @@ mos_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, if (bufmgr->bo_alloc_tiled) { return bufmgr->bo_alloc_tiled(bufmgr, name, x, y, cpp, - tiling_mode, pitch, flags, mem_type); + tiling_mode, pitch, flags, mem_type, pat_index, cpu_cacheable); } else { diff --git a/media_softlet/linux/common/os/i915_production/mos_bufmgr.c b/media_softlet/linux/common/os/i915_production/mos_bufmgr.c index 3bc4f252952..a0e713e634e 100644 --- a/media_softlet/linux/common/os/i915_production/mos_bufmgr.c +++ b/media_softlet/linux/common/os/i915_production/mos_bufmgr.c @@ -1054,8 +1054,12 @@ mos_gem_bo_alloc_internal(struct mos_bufmgr *bufmgr, uint32_t tiling_mode, unsigned long stride, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { + MOS_UNUSED(pat_index); + MOS_UNUSED(cpu_cacheable); struct mos_bufmgr_gem *bufmgr_gem = (struct mos_bufmgr_gem *) bufmgr; struct mos_bo_gem *bo_gem; unsigned int page_size = getpagesize(); @@ -1237,12 +1241,14 @@ mos_gem_bo_alloc_for_render(struct mos_bufmgr *bufmgr, const char *name, unsigned long size, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { return mos_gem_bo_alloc_internal(bufmgr, name, size, I915_TILING_NONE, 0, BO_ALLOC_FOR_RENDER, - alignment, mem_type); + alignment, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * @@ -1250,17 +1256,19 @@ mos_gem_bo_alloc(struct mos_bufmgr 
*bufmgr, const char *name, unsigned long size, unsigned int alignment, - int mem_type) + int mem_type, + unsigned int pat_index, + bool cpu_cacheable) { return mos_gem_bo_alloc_internal(bufmgr, name, size, 0, - I915_TILING_NONE, 0, 0, mem_type); + I915_TILING_NONE, 0, 0, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * mos_gem_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, int x, int y, int cpp, uint32_t *tiling_mode, unsigned long *pitch, unsigned long flags, - int mem_type) + int mem_type, unsigned int pat_index, bool cpu_cacheable) { struct mos_bufmgr_gem *bufmgr_gem = (struct mos_bufmgr_gem *)bufmgr; unsigned long size, stride; @@ -1304,7 +1312,7 @@ mos_gem_bo_alloc_tiled(struct mos_bufmgr *bufmgr, const char *name, stride = 0; return mos_gem_bo_alloc_internal(bufmgr, name, size, flags, - tiling, stride, 0, mem_type); + tiling, stride, 0, mem_type, pat_index, cpu_cacheable); } static struct mos_linux_bo * diff --git a/media_softlet/linux/common/os/i915_production/mos_bufmgr_priv.h b/media_softlet/linux/common/os/i915_production/mos_bufmgr_priv.h index 9f7d908653e..5ed0cc463f0 100644 --- a/media_softlet/linux/common/os/i915_production/mos_bufmgr_priv.h +++ b/media_softlet/linux/common/os/i915_production/mos_bufmgr_priv.h @@ -48,7 +48,7 @@ struct mos_bufmgr { * using bo_map() or drm_intel_gem_bo_map_gtt() to be used by the CPU. 
*/ struct mos_linux_bo *(*bo_alloc) (struct mos_bufmgr *bufmgr, const char *name, - unsigned long size, unsigned int alignment, int mem_type); + unsigned long size, unsigned int alignment, int mem_type, unsigned int pat_index, bool cpu_cacheable); /** * Allocate a buffer object, hinting that it will be used as a @@ -60,7 +60,9 @@ struct mos_bufmgr { const char *name, unsigned long size, unsigned int alignment, - int mem_type); + int mem_type, + unsigned int pat_index, + bool cpu_cacheable); /** * Allocate a buffer object from an existing user accessible @@ -95,7 +97,9 @@ struct mos_bufmgr { uint32_t *tiling_mode, unsigned long *pitch, unsigned long flags, - int mem_type); + int mem_type, + unsigned int pat_index, + bool cpu_cacheable); /** Takes a reference on a buffer object */ void (*bo_reference) (struct mos_linux_bo *bo); diff --git a/media_softlet/linux/common/os/mos_graphicsresource_specific_next.cpp b/media_softlet/linux/common/os/mos_graphicsresource_specific_next.cpp index 453a57670fe..d4199444098 100644 --- a/media_softlet/linux/common/os/mos_graphicsresource_specific_next.cpp +++ b/media_softlet/linux/common/os/mos_graphicsresource_specific_next.cpp @@ -74,6 +74,13 @@ MOS_STATUS GraphicsResourceSpecificNext::Allocate(OsContextNext* osContextPtr, C return MOS_STATUS_INVALID_HANDLE; } + GMM_CLIENT_CONTEXT *gmmClientContext = pOsContextSpecific->GetGmmClientContext(); + if (nullptr == gmmClientContext) + { + MOS_OS_ASSERTMESSAGE("Get GMM Client Context failed."); + return MOS_STATUS_INVALID_HANDLE; + } + MOS_STATUS status = MOS_STATUS_SUCCESS; uint32_t tileFormatLinux = I915_TILING_NONE; uint32_t alignedHeight = params.m_height; @@ -250,6 +257,9 @@ MOS_STATUS GraphicsResourceSpecificNext::Allocate(OsContextNext* osContextPtr, C char bufName[m_maxBufNameLength]; MosUtilities::MosSecureStrcpy(bufName, m_maxBufNameLength, params.m_name.c_str()); + unsigned int patIndex = MosInterface::GetPATIndexFromGmm(gmmClientContext, gmmResourceInfoPtr); + bool 
isCpuCacheable = gmmResourceInfoPtr->GetResFlags().Info.Cacheable; + MOS_TraceEventExt(EVENT_RESOURCE_ALLOCATE, EVENT_TYPE_START, nullptr, 0, nullptr, 0); if (nullptr != params.m_pSystemMemory) { @@ -264,7 +274,7 @@ MOS_STATUS GraphicsResourceSpecificNext::Allocate(OsContextNext* osContextPtr, C // Only Linear and Y TILE supported else if (tileFormatLinux == I915_TILING_NONE) { - boPtr = mos_bo_alloc(pOsContextSpecific->m_bufmgr, bufName, bufSize, 4096, mem_type); + boPtr = mos_bo_alloc(pOsContextSpecific->m_bufmgr, bufName, bufSize, 4096, mem_type, patIndex, isCpuCacheable); } else { @@ -276,7 +286,9 @@ MOS_STATUS GraphicsResourceSpecificNext::Allocate(OsContextNext* osContextPtr, C &tileFormatLinux, &linuxPitch, 0, - mem_type); + mem_type, + patIndex, + isCpuCacheable); bufPitch = (uint32_t)linuxPitch; } @@ -628,6 +640,8 @@ MOS_STATUS GraphicsResourceSpecificNext::AllocateExternalResource( GMM_RESCREATE_PARAMS gmmParams; GMM_RESOURCE_INFO *gmmResourceInfo = nullptr; GMM_RESOURCE_TYPE resourceType = RESOURCE_2D; + unsigned int patIndex = PAT_INDEX_INVALID; + bool isCpuCacheable = true; MosUtilities::MosZeroMemory(&gmmParams, sizeof(gmmParams)); @@ -725,6 +739,7 @@ MOS_STATUS GraphicsResourceSpecificNext::AllocateExternalResource( } gmmParams.Flags.Info.LocalOnly = MEDIA_IS_SKU(&perStreamParameters->m_skuTable, FtrLocalMemory); + MOS_OS_CHK_NULL_RETURN(perStreamParameters->pGmmClientContext); resource->pGmmResInfo = gmmResourceInfo = perStreamParameters->pGmmClientContext->CreateResInfoObject(&gmmParams); MOS_OS_CHK_NULL_RETURN(gmmResourceInfo); @@ -758,10 +773,13 @@ MOS_STATUS GraphicsResourceSpecificNext::AllocateExternalResource( iSize = GFX_ULONG_CAST(gmmResourceInfo->GetSizeSurface()); iHeight = gmmResourceInfo->GetBaseHeight(); + patIndex = MosInterface::GetPATIndexFromGmm(perStreamParameters->pGmmClientContext, gmmResourceInfo); + isCpuCacheable = gmmResourceInfo->GetResFlags().Info.Cacheable; + // Only Linear and Y TILE supported if (tileformat_linux == 
I915_TILING_NONE) { - bo = mos_bo_alloc(perStreamParameters->bufmgr, bufname, iSize, 4096, MOS_MEMPOOL_VIDEOMEMORY); + bo = mos_bo_alloc(perStreamParameters->bufmgr, bufname, iSize, 4096, MOS_MEMPOOL_VIDEOMEMORY, patIndex, isCpuCacheable); } else { @@ -773,7 +791,9 @@ MOS_STATUS GraphicsResourceSpecificNext::AllocateExternalResource( &tileformat_linux, &ulPitch, 0, - MOS_MEMPOOL_VIDEOMEMORY); + MOS_MEMPOOL_VIDEOMEMORY, + patIndex, + isCpuCacheable); iPitch = (int32_t)ulPitch; } diff --git a/media_softlet/linux/common/os/mos_interface.cpp b/media_softlet/linux/common/os/mos_interface.cpp index 56e96064ca4..7fd5eaff28b 100644 --- a/media_softlet/linux/common/os/mos_interface.cpp +++ b/media_softlet/linux/common/os/mos_interface.cpp @@ -2640,6 +2640,25 @@ GMM_CLIENT_CONTEXT *MosInterface::GetGmmClientContext( return nullptr; } +unsigned int MosInterface::GetPATIndexFromGmm( + GMM_CLIENT_CONTEXT *gmmClient, + GMM_RESOURCE_INFO *gmmResourceInfo) +{ + if (gmmClient && gmmResourceInfo) + { + // GetDriverProtectionBits function could hide gmm details info, + // and we should use GetDriverProtectionBits to replace CachePolicyGetPATIndex in future. + // isCompressionEnable could be false temporarily. + bool isCompressionEnable = false; + return gmmClient->CachePolicyGetPATIndex( + gmmResourceInfo, + gmmResourceInfo->GetCachePolicyUsage(), + &isCompressionEnable, + gmmResourceInfo->GetResFlags().Info.Cacheable); + } + return PAT_INDEX_INVALID; +} + void MosInterface::GetGpuPriority(MOS_STREAM_HANDLE streamState, int32_t* pPriority) { MOS_OS_FUNCTION_ENTER;