Skip to content
This repository has been archived by the owner on Jan 8, 2024. It is now read-only.

Commit

Permalink
Merge branch 'internal_master_prm' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
JaxLinAMD committed Jun 10, 2021
2 parents 1736627 + 02ac99b commit 7f6e731
Show file tree
Hide file tree
Showing 72 changed files with 4,503 additions and 4,170 deletions.
8 changes: 0 additions & 8 deletions cmake/PalCompileDefinitions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,6 @@ function(pal_compile_definitions TARGET)
target_compile_definitions(${TARGET} PRIVATE PAL_BUILD_NULL_DEVICE=1)
endif()

if(PAL_BUILD_GPUOPEN)
target_compile_definitions(${TARGET} PUBLIC PAL_BUILD_GPUOPEN=1)
endif()

if(PAL_ENABLE_DEVDRIVER_USAGE)
target_compile_definitions(${TARGET} PRIVATE PAL_ENABLE_DEVDRIVER_USAGE=1)
endif()

if(PAL_ENABLE_PRINTS_ASSERTS)
target_compile_definitions(${TARGET} PUBLIC
$<$<NOT:$<CONFIG:Debug>>:PAL_ENABLE_PRINTS_ASSERTS=1>
Expand Down
7 changes: 1 addition & 6 deletions cmake/PalOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@ include(CMakeDependentOption)
# All options/cache variables should have the prefix "PAL_". This serves two main purposes:
#   1. It avoids name collision issues.
#   2. cmake-gui allows grouping of variables based on prefixes, which then makes it clear what options PAL defined.
option(PAL_BUILD_GPUOPEN "Build GPUOpen developer driver support?" OFF)

option(PAL_ENABLE_DEVDRIVER_USAGE "Enables developer driver suppport." ON)

option(PAL_DBG_COMMAND_COMMENTS "Command with comments" OFF)

Expand Down Expand Up @@ -62,7 +59,5 @@ set( PAL_CWPACK_PATH ${PROJECT_SOURCE_DIR}/src/util/imported/cwpack CACHE P
set( PAL_VAM_PATH ${PROJECT_SOURCE_DIR}/src/core/imported/vam CACHE PATH "Specify the path to the VAM project.")
set( PAL_ADDR_PATH ${PROJECT_SOURCE_DIR}/src/core/imported/addrlib CACHE PATH "Specify the path to the ADDRLIB project.")

if (PAL_BUILD_GPUOPEN)
set(PAL_GPUOPEN_PATH "default" CACHE PATH "Specify the path to the GPUOPEN_PATH project.")
endif()
set(PAL_GPUOPEN_PATH "default" CACHE PATH "Specify the path to the GPUOPEN_PATH project.")

18 changes: 8 additions & 10 deletions cmake/PalOverrides.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,18 @@ pal_override(ADDR_SI_CHIP_DIR "${PROJECT_SOURCE_DIR}/src/core/hw/gfxip/gfx6/chip
# VAM

# GPUOPEN
if(PAL_BUILD_GPUOPEN)
# PAL override to build GPUOpen without the Metrohash library since PAL has its own.
pal_override(GPUOPEN_BUILD_METROHASH OFF)

# PAL override to specify the path to the MetroHash module.
pal_override(METROHASH_PATH "${PAL_METROHASH_PATH}/src")
# PAL override to build GPUOpen without the Metrohash library since PAL has its own.
pal_override(GPUOPEN_BUILD_METROHASH OFF)

# PAL override to build GPUOpen with server helper classes
pal_override(GPUOPEN_BUILD_SERVER_HELPERS ON)
# PAL override to specify the path to the MetroHash module.
pal_override(METROHASH_PATH "${PAL_METROHASH_PATH}/src")

# PAL override to build GPUOpen with support for the standard driver protocols
pal_override(GPUOPEN_BUILD_STANDARD_DRIVER_PROTOCOLS ON)
# PAL override to build GPUOpen with server helper classes
pal_override(GPUOPEN_BUILD_SERVER_HELPERS ON)

endif()
# PAL override to build GPUOpen with support for the standard driver protocols
pal_override(GPUOPEN_BUILD_STANDARD_DRIVER_PROTOCOLS ON)

# GPU Overrides

Expand Down
65 changes: 44 additions & 21 deletions inc/core/palCmdBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ class IIndirectCmdGenerator;
class IMsaaState;
class IPerfExperiment;
class IQueue;
class IScissorState;
class IViewportState;
class IQueryPool;
enum class PerfTraceMarkerType : uint32;
enum class PointOrigin : uint32;
Expand Down Expand Up @@ -297,25 +295,24 @@ enum ImageLayoutEngineFlags : uint32
/// GPU memory in an ICmdBuffer::CmdBarrier() call to ensure cache coherency between those usages.
enum CacheCoherencyUsageFlags : uint32
{
CoherCpu = 0x00000001, ///< Data read or written by CPU.
CoherShader = 0x00000002, ///< Data read or written by a GPU shader.
CoherCopy = 0x00000004, ///< Data read or written by an ICmdBuffer::CmdCopy*() call.
CoherColorTarget = 0x00000008, ///< Color target.
CoherDepthStencilTarget = 0x00000010, ///< Depth stencil target.
CoherResolve = 0x00000020, ///< Source or destination of a CmdResolveImage() call.
CoherClear = 0x00000040, ///< Destination of a CmdClear() call.
CoherIndirectArgs = 0x00000080, ///< Source argument data read by CmdDrawIndirect() and similar functions.
CoherIndexData = 0x00000100, ///< Index buffer data.
CoherQueueAtomic = 0x00000200, ///< Destination of a CmdMemoryAtomic() call.
CoherTimestamp = 0x00000400, ///< Destination of a CmdWriteTimestamp() call. It can be extended to
/// represent general or other types of L2 access. For example, in
/// gl2UncachedCpuCoherency it also indicates IGpuEvent write to
/// GL2 will be uncached, because we don't have a CoherEvent flag.
CoherCeLoad = 0x00000800, ///< Source of a CmdLoadCeRam() call.
CoherCeDump = 0x00001000, ///< Destination of CmdDumpCeRam() call.
CoherStreamOut = 0x00002000, ///< Data written as stream output.
CoherMemory = 0x00004000, ///< Data read or written directly from/to memory.
CoherSampleRate = 0x00008000, ///< CmdBindSampleRateImage() source.
CoherCpu = 0x00000001, ///< Data read or written by CPU.
CoherShader = 0x00000002, ///< Data read or written by a GPU shader.
CoherCopy = 0x00000004, ///< Data read or written by an ICmdBuffer::CmdCopy*() call.
CoherColorTarget = 0x00000008, ///< Color target.
CoherDepthStencilTarget = 0x00000010, ///< Depth stencil target.
CoherResolve = 0x00000020, ///< Source or destination of a CmdResolveImage() call.
CoherClear = 0x00000040, ///< Destination of a CmdClear() call.
CoherIndirectArgs = 0x00000080, ///< Source argument data read by CmdDrawIndirect() and similar functions.
CoherIndexData = 0x00000100, ///< Index buffer data.
CoherQueueAtomic = 0x00000200, ///< Destination of a CmdMemoryAtomic() call.
CoherTimestamp = 0x00000400, ///< Destination of a CmdWriteTimestamp() call.
CoherCeLoad = 0x00000800, ///< Source of a CmdLoadCeRam() call.
CoherCeDump = 0x00001000, ///< Destination of CmdDumpCeRam() call.
CoherStreamOut = 0x00002000, ///< Data written as stream output.
CoherMemory = 0x00004000, ///< Data read or written directly from/to memory.
CoherSampleRate = 0x00008000, ///< CmdBindSampleRateImage() source.
CoherCp = CoherTimestamp, ///< HW Command Processor (CP) encompassing the front-end command
/// processing of any queue, including SDMA. Shares its bit with CoherTimestamp.
CoherAllUsages = 0x0000FFFF ///< Mask covering every usage bit declared above (0x0001 through 0x8000).
};

Expand Down Expand Up @@ -2309,6 +2306,32 @@ class ICmdBuffer : public IDestroyable
virtual void CmdBarrier(
const BarrierInfo& barrierInfo) = 0;

/// Performs source pipeline point and cache access optimization based on the legacy barrier interface.
///
/// @param [in] pipePointWaitCount Number of entries in pPipePoints.
/// @param [in,out] pPipePoints Array of @ref HwPipePoint to optimize.
/// @param [in,out] pCacheMask A mask of ORed @ref CacheCoherencyUsageFlags to optimize.
///
/// @note HwPipePostBlt will be converted to a more accurate stage based on the underlying implementation of
/// outstanding BLTs, but will be left as HwPipePostBlt if the internal outstanding BLTs can't be expressed
/// as a client-facing HwPipePoint (e.g., if there are CP DMA BLTs in flight).
virtual void OptimizeBarrierReleaseInfo(
uint32 pipePointWaitCount,
HwPipePoint* pPipePoints,
uint32* pCacheMask) const = 0;

/// Performs source pipeline stage and cache access optimization based on the acquire/release interface.
///
/// @param [in,out] pStageMask A mask of ORed @ref PipelineStageFlag to optimize.
/// @param [in,out] pAccessMask A mask of ORed @ref CacheCoherencyUsageFlags to optimize.
///
/// @note PipelineStageBlt will be converted to more accurate stage(s) based on the underlying implementation of
/// outstanding BLTs, but will be left as PipelineStageBlt if the internal outstanding BLTs can't be expressed
/// as a client-facing PipelineStage (e.g., if there are CP DMA BLTs in flight).
virtual void OptimizeAcqRelReleaseInfo(
uint32* pStageMask,
uint32* pAccessMask) const = 0;

/// Performs the release portion of an acquire/release-based barrier. This releases a set of resources from their
/// current usage, while CmdAcquire() is expected to be called to acquire access to the resources for future,
/// different usage.
Expand Down
79 changes: 42 additions & 37 deletions inc/core/palDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,18 @@ enum MsaaFlags : uint16
MsaaAll = 0x3FFF,
};

/// Supported ray tracing IP (RTIP) version enumeration.
enum class RayTracingIpLevel : uint32
{
_None = 0, ///< Zero value; always declared, regardless of whether a `None` macro exists.
// The `None` alias is only declared when no preprocessor macro named None is defined, since such a macro
// would otherwise rewrite the enumerator name.
#ifndef None
None = _None, ///< The device does not have a ray tracing IP level.
#endif

RtIp1_0 = 0x1, ///< First implementation of HW RT
RtIp1_1 = 0x2, ///< Added computation of triangle barycentrics into HW
};

/// Reports various properties of a particular IDevice to the client. @see IDevice::GetProperties.
struct DeviceProperties
{
Expand Down Expand Up @@ -1093,6 +1105,8 @@ struct DeviceProperties
uint32 maxGsOutputVert; ///< Maximum number of GS output vertices.
uint32 maxGsTotalOutputComponents; ///< Maximum number of GS output components totally.

RayTracingIpLevel rayTracingIp; ///< HW RayTracing IP version

union
{
struct
Expand Down Expand Up @@ -1896,6 +1910,22 @@ struct SamplerInfo
} flags;
};

/// Specifies which heuristic should be utilized for sorting children when box sorting is enabled.
enum class BoxSortHeuristic : uint32
{
ClosestFirst = 0x0, ///< Traversal is ordered to enter the children that
///< intersect the ray closer to the ray origin first.
///< This is a good baseline option. Default option for RT IP 1.x.
LargestFirst = 0x1, ///< Traversal is ordered to enter the children that have the largest
///< interval where the box intersects the ray first.
///< Good for shadow rays with terminate on first hit.
ClosestMidPoint = 0x2, ///< Traversal is ordered to enter the children that have a midpoint in the interval
///< where the box intersects that has the lowest intersection time before clamping.
///< Good for reflection rays.
Disabled = 0x3, ///< Box sort and heuristic are disabled.
Count ///< Number of values declared above.
};

/// Specifies parameter for creating a BvH (bounding volume hierarchy, used by ray-trace) descriptor
struct BvhInfo
{
Expand All @@ -1905,11 +1935,18 @@ struct BvhInfo
gpusize numNodes; ///< Number of nodes in the view
uint32 boxGrowValue; ///< Number of ULPs (unit in last place) to be added during ray-box test.

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 668
BoxSortHeuristic boxSortHeuristic; ///< Specifies which heuristic should be utilized for
///< sorting children when box sorting is enabled
#endif

union
{
struct
{
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 668
uint32 findNearest : 1; ///< Enable sorting the box intersect results
#endif
uint32 useZeroOffset : 1; ///< If set, SRD address is programmed to zero
uint32 returnBarycentrics : 1; ///< When enabled, ray intersection will return triangle barycentrics.
///< Note: Only valid if @see supportIntersectRayBarycentrics is true.
Expand All @@ -1920,7 +1957,12 @@ struct BvhInfo
uint32 bypassMallRead : 1;
uint32 bypassMallWrite : 1;
uint32 placeholder2 : 1; ///< Reserved for future HW

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 668
uint32 reserved : 26; ///< Reserved for future HW
#else
uint32 reserved : 27; ///< Reserved for future HW
#endif
};

uint32 u32All; ///< Flags packed as 32-bit uint.
Expand Down Expand Up @@ -4740,43 +4782,6 @@ class IDevice
m_pClientData = pClientData;
}

#if defined(PAL_DOPP)
/// Sets Primary Source ID For DOPP processing.
/// This function is an Escape call to tell KMD which Screen DOPP will process.
///
/// @param [in] pScreen Pal::IScreen* of the Primary selected for DOPP processing.
///
/// @returns Success if Primary for DOPP processing is set.
virtual Result SetPrimarySourceIDForDopp(Pal::IScreen* pScreen) = 0;

/// Get Dopp Primary Surface Info for previously selected Screen.
/// This function is an Escape call to query KMD about Primary surface properties
/// of previously selected surface.
///
/// @param [out] pDesktopProp A pointer to Extent3d structure.
///
/// @returns Success if Extent3d properties are retrieved.
virtual Result GetDoppPrimarySurfaceInfo(Extent3d* pDesktopProp) = 0;

/// Enable Post Processing in DOPP.
/// This function is an Escape call to tell KMD to enable DOPP post processing.
///
/// @param [in] enable Enable/Disable DOPP Post Processing.
///
/// @returns Success if call is successful.
virtual Result EnablePostProcessDopp(bool enable) = 0;

/// Present Texture To Video Dopp
/// This function is an Escape call to pass present texture handle to KMD.
/// Then KMD will flip (make visible) this surface instead of original desktop
/// surface on next SwapBuffers call.
///
/// @param [in] pPresentTexture Present Texture memory pointer
/// @param [in] isBlocking If true, call is blocking.
///
/// @returns Success if call is successful.
virtual Result PresentTextureToVideoDopp(Pal::IGpuMemory* pPresentTexture, bool isBlocking) = 0;
#endif

protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// called the proper create method.
Expand Down
12 changes: 1 addition & 11 deletions inc/core/palGpuMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,7 @@ union GpuMemoryCreateFlags
#else
uint32 placeholder657 : 1;
#endif
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 659
uint32 doppDesktopTexture : 1; ///< Indicate dopp desktop texture
uint32 reserved : 4; ///< Reserved for future use.
#else
uint32 reserved : 5; ///< Reserved for future use.
#endif
uint32 reserved : 5; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};
Expand Down Expand Up @@ -359,12 +354,7 @@ struct GpuMemoryDesc
uint32 isExecutable : 1; ///< GPU memory is used for execution. Valid only when IOMMUv2 is supported
uint32 isExternPhys : 1; ///< GPU memory is External Physical memory
uint32 placeholder0 : 1; ///< Reserved for future memory flag
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 659
uint32 isDoppDesktopTexture : 1; ///< GPU memory is allocated for dopp desktop texture
uint32 reserved : 23; ///< Reserved for future use
#else
uint32 reserved : 24; ///< Reserved for future use
#endif
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< GPU memory desc flags.
Expand Down
5 changes: 0 additions & 5 deletions inc/core/palImage.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,7 @@ union ImageCreateFlags
#else
uint32 reserved616 : 1;
#endif
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 659
uint32 isDoppTexture : 1; ///< Indicates DOPP texture
uint32 reserved : 8; ///< Reserved for future use.
#else
uint32 reserved : 9; ///< Reserved for future use.
#endif
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};
Expand Down
4 changes: 2 additions & 2 deletions inc/core/palLib.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
/// compatible, it is not assumed that the client will initialize all input structs to 0.
///
/// @ingroup LibInit
#define PAL_INTERFACE_MAJOR_VERSION 667
#define PAL_INTERFACE_MAJOR_VERSION 668

/// Minor interface version. Note that the interface version is distinct from the PAL version itself, which is returned
/// in @ref Pal::PlatformProperties.
Expand All @@ -53,7 +53,7 @@
/// of the existing enum values will change. This number will be reset to 0 when the major version is incremented.
///
/// @ingroup LibInit
#define PAL_INTERFACE_MINOR_VERSION 1
#define PAL_INTERFACE_MINOR_VERSION 2

/// Minimum major interface version. This is the minimum interface version PAL supports in order to support backward
/// compatibility. When it is equal to PAL_INTERFACE_MAJOR_VERSION, only the latest interface version is supported.
Expand Down
31 changes: 25 additions & 6 deletions inc/util/palDeque.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,15 @@ class Deque
/// failed because of an internal failure to allocate system memory.
Result PushFront(const T& data);

/// Emplaces a newly constructed item onto the front of the deque.
///
/// @param [in] args Arguments used to construct the new item.
///
/// @returns @ref Success if the item was successfully added to the deque or @ref ErrorOutOfMemory if the operation
/// failed because of an internal failure to allocate system memory.
template<typename... Args>
Result EmplaceFront(Args&&... args);

/// Pushes a copy of the specified item onto the back of the deque.
///
/// @param [in] data Item to be added to the back of the deque.
Expand All @@ -169,6 +178,15 @@ class Deque
/// failed because of an internal failure to allocate system memory.
Result PushBack(const T& data);

/// Emplaces a newly constructed item onto the back of the deque.
///
/// @param [in] args Arguments used to construct the new item.
///
/// @returns @ref Success if the item was successfully added to the deque or @ref ErrorOutOfMemory if the operation
/// failed because of an internal failure to allocate system memory.
template<typename... Args>
Result EmplaceBack(Args&&... args);

/// Pops the first item off the front of the deque, returning the popped value.
///
/// @param [out] pOut Item popped off the front of the deque.
Expand All @@ -186,14 +204,11 @@ class Deque
Result PopBack(T* pOut);

private:
Result AllocateFront(T**);
Result AllocateBack(T**);
DequeBlockHeader* AllocateNewBlock();
void FreeUnusedBlock(DequeBlockHeader* pHeader);

// Acts as a proxy for the destructor of a data element when one is popped. If T is a native or POD type this
// function is safe to be empty. However, if the objects being stored require complex move/copy/delete semantics
// we'd need to have a specialization explicitly declared.
void CleanupElement(T* pData) const { }

size_t m_numElements; // Number of elements
const size_t m_numElementsPerBlock; // Block granularity when we need to alloc a new one

Expand Down Expand Up @@ -239,7 +254,11 @@ PAL_INLINE Deque<T, Allocator>::~Deque()
{
while (m_pFrontHeader != nullptr)
{
CleanupElement(m_pFront);
// Explicitly destroy the removed value if it's non-trivial.
if (!std::is_pod<T>::value)
{
m_pFront->~T();
}
++m_pFront; // Advance to the next element
--m_numElements;

Expand Down
Loading

0 comments on commit 7f6e731

Please sign in to comment.