Skip to content

Commit

Permalink
[Metal] enable loading of the Metal render system for ogre2
Browse files Browse the repository at this point in the history
- Add parameter "metal" and a member variable useMetalRenderSystem to Ogre2RenderEngine
- Modify LoadPlugins to load the Metal render system if enabled
- Modify CreateRenderSystem to select the Metal render system
- Modify RegisterHlms to load the Metal resources
- Add missing Metal shaders for the Terra (from Ogre2)
- Add Metal shader for skybox
- Update skybox material to support universal shaders
- Create subdirectories for GLSL and Metal in media/materials/programs and update Ogre2RenderEngine with new resource locations
- Add Metal and unified shaders to each material
- Add placeholders for a number of shaders to prevent compile errors when running examples
- Minor formatting changes to plain_color, point and skybox Metal shaders.
- Port depth_camera_vs and selection_buffer_fs to Metal
- The mouse_picker, ogre2_demo and transform_control demos now work correctly.
- Shader formatting - replace tabs with whitespace
- Port gaussian_noise vertex and pixel shaders from the GLSL counterparts
- The ogre2_demo and render_pass examples now work correctly.
- Port thermal_camera pixel shader
  - Correct error in thermal material script
  - Port thermal_camera pixel shaders from the GLSL counterparts
  - The thermal_camera examples now run but require further testing
- Add stubs for the remaining vertex and pixel shaders to port from GLSL
- Port heat_signature pixel shader
- Initial ports of depth_camera and gpu_rays shaders
  - On Metal the compositor in Ogre2GpuRay has a pixel format exception
  - Texture sampling for depth_camera_final_fs.metal is not using the equivalent of OpenGL's texelFetch
- Fix fragment program specifier in gaussian noise material
  - Unified shader had incorrect specifier
- Fix depth camera vertex shader not found in selection buffer material
  - This is the same issue as fixed in gazebosim#456
- Switch to using RGBA format for internal textures in gpu ray sensor
- Complete port of gpu rays 1st pass shader
  - Complete port of the GPU Rays shader
  - This depends upon the pixel format fix in gazebosim#468
- Add a method to render targets and cameras to retrieve the Metal texture id
  - These functions provide the Metal equivalent of GLId() and RenderTextureGLId() for OpenGL.
  - The argument is the address of a pointer to void* as the object required for Metal is an objective-c type id<MTLTexture> which we do not want exposed in the interface
  - There is a runtime dependency on an upstream change to ogre-next: OGRECave/ogre-next@3b11873
  - cherry-pick 8fc230c
- Ensure line length limits are adhered to.
- Fix inconsistent parameter name in RenderTextureMetalId
  - Fix an inconsistent parameter name causing a CI failure.
- Fix unused parameter warning in RenderTextureMetalId
  - Fix an unused parameter warning causing a CI failure.

Signed-off-by: Rhys Mainwaring <[email protected]>
  • Loading branch information
srmainwaring committed Nov 9, 2021
1 parent b1d2ac8 commit 1120397
Show file tree
Hide file tree
Showing 52 changed files with 2,196 additions and 57 deletions.
3 changes: 3 additions & 0 deletions ogre2/include/ignition/rendering/ogre2/Ogre2RenderEngine.hh
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ namespace ignition
/// \brief True to use the current opengl context
private: bool useCurrentGLContext = false;

/// \brief True to use the Metal render system
private: bool useMetalRenderSystem = false;

/// \brief Pointer to private data
private: std::unique_ptr<Ogre2RenderEnginePrivate> dataPtr;

Expand Down
71 changes: 46 additions & 25 deletions ogre2/src/Ogre2GpuRays.cc
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,7 @@ void Ogre2GpuRays::CreateSampleTexture()
// R: u coordinate on the cubemap face
// G: v coordinate on the cubemap face
// B: cubemap face index
// A: unused
// this texture is passed to the 2nd pass fragment shader
auto engine = Ogre2RenderEngine::Instance();
auto ogreRoot = engine->OgreRoot();
Expand All @@ -630,7 +631,7 @@ void Ogre2GpuRays::CreateSampleTexture()
this->dataPtr->cubeUVTexture->setResolution(
this->dataPtr->w2nd, this->dataPtr->h2nd);
this->dataPtr->cubeUVTexture->setNumMipmaps(1u);
this->dataPtr->cubeUVTexture->setPixelFormat(Ogre::PFG_RGB32_FLOAT);
this->dataPtr->cubeUVTexture->setPixelFormat(Ogre::PFG_RGBA32_FLOAT);

const Ogre::uint32 rowAlignment = 1u;
const size_t dataSize = Ogre::PixelFormatGpuUtils::getSizeBytes(
Expand Down Expand Up @@ -670,7 +671,8 @@ void Ogre2GpuRays::CreateSampleTexture()
pDest[index++] = uv.Y();
// face
pDest[index++] = faceIdx;

// unused
pDest[index++] = 1.0;
h += hStep;
}
v += vStep;
Expand Down Expand Up @@ -1052,7 +1054,7 @@ void Ogre2GpuRays::Setup1stPass()
this->dataPtr->w1st, this->dataPtr->h1st);
this->dataPtr->firstPassTextures[i]->setNumMipmaps(1u);
this->dataPtr->firstPassTextures[i]->setPixelFormat(
Ogre::PFG_RGB32_FLOAT);
Ogre::PFG_RG32_FLOAT);

this->dataPtr->firstPassTextures[i]->scheduleTransitionTo(
Ogre::GpuResidency::Resident);
Expand Down Expand Up @@ -1116,7 +1118,7 @@ void Ogre2GpuRays::Setup2ndPass()
this->dataPtr->w2nd, this->dataPtr->h2nd);
this->dataPtr->secondPassTexture->setNumMipmaps(1u);
this->dataPtr->secondPassTexture->setPixelFormat(
Ogre::PFG_RGB32_FLOAT);
Ogre::PFG_RGBA32_FLOAT);

this->dataPtr->secondPassTexture->scheduleTransitionTo(
Ogre::GpuResidency::Resident);
Expand Down Expand Up @@ -1316,15 +1318,14 @@ void Ogre2GpuRays::PostRender()
unsigned int width = this->dataPtr->w2nd;
unsigned int height = this->dataPtr->h2nd;

int len = width * height * this->Channels();

PixelFormat format = PF_FLOAT32_RGB;
unsigned int channelCount = PixelUtil::ChannelCount(format);
PixelFormat format = PF_FLOAT32_RGBA;
unsigned int rawChannelCount = PixelUtil::ChannelCount(format);
unsigned int bytesPerChannel = PixelUtil::BytesPerChannel(format);
int rawLen = width * height * rawChannelCount;

if (!this->dataPtr->gpuRaysBuffer)
{
this->dataPtr->gpuRaysBuffer = new float[len];
this->dataPtr->gpuRaysBuffer = new float[rawLen];
}

// blit data from gpu to cpu
Expand All @@ -1338,18 +1339,40 @@ void Ogre2GpuRays::PostRender()
for (unsigned int i = 0; i < height; ++i)
{
unsigned int rawDataRowIdx = i * box.bytesPerRow / bytesPerChannel;
unsigned int rowIdx = i * width * channelCount;
unsigned int rowIdx = i * width * rawChannelCount;
memcpy(&this->dataPtr->gpuRaysBuffer[rowIdx], &bufferTmp[rawDataRowIdx],
width * channelCount * bytesPerChannel);
width * rawChannelCount * bytesPerChannel);
}

// Metal does not support RGB32_FLOAT so the internal texture format is
// RGBA32_FLOAT. For backward compatibility, output data is kept in RGB
// format instead of RGBA
int outputLen = width * height * this->Channels();
if (!this->dataPtr->gpuRaysScan)
{
this->dataPtr->gpuRaysScan = new float[len];
this->dataPtr->gpuRaysScan = new float[outputLen];
}

memcpy(this->dataPtr->gpuRaysScan,
this->dataPtr->gpuRaysBuffer, len * sizeof(float));
// copy data from RGBA buffer to RGB buffer
for (unsigned int row = 0; row < height; ++row)
{
// the texture box step size could be larger than our image buffer step
// size
for (unsigned int column = 0; column < width; ++column)
{
unsigned int idx = (row * width * this->Channels()) +
column * this->Channels();
unsigned int rawIdx = (row * width * rawChannelCount) +
column * rawChannelCount;

this->dataPtr->gpuRaysScan[idx] =
this->dataPtr->gpuRaysBuffer[rawIdx];
this->dataPtr->gpuRaysScan[idx + 1] =
this->dataPtr->gpuRaysBuffer[rawIdx + 1];
this->dataPtr->gpuRaysScan[idx + 2] =
this->dataPtr->gpuRaysBuffer[rawIdx + 2];
}
}

this->dataPtr->newGpuRaysFrame(this->dataPtr->gpuRaysScan,
width, height, this->Channels(), "PF_FLOAT32_RGB");
Expand All @@ -1360,18 +1383,16 @@ void Ogre2GpuRays::PostRender()
// {
// for (unsigned int j = 0; j < width; ++j)
// {
// if (this->dataPtr->gpuRaysBuffer[i*width*3 + j*3] < 20) {
// std::cerr
// << "["
// << this->dataPtr->gpuRaysBuffer[i*width*3 + j*3]
// << " "
// << this->dataPtr->gpuRaysBuffer[i*width*3 + j*3 + 1]
// << " "
// << this->dataPtr->gpuRaysBuffer[i*width*3 + j*3 + 2]
// << "]\n";
// }
// std::cerr
// << "["
// << this->dataPtr->gpuRaysScan[i*width*3 + j*3]
// << " "
// << this->dataPtr->gpuRaysScan[i*width*3 + j*3 + 1]
// << " "
// << this->dataPtr->gpuRaysScan[i*width*3 + j*3 + 2]
// << "]\n";
// }
// // igndbg << std::endl;
// std::cerr << std::endl;
// }
}

Expand Down
39 changes: 35 additions & 4 deletions ogre2/src/Ogre2RenderEngine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,10 @@ bool Ogre2RenderEngine::LoadImpl(
if (it != _params.end())
std::istringstream(it->second) >> this->winID;

it = _params.find("metal");
if (it != _params.end())
std::istringstream(it->second) >> this->useMetalRenderSystem;

try
{
this->LoadAttempt();
Expand Down Expand Up @@ -339,7 +343,7 @@ bool Ogre2RenderEngine::InitImpl()
void Ogre2RenderEngine::LoadAttempt()
{
this->CreateLogger();
if (!this->useCurrentGLContext)
if (!this->useCurrentGLContext && !this->useMetalRenderSystem)
this->CreateContext();
this->CreateRoot();
this->CreateOverlay();
Expand All @@ -362,7 +366,7 @@ void Ogre2RenderEngine::CreateLogger()

// create actual log
this->ogreLogManager = new Ogre::LogManager();
this->ogreLogManager->createLog(logPath, true, false, false);
this->ogreLogManager->createLog(logPath, true, true, false);
}

//////////////////////////////////////////////////
Expand Down Expand Up @@ -489,6 +493,12 @@ void Ogre2RenderEngine::LoadPlugins()
p = common::joinPaths(path, "Plugin_ParticleFX");
plugins.push_back(p);

if (this->useMetalRenderSystem)
{
std::string p = common::joinPaths(path, "RenderSystem_Metal");
plugins.push_back(p);
}

for (piter = plugins.begin(); piter != plugins.end(); ++piter)
{
// check if plugin library exists
Expand Down Expand Up @@ -534,6 +544,11 @@ void Ogre2RenderEngine::CreateRenderSystem()
const Ogre::RenderSystemList *rsList;

rsList = &(this->ogreRoot->getAvailableRenderers());
std::string targetRenderSysName("OpenGL 3+ Rendering Subsystem");
if (this->useMetalRenderSystem)
{
targetRenderSysName = "Metal Rendering Subsystem";
}

int c = 0;

Expand All @@ -551,11 +566,11 @@ void Ogre2RenderEngine::CreateRenderSystem()
// (it thinks the while loop is empty), so we must put the whole while
// statement on one line and add NOLINT at the end so that cpplint doesn't
// complain about the line being too long
while (renderSys && renderSys->getName().compare("OpenGL 3+ Rendering Subsystem") != 0); // NOLINT
while (renderSys && renderSys->getName().compare(targetRenderSysName) != 0); // NOLINT

if (renderSys == nullptr)
{
ignerr << "unable to find OpenGL rendering system. OGRE is probably "
ignerr << "unable to find " << targetRenderSysName << ". OGRE is probably "
"installed incorrectly. Double check the OGRE cmake output, "
"and make sure OpenGL is enabled." << std::endl;
}
Expand Down Expand Up @@ -663,6 +678,18 @@ void Ogre2RenderEngine::RegisterHlms()
Ogre::ResourceGroupManager::getSingleton().addResourceLocation(
terraGLSLMaterialFolder, "FileSystem", "General");

if (this->useMetalRenderSystem)
{
Ogre::String commonMetalMaterialFolder = common::joinPaths(
rootHlmsFolder, "2.0", "scripts", "materials", "Common", "Metal");
Ogre::ResourceGroupManager::getSingleton().addResourceLocation(
commonMetalMaterialFolder, "FileSystem", "General");
Ogre::String terraMetalMaterialFolder = common::joinPaths(
rootHlmsFolder, "2.0", "scripts", "materials", "Terra", "Metal");
Ogre::ResourceGroupManager::getSingleton().addResourceLocation(
terraMetalMaterialFolder, "FileSystem", "General");
}

// The following code is taken from the registerHlms() function in ogre2
// samples framework
if (rootHlmsFolder.empty())
Expand Down Expand Up @@ -814,6 +841,10 @@ void Ogre2RenderEngine::CreateResources()
std::make_pair(p, "General"));
archNames.push_back(
std::make_pair(p + "/materials/programs", "General"));
archNames.push_back(
std::make_pair(p + "/materials/programs/GLSL", "General"));
archNames.push_back(
std::make_pair(p + "/materials/programs/Metal", "General"));
archNames.push_back(
std::make_pair(p + "/materials/scripts", "General"));
archNames.push_back(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Our terrain has the following pattern:
//
// 1N 10 11
// o-o-o
// |/|/|
// 0N o-+-o 01
// |/|/|
// o-o-o
// NN N0 N1
//
// We need to calculate the normal of the vertex in
// the center '+', which is shared by 6 triangles.

#include <metal_stdlib>
using namespace metal;

// Interpolated per-fragment inputs of main_metal (received as [[stage_in]]).
struct PS_INPUT
{
// Texture coordinate of the fragment. main_metal multiplies it by
// Params::heightMapResolution to obtain integer texel coordinates.
float2 uv0;
};

// Shader constants read by main_metal from [[buffer(PARAMETER_SLOT)]].
struct Params
{
// Multiplied with uv0 to obtain integer height map texel coordinates; also
// used as the upper clamp for the neighbouring texel indices.
float2 heightMapResolution;
// Scale factors: .y multiplies every sampled height, while .x and .z are the
// horizontal offsets given to the neighbouring vertices around the centre.
float3 vScale;
};

// Fragment shader that derives the terrain normal for one height map texel.
//
// Reads the texel addressed by inPs.uv0 and its neighbours from heightMap,
// builds the six triangles that share the centre vertex (see the diagram at
// the top of this file), sums their cross products and normalises the result.
//
// inPs      - interpolated inputs; uv0 selects the texel to process.
// heightMap - read-only height texture bound at texture slot 0; only the .x
//             channel of each texel is used.
// p         - heightMapResolution and vScale, bound at PARAMETER_SLOT.
//
// Returns the normalised normal swizzled to .zyx and remapped with
// "* 0.5 + 0.5" in .xyz; .w is always 1.0.
fragment float4 main_metal
(
    PS_INPUT inPs [[stage_in]],

    texture2d<float, access::read> heightMap [[texture(0)]],

    constant Params &p [[buffer(PARAMETER_SLOT)]]
)
{
    int2 iCoord = int2( inPs.uv0 * p.heightMapResolution );

    // Texel indices of the previous (.x), current (.y) and next (.z)
    // column / row around the centre texel.
    int3 xN01;
    xN01.x = max( iCoord.x - 1, 0 );
    xN01.y = iCoord.x;
    xN01.z = min( iCoord.x + 1, int(p.heightMapResolution.x) );
    int3 yN01;
    yN01.x = max( iCoord.y - 1, 0 );
    yN01.y = iCoord.y;
    yN01.z = min( iCoord.y + 1, int(p.heightMapResolution.y) );

    // The upper clamp above still allows an index equal to
    // heightMapResolution, which is one past the last texel when the
    // resolution matches the texture size. Clamp against the real texture
    // dimensions so border fragments reuse the edge texel instead of reading
    // out of bounds. Indices that were already inside the texture are
    // unaffected.
    const int2 iLastTexel = int2( int( heightMap.get_width() ) - 1,
                                  int( heightMap.get_height() ) - 1 );
    xN01 = min( xN01, int3( iLastTexel.x ) );
    yN01 = min( yN01, int3( iLastTexel.y ) );

    // Watch out! The height names follow the diagram: the first suffix
    // character is the row (y) offset and the second the column (x) offset,
    // whereas heightMap.read takes its coordinate as (x, y).
    float heightNN = heightMap.read( ushort2( xN01.x, yN01.x ) ).x * p.vScale.y;
    float heightN0 = heightMap.read( ushort2( xN01.y, yN01.x ) ).x * p.vScale.y;
    //float heightN1 = heightMap.read( ushort2( xN01.z, yN01.x ) ).x * p.vScale.y;

    float height0N = heightMap.read( ushort2( xN01.x, yN01.y ) ).x * p.vScale.y;
    float height00 = heightMap.read( ushort2( xN01.y, yN01.y ) ).x * p.vScale.y;
    float height01 = heightMap.read( ushort2( xN01.z, yN01.y ) ).x * p.vScale.y;

    //float height1N = heightMap.read( ushort2( xN01.x, yN01.z ) ).x * p.vScale.y;
    float height10 = heightMap.read( ushort2( xN01.y, yN01.z ) ).x * p.vScale.y;
    float height11 = heightMap.read( ushort2( xN01.z, yN01.z ) ).x * p.vScale.y;

    // Vertex positions relative to the centre. Here the first suffix
    // character selects the X offset and the second the Z offset.
    // NOTE(review): presumably the final .zyx swizzle compensates for this
    // swap relative to the height names - confirm.
    float3 vNN = float3( -p.vScale.x, heightNN, -p.vScale.z );
    float3 vN0 = float3( -p.vScale.x, heightN0, 0 );
    //float3 vN1 = float3( -p.vScale.x, heightN1, p.vScale.z );

    float3 v0N = float3( 0, height0N, -p.vScale.z );
    float3 v00 = float3( 0, height00, 0 );
    float3 v01 = float3( 0, height01, p.vScale.z );

    //float3 v1N = float3( p.vScale.x, height1N, -p.vScale.z );
    float3 v10 = float3( p.vScale.x, height10, 0 );
    float3 v11 = float3( p.vScale.x, height11, p.vScale.z );

    float3 vNormal = float3( 0, 0, 0 );

    // Sum the (unnormalised) normals of the six triangles around the centre.
    vNormal += cross( (v01 - v00), (v11 - v00) );
    vNormal += cross( (v11 - v00), (v10 - v00) );
    vNormal += cross( (v10 - v00), (v0N - v00) );
    vNormal += cross( (v0N - v00), (vNN - v00) );
    vNormal += cross( (vNN - v00), (vN0 - v00) );
    vNormal += cross( (vN0 - v00), (v01 - v00) );

    // Disabled alternative that uses all eight neighbours:
//  vNormal += cross( (v01 - v00), (v11 - v00) );
//  vNormal += cross( (v11 - v00), (v10 - v00) );
//  vNormal += cross( (v10 - v00), (v1N - v00) );
//  vNormal += cross( (v1N - v00), (v0N - v00) );
//  vNormal += cross( (v0N - v00), (vNN - v00) );
//  vNormal += cross( (vNN - v00), (vN0 - v00) );
//  vNormal += cross( (vN0 - v00), (vN1 - v00) );
//  vNormal += cross( (vN1 - v00), (v01 - v00) );

    vNormal = normalize( vNormal );

    //return vNormal.zx;
    return float4( vNormal.zyx * 0.5f + 0.5f, 1.0f );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//Based on GPUOpen's samples SeparableFilter11
//https://github.com/GPUOpen-LibrariesAndSDKs/SeparableFilter11
//For better understanding, read "Efficient Compute Shader Programming" from Bill Bilodeau
//http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Efficient%20Compute%20Shader%20Programming.pps

//TL;DR:
// * Each thread works on 4 pixels at a time (for VLIW hardware, i.e. Radeon HD 5000 & 6000 series).
// * 256 pixels per threadgroup. Each threadgroup works on 2 rows of 128 pixels each.
// That means 32x2 threads = 64. 64 threads x 4 pixels per thread = 256

// Type pieces: both data_type and lds_data_type expand to float3 here.
// NOTE(review): presumably inserted by a shared filter template that is not
// part of this file - confirm.
@piece( data_type )float3@end
@piece( lds_data_type )float3@end
// Declares the threadgroup array g_f3LDS with 2 rows of
// samples_per_threadgroup float3 elements each. The piece itself emits no
// trailing semicolon.
// NOTE(review): presumably the inserting template supplies it - confirm.
@piece( lds_definition )
threadgroup float3 g_f3LDS[ 2 ] [ @value( samples_per_threadgroup ) ]
@end

// image_sample: returns the .xyz of inputImage sampled through inputSampler
// at f2SamplePosition with an explicit level of 0.
@piece( image_sample )
return inputImage.sample( inputSampler, f2SamplePosition, level(0) ).xyz;
@end

// image_store: the @foreach emits the write statement 4 times, with @iPixel
// replaced by the iteration index. Each statement writes outColour[ iPixel ]
// with the fourth component set to 1.0 to outputImage at
// i2Center + iPixel * i2Inc.
// NOTE(review): the trailing 0 is presumably the lod argument of write() - confirm.
@piece( image_store )
@foreach( 4, iPixel )
outputImage.write( float4( outColour[ @iPixel ], 1.0 ), uint2( i2Center + @iPixel * i2Inc ), 0 );@end
@end
Loading

0 comments on commit 1120397

Please sign in to comment.