2023 · Developer Tools / Graphics & Games
WWDC23 · 17 min · Developer Tools / Graphics & Games
Optimize GPU renderers with Metal
Discover how to optimize your GPU renderer using the latest Metal features and best practices. We’ll show you how to use function specialization and parallel shader compilation to maintain responsive authoring workflows and the fastest rendering speeds, and help you tune your compute shaders for optimal performance.
Watch at developer.apple.com ↗
Chapters
Code shown on screen · 20 snippets
Reduce Branch Performance Cost
// Reduce branch performance cost
fragment FragOut frag_material_main(device Material &material [[buffer(0)]]) {
if(material.is_glossy) {
material_glossy(material);
}
if(material.has_shadows) {
light_shadows(material);
}
if(material.has_reflections) {
trace_reflections(material);
}
if(material.is_volumetric) {
output_volume_parameters(material);
}
return output_material();
} Function constant declaration per material feature
constant bool IsGlossy [[function_constant(0)]];
constant bool HasShadows [[function_constant(1)]];
constant bool HasReflections [[function_constant(2)]];
constant bool IsVolumetric [[function_constant(3)]]; Dynamic branch for the feature codepath is replaced with function constants
if(material.has_reflections) {
trace_reflections(material);
} Dynamic branch for the feature codepath is replaced with function constants
/* replaced with function constants*/
if(HasReflections) {
trace_reflections(material);
} Reduce branch performance cost with function constants
constant bool IsGlossy [[function_constant(0)]];
constant bool HasShadows [[function_constant(1)]];
constant bool HasReflections [[function_constant(2)]];
constant bool IsVolumetric [[function_constant(3)]];
// Reduce branch performance cost
fragment FragOut frag_material_main(device Material &material [[buffer(0)]]) {
if(IsGlossy) {
material_glossy(material);
}
if(HasShadows) {
light_shadows(material);
}
if(HasReflections) {
trace_reflections(material);
}
if(IsVolumetric) {
output_volume_parameters(material);
}
return output_material();
} Function constants for material parameters
// Function constants for material parameters
constant float4 MaterialColor [[function_constant(0)]];
constant float4 MaterialWeight [[function_constant(1)]];
constant float4 SheenColor [[function_constant(2)]];
constant float4 SheenFactor [[function_constant(3)]];
struct Material {
float4 blend_factor;
};
void material_glossy(const constant Material& material) {
float4 light, sheen;
light = glossy_eval(MaterialColor, MaterialWeight);
sheen = sheen_eval(SheenColor, SheenFactor);
glossy_output_write(light, sheen, material.blend_factor);
} MaterialParameter structure for constant parameters
struct MaterialParameter {
NSString* name;
MTLDataType type;
void* value_ptr;
};
MaterialParameter is_glossy{@"IsGlossy", MTLDataTypeBool, &material.is_glossy};
MaterialParameter mat_color{@"MaterialColor", MTLDataTypeFloat4, &material.color}; Declare and populate MTLFunctionConstantValues
// Declare and populate MTLFunctionConstantValues
MTLFunctionConstantValues* values = [MTLFunctionConstantValues new];
for(const MaterialParameter& parameter : shader_parameters) {
[values setConstantValue: parameter.value_ptr
type: parameter.type
withName: parameter.name];
} Create pipeline render state object with function constant declarations
struct Material {
bool is_glossy;
float color[4];
};
struct MaterialParameter {
NSString* name;
MTLDataType type;
void* value_ptr;
};
// Declare the host-side material: is_glossy = true, color = {1, 0, 0, 1}
Material material = {true, {1.0f,0.0f,0.0f,1.0f}};
// Declare function constant parameters: each entry pairs a function constant's
// name and MTLDataType with a pointer to the matching field of `material`
// (a pointer is stored, not a copy of the value)
MaterialParameter is_glossy{@"IsGlossy", MTLDataTypeBool, &material.is_glossy};
MaterialParameter mat_color{@"MaterialColor", MTLDataTypeFloat4, &material.color};
// Collect the parameters into one array so they can be iterated together
MaterialParameter shader_parameters[2] = {is_glossy, mat_color};
// Declare and populate MTLFunctionConstantValues
MTLFunctionConstantValues* values = [MTLFunctionConstantValues new];
for(const MaterialParameter& parameter : shader_parameters) {
[values setConstantValue: parameter.value_ptr
type: parameter.type
withName: parameter.name];
}
// Create MTLRenderPipelineDescriptor and create shader function from MTLLibrary
MTLRenderPipelineDescriptor *dsc = [MTLRenderPipelineDescriptor new];
NSError* error = nil;
dsc.fragmentFunction = [shader_library newFunctionWithName:@"frag_material_main"
constantValues:values
error:&error];
// Create pipeline render state object
id<MTLRenderPipelineState> pso = [device newRenderPipelineStateWithDescriptor:dsc
error:&error]; Create MTLRenderPipelineDescriptor and create shader function from MTLLibrary
// Create MTLRenderPipelineDescriptor and create shader function from MTLLibrary
MTLRenderPipelineDescriptor *dsc = [MTLRenderPipelineDescriptor new];
NSError* error = nil;
dsc.fragmentFunction = [shader_library newFunctionWithName:@"frag_material_main"
constantValues:values
error:&error]; Shader library creation
- (void)newLibraryWithSource:(NSString *)source
options:(MTLCompileOptions *)options
completionHandler:(MTLNewLibraryCompletionHandler)completionHandler; Render pipeline state creation
- (void)newRenderPipelineStateWithDescriptor:(MTLRenderPipelineDescriptor *)descriptor
completionHandler:(MTLNewRenderPipelineStateCompletionHandler)completionHandler; Use as many threads as possible for concurrent compilation
@property (atomic) BOOL shouldMaximizeConcurrentCompilation; Assign symbol visibility to default or hidden
// Symbol visibility for functions in a dynamic library.
// The attribute argument must be an ordinary string literal delimited by
// straight double quotes; a typographic opening quote does not compile.

// Default visibility for matrix_mul.
__attribute__((visibility("default")))
void matrix_mul();
// Hidden visibility for matrix_mul_internal.
__attribute__((visibility("hidden")))
void matrix_mul_internal(); Verify device support
//For render pipelines
@property (readonly) BOOL supportsRenderDynamicLibraries;
//For compute pipelines
@property(readonly) BOOL supportsDynamicLibraries; Compile dynamic libraries
//create a dynamic library from an existing Metal library
//  library: the MTLLibrary to convert
//  error:   out-parameter for an NSError
//  returns an id<MTLDynamicLibrary>
- (id<MTLDynamicLibrary>) newDynamicLibrary:(id<MTLLibrary>) library
error:(NSError **) error;
//create from the URL
//  url:     NSURL identifying the dynamic library to create from
//  error:   out-parameter for an NSError
//  returns an id<MTLDynamicLibrary>
- (id<MTLDynamicLibrary>) newDynamicLibraryWithURL:(NSURL *) url
error:(NSError **) error; Dynamically link shaders
//Pipeline state
MTLRenderPipelineDescriptor* dsc = [MTLRenderPipelineDescriptor new];
dsc.vertexPreloadedLibraries = @[dylib_Math, dylib_Shadows];
dsc.fragmentPreloadedLibraries = @[dylib_Math, dylib_Shadows];
//Compile options
MTLCompileOptions* options = [MTLCompileOptions new];
options.libraries = @[dylib_Math, dylib_Shadows];
[device newLibraryWithSource:programString
options:options
error:&error]; Specify desired max total threads per threadgroup
@interface MTLComputePipelineDescriptor : NSObject
@property (readwrite, nonatomic) NSUInteger maxTotalThreadsPerThreadgroup; Match desired max total threads per threadgroup
@interface MTLCompileOptions : NSObject
@property (readwrite, nonatomic) NSUInteger maxTotalThreadsPerThreadgroup; Tune Metal dynamic libraries
// Configure compile options for the dynamic library.
MTLCompileOptions* options = [MTLCompileOptions new];
options.libraryType = MTLLibraryTypeDynamic;
// The install name is written with the "@executable_path" token, so the string
// itself starts with '@' in addition to the Objective-C literal prefix. Without
// the token the value is just a plain relative path.
options.installName = @"@executable_path/dylib_Math.metallib";
// The thread-count option is only set when the OS provides it. The value is meant
// to match the maxTotalThreadsPerThreadgroup requested on the pipeline descriptor.
if(@available(macOS 13.3, *)) {
    options.maxTotalThreadsPerThreadgroup = 768;
}
id<MTLLibrary> lib = [device newLibraryWithSource:programString
options:options
error:&error];
id<MTLDynamicLibrary> dynamicLib = [device newDynamicLibrary:lib
error:&error]; Resources
Related sessions
-
14 min -
25 min -
25 min