From 090d262afe5d9b6a5c9d29fe15a4ec8d17ab7eda Mon Sep 17 00:00:00 2001 From: Dorian R Apanel Date: Wed, 12 Jul 2023 14:35:34 +0200 Subject: [PATCH 1/4] layers: Allow using uncached buffer for Debug Printf Adds option to force using AMD_DEVICE_COHERENT_MEMORY for debug printf buffer to print messages even if VK_ERROR_DEVICE_LOST is encountered. * Option added to layer json to be visible in vkconfig, * Forcing extension and device feature if not enabled by application, * Added workaround for atomic operations in uncached memory being in cache anyway, * Added workaround to failing MapMemory after DEVICE_LOST (occurs on AMD): When using uncached buffer, do not unmap buffer until messages are analyzed. --- layers/VkLayer_khronos_validation.json.in | 20 +++ layers/gpu_validation/debug_printf.cpp | 172 ++++++++++++++++++++-- layers/gpu_validation/debug_printf.h | 6 + layers/gpu_validation/gpu_utils.cpp | 10 +- layers/gpu_validation/gpu_utils.h | 3 +- layers/layer_options.cpp | 9 ++ tests/framework/layer_validation_tests.h | 2 +- tests/unit/debug_printf.cpp | 135 ++++++++++++++++- 8 files changed, 341 insertions(+), 16 deletions(-) diff --git a/layers/VkLayer_khronos_validation.json.in b/layers/VkLayer_khronos_validation.json.in index d56708b50f6..ce1ae254aa9 100644 --- a/layers/VkLayer_khronos_validation.json.in +++ b/layers/VkLayer_khronos_validation.json.in @@ -749,6 +749,26 @@ ] } }, + { + "key": "printf_uncached_buffer", + "label": "Printf using uncached buffer (ALPHA)", + "description": "Use VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD(from VK_AMD_device_coherent_memory) to allocate destination buffer. Slower, but useful in case of instrumenting shader causing VK_ERROR_DEVICE_LOST.", + "type": "BOOL", + "default": false, + "platforms": [ + "WINDOWS", + "LINUX" + ], + "dependence": { + "mode": "ALL", + "settings": [ + { + "key": "validate_gpu_based", + "value": "GPU_BASED_DEBUG_PRINTF" + } + ] + } + }, { "key": "printf_verbose", "label": "Printf verbose", diff --git a/layers/gpu_validation/debug_printf.cpp b/layers/gpu_validation/debug_printf.cpp index 66e3f655cac..e1efaef7d2d 100644 --- a/layers/gpu_validation/debug_printf.cpp +++ b/layers/gpu_validation/debug_printf.cpp @@ -21,6 +21,87 @@ #include #include "generated/layer_chassis_dispatch.h" +void DebugPrintf::PreCallRecordCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDevice *pDevice, void *modified_ci) { + GpuAssistedBase::PreCallRecordCreateDevice(gpu, pCreateInfo, pAllocator, pDevice, modified_ci); + + std::string use_uncached_buffer_string = getLayerOption("khronos_validation.printf_uncached_buffer"); + vvl::ToLower(use_uncached_buffer_string); + use_uncached_buffer = !use_uncached_buffer_string.compare("true"); + + if (use_uncached_buffer) + { + static const std::string dcm_ext{"VK_AMD_device_coherent_memory"}; + { + bool dcm_supported = false; + uint32_t property_count = 0; + if (DispatchEnumerateDeviceExtensionProperties(gpu, nullptr, &property_count, nullptr) == VK_SUCCESS) { + std::vector property_list(property_count); + if (DispatchEnumerateDeviceExtensionProperties(gpu, nullptr, &property_count, property_list.data()) == + VK_SUCCESS) { + for (const VkExtensionProperties &properties : property_list) { + if (dcm_ext == properties.extensionName) { + dcm_supported = true; + } + } + } + } + if (!dcm_supported) { + ReportSetupProblem( + device, "Debug Printf with uncached buffer requires VK_AMD_device_coherent_memory which is not supported"); + aborted = true; + return; + } + } + + // See CreateDevice() in chassis.cpp. modified_ci is a pointer to a safe struct stored on the stack. + // This code follows the safe struct memory memory management scheme. That is, we must delete any memory + // remove from the safe struct, and any additions must be allocated in a way that is compatible with + // the safe struct destructor. + auto *modified_create_info = static_cast(modified_ci); + + bool found_ext = false; + for (uint32_t i = 0; i < modified_create_info->enabledExtensionCount; i++) { + if (dcm_ext == modified_create_info->ppEnabledExtensionNames[i]) { + found_ext = true; + break; + } + } + if (!found_ext) { + LogInfo(gpu, "UNASSIGNED-DEBUG-PRINTF", "VK_AMD_device_coherent_memory extension not enabled but use_uncached_buffer is true. Forcing extension."); + const char **ext_names = new const char *[modified_create_info->enabledExtensionCount + 1]; + // Copy the existing pointer table + std::copy(modified_create_info->ppEnabledExtensionNames, + modified_create_info->ppEnabledExtensionNames + modified_create_info->enabledExtensionCount, ext_names); + // Add our new extension + char *dcm_ext_copy = new char[dcm_ext.size() + 1]{}; + dcm_ext.copy(dcm_ext_copy, dcm_ext.size()); + dcm_ext_copy[dcm_ext.size()] = '\0'; + ext_names[modified_create_info->enabledExtensionCount] = dcm_ext_copy; + // Patch up the safe struct + delete[] modified_create_info->ppEnabledExtensionNames; + modified_create_info->ppEnabledExtensionNames = ext_names; + modified_create_info->enabledExtensionCount++; + } + auto *dcm_features = const_cast( + LvlFindInChain(modified_create_info)); + if (dcm_features) { + if (dcm_features->deviceCoherentMemory != VK_TRUE) { + LogInfo(gpu, "UNASSIGNED-DEBUG-PRINTF", + "use_uncached_buffer is true, but deviceCoherentMemory feature is not enabled. Force enabling feature."); + dcm_features->deviceCoherentMemory = VK_TRUE; + } + } else { + LogInfo(gpu, "UNASSIGNED-DEBUG-PRINTF", + "use_uncached_buffer is true, but deviceCoherentMemory feature is not enabled. Force enabling feature."); + auto new_dcm_features = LvlInitStruct(); + new_dcm_features.deviceCoherentMemory = VK_TRUE; + new_dcm_features.pNext = const_cast(modified_create_info->pNext); + modified_create_info->pNext = new VkPhysicalDeviceCoherentMemoryFeaturesAMD(new_dcm_features); + } + } +} + // Perform initializations that can be done at Create Device time. void DebugPrintf::CreateDevice(const VkDeviceCreateInfo *pCreateInfo) { if (enabled[gpu_validation]) { @@ -42,7 +123,16 @@ void DebugPrintf::CreateDevice(const VkDeviceCreateInfo *pCreateInfo) { use_stdout = !stdout_string.compare("true"); if (getenv("DEBUG_PRINTF_TO_STDOUT")) use_stdout = true; - // GpuAssistedBase::CreateDevice will set up bindings + // Need to get this option again, because PreCallRecordCreateDevice was done + // in separate DebugPrintf instance (during VkInstance creation). + std::string use_uncached_buffer_string = getLayerOption("khronos_validation.printf_uncached_buffer"); + vvl::ToLower(use_uncached_buffer_string); + use_uncached_buffer = !use_uncached_buffer_string.compare("true"); + if (use_uncached_buffer) { + force_device_coherent_memory = true; // vma needs to know it. + } + + // GpuAssistedBase::CreateDevice will set up bindings. VkDescriptorSetLayoutBinding binding = {3, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_COMPUTE_BIT | @@ -138,6 +228,49 @@ void DebugPrintf::PreCallRecordCreateShaderModule(VkDevice device, const VkShade } } +// Override GpuAssistedBase version to allow processing in case of VK_ERROR_DEVICE_LOST when using uncached buffer. +void DebugPrintf::PostCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence, + VkResult result) { + ValidationStateTracker::PostCallRecordQueueSubmit(queue, submitCount, pSubmits, fence, result); + + bool device_lost = (result == VK_ERROR_DEVICE_LOST); + + if (aborted) return; + + if (result != VK_SUCCESS) { + if (!use_uncached_buffer) { + return; + } else if (!device_lost) { + return; // VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY + } + } + + bool buffers_present = false; + // Don't QueueWaitIdle if there's nothing to process + for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) { + const VkSubmitInfo *submit = &pSubmits[submit_idx]; + for (uint32_t i = 0; i < submit->commandBufferCount; i++) { + buffers_present |= CommandBufferNeedsProcessing(submit->pCommandBuffers[i]); + } + } + if (!buffers_present) return; + + if (!device_lost) { + SubmitBarrier(queue); + + DispatchQueueWaitIdle(queue); /// @todo Dispatch wait idle only after SubmitBarrier() succeeded. + } else { + assert(use_uncached_buffer); + } + + for (uint32_t submit_idx = 0; submit_idx < submitCount; submit_idx++) { + const VkSubmitInfo *submit = &pSubmits[submit_idx]; + for (uint32_t i = 0; i < submit->commandBufferCount; i++) { + ProcessCommandBuffer(queue, submit->pCommandBuffers[i]); + } + } +} + vartype vartype_lookup(char intype) { switch (intype) { case 'd': @@ -308,10 +441,14 @@ void DebugPrintf::AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQ // 9 Printf Values Word 0 (optional) // 10 Printf Values Word 1 (optional) uint32_t expect = debug_output_buffer[1]; - if (!expect) return; + // Total size of all messages are written by AtomicAdd. Atomics in uncached memory seems to be working in caches anyway + // and are not flushed to uncached memory at the end. In that case, expect will contain zero. + // As a WA just parse messages using individual sizes (written correctly). + // Can the messages be overridden because of those atomics? + if (!expect && !use_uncached_buffer) return; uint32_t index = spvtools::kDebugOutputDataOffset; - while (debug_output_buffer[index]) { + while ((index < output_buffer_size) && debug_output_buffer[index]) { std::stringstream shader_message; VkShaderModule shader_module_handle = VK_NULL_HANDLE; VkPipeline pipeline_handle = VK_NULL_HANDLE; @@ -412,7 +549,8 @@ void DebugPrintf::AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQ } index += debug_record->size; } - if ((index - spvtools::kDebugOutputDataOffset) != expect) { + if ((use_uncached_buffer && (index >= output_buffer_size)) || + (!use_uncached_buffer && (index - spvtools::kDebugOutputDataOffset) != expect)) { LogWarning(device, "UNASSIGNED-DEBUG-PRINTF", "WARNING - Debug Printf message was truncated, likely due to a buffer size that was too small for the message"); } @@ -429,7 +567,6 @@ void debug_printf_state::CommandBuffer::Process(VkQueue queue) { uint32_t ray_trace_index = 0; for (auto &buffer_info : gpu_buffer_list) { - char *data; uint32_t operation_index = 0; if (buffer_info.pipeline_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { @@ -445,10 +582,16 @@ void debug_printf_state::CommandBuffer::Process(VkQueue queue) { assert(false); } - VkResult result = vmaMapMemory(device_state->vmaAllocator, buffer_info.output_mem_block.allocation, (void **)&data); + VkResult result = VK_SUCCESS; + if (buffer_info.output_mem_block.data == nullptr) { + result = vmaMapMemory(device_state->vmaAllocator, buffer_info.output_mem_block.allocation, + (void **)&buffer_info.output_mem_block.data); + } if (result == VK_SUCCESS) { - device_state->AnalyzeAndGenerateMessages(commandBuffer(), queue, buffer_info, operation_index, (uint32_t *)data); + device_state->AnalyzeAndGenerateMessages(commandBuffer(), queue, buffer_info, operation_index, + (uint32_t *)buffer_info.output_mem_block.data); vmaUnmapMemory(device_state->vmaAllocator, buffer_info.output_mem_block.allocation); + buffer_info.output_mem_block.data = nullptr; } } } @@ -672,6 +815,9 @@ void DebugPrintf::AllocateDebugPrintfResources(const VkCommandBuffer cmd_buffer, buffer_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; VmaAllocationCreateInfo alloc_info = {}; alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + if (use_uncached_buffer) { + alloc_info.requiredFlags |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD; + } result = vmaCreateBuffer(vmaAllocator, &buffer_info, &alloc_info, &output_block.buffer, &output_block.allocation, nullptr); if (result != VK_SUCCESS) { ReportSetupProblem(device, "Unable to allocate device memory. Device could become unstable."); @@ -680,11 +826,15 @@ void DebugPrintf::AllocateDebugPrintfResources(const VkCommandBuffer cmd_buffer, } // Clear the output block to zeros so that only printf values from the gpu will be present - uint32_t *data; - result = vmaMapMemory(vmaAllocator, output_block.allocation, reinterpret_cast(&data)); + result = vmaMapMemory(vmaAllocator, output_block.allocation, reinterpret_cast(&output_block.data)); if (result == VK_SUCCESS) { - memset(data, 0, output_buffer_size); - vmaUnmapMemory(vmaAllocator, output_block.allocation); + memset(output_block.data, 0, output_buffer_size); + // Mapping may fail after DEVICE_LOST. Keep it mapped for now in such case. + // Will be unmapped in debug_printf_state::CommandBuffer::Process + if (!use_uncached_buffer) { + vmaUnmapMemory(vmaAllocator, output_block.allocation); + output_block.data = nullptr; + } } auto desc_writes = LvlInitStruct(); diff --git a/layers/gpu_validation/debug_printf.h b/layers/gpu_validation/debug_printf.h index 7b0921747ad..6bb8e206e01 100644 --- a/layers/gpu_validation/debug_printf.h +++ b/layers/gpu_validation/debug_printf.h @@ -23,6 +23,7 @@ class DebugPrintf; struct DPFDeviceMemoryBlock { VkBuffer buffer; VmaAllocation allocation; + uint32_t* data; // only valid if using uncached buffer, because mapping may fail after device is lost. }; struct DPFBufferInfo { @@ -86,12 +87,16 @@ class DebugPrintf : public GpuAssistedBase { desired_features.fragmentStoresAndAtomics = true; } + void PreCallRecordCreateDevice(VkPhysicalDevice gpu, const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, VkDevice* pDevice, void* modified_ci) override; void CreateDevice(const VkDeviceCreateInfo* pCreateInfo) override; bool InstrumentShader(const vvl::span& input, std::vector& new_pgm, uint32_t* unique_shader_id) override; void PreCallRecordCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule, void* csm_state_data) override; + void PostCallRecordQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, + VkFence fence, VkResult result) override; std::vector ParseFormatString(const std::string& format_string); std::string FindFormatString(vvl::span pgm, uint32_t string_id); void AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQueue queue, DPFBufferInfo& buffer_info, @@ -172,6 +177,7 @@ class DebugPrintf : public GpuAssistedBase { void DestroyBuffer(DPFBufferInfo& buffer_info); private: + bool use_uncached_buffer = false; bool verbose = false; bool use_stdout = false; }; diff --git a/layers/gpu_validation/gpu_utils.cpp b/layers/gpu_validation/gpu_utils.cpp index 76ef2d2658e..0fde38c8993 100644 --- a/layers/gpu_validation/gpu_utils.cpp +++ b/layers/gpu_validation/gpu_utils.cpp @@ -192,7 +192,8 @@ static VKAPI_ATTR void VKAPI_CALL gpuVkCmdCopyBuffer(VkCommandBuffer commandBuff DispatchCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, regionCount, pRegions); } -VkResult UtilInitializeVma(VkInstance instance, VkPhysicalDevice physical_device, VkDevice device, bool use_buffer_device_address, VmaAllocator *pAllocator) { +VkResult UtilInitializeVma(VkInstance instance, VkPhysicalDevice physical_device, VkDevice device, bool use_buffer_device_address, + bool use_device_coherent_memory, VmaAllocator *pAllocator) { VmaVulkanFunctions functions; VmaAllocatorCreateInfo allocator_info = {}; allocator_info.instance = instance; @@ -203,6 +204,10 @@ VkResult UtilInitializeVma(VkInstance instance, VkPhysicalDevice physical_device allocator_info.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT; } + if (use_device_coherent_memory) { + allocator_info.flags |= VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT; + } + functions.vkGetInstanceProcAddr = static_cast(gpuVkGetInstanceProcAddr); functions.vkGetDeviceProcAddr = static_cast(gpuVkGetDeviceProcAddr); functions.vkGetPhysicalDeviceProperties = static_cast(gpuVkGetPhysicalDeviceProperties); @@ -373,7 +378,8 @@ void GpuAssistedBase::CreateDevice(const VkDeviceCreateInfo *pCreateInfo) { } desc_set_bind_index = adjusted_max_desc_sets - 1; - VkResult result1 = UtilInitializeVma(instance, physical_device, device, force_buffer_device_address, &vmaAllocator); + VkResult result1 = UtilInitializeVma(instance, physical_device, device, force_buffer_device_address, + force_device_coherent_memory, &vmaAllocator); assert(result1 == VK_SUCCESS); desc_set_manager = std::make_unique(device, static_cast(bindings_.size())); diff --git a/layers/gpu_validation/gpu_utils.h b/layers/gpu_validation/gpu_utils.h index 78f086f71c6..fbb585085d8 100644 --- a/layers/gpu_validation/gpu_utils.h +++ b/layers/gpu_validation/gpu_utils.h @@ -79,7 +79,7 @@ VALSTATETRACK_DERIVED_STATE_OBJECT(VkQueue, gpu_utils_state::Queue, QUEUE_STATE) VALSTATETRACK_DERIVED_STATE_OBJECT(VkCommandBuffer, gpu_utils_state::CommandBuffer, CMD_BUFFER_STATE) VkResult UtilInitializeVma(VkInstance instance, VkPhysicalDevice physical_device, VkDevice device, bool use_buffer_device_address, - VmaAllocator *pAllocator); + bool use_device_coherent_memory, VmaAllocator *pAllocator); void UtilGenerateStageMessage(const uint32_t *debug_record, std::string &msg); void UtilGenerateCommonMessage(const debug_report_data *report_data, const VkCommandBuffer commandBuffer, @@ -216,6 +216,7 @@ class GpuAssistedBase : public ValidationStateTracker { public: bool aborted = false; bool force_buffer_device_address; + bool force_device_coherent_memory = false; PFN_vkSetDeviceLoaderData vkSetDeviceLoaderData; const char *setup_vuid; VkPhysicalDeviceFeatures supported_features{}; diff --git a/layers/layer_options.cpp b/layers/layer_options.cpp index 5b2b0f85fe3..6f33e8f0a2e 100644 --- a/layers/layer_options.cpp +++ b/layers/layer_options.cpp @@ -53,6 +53,8 @@ const char *SETTING_CUSTOM_STYPE_LIST = "custom_stype_list"; const char *SETTING_DUPLICATE_MESSAGE_LIMIT = "duplicate_message_limit"; const char *SETTING_FINE_GRAINED_LOCKING = "fine_grained_locking"; +const char *SETTING_DEBUG_PRINTF_UNCACHED_BUFFER = "printf_uncached_buffer"; + // Set the local disable flag for the appropriate VALIDATION_CHECK_DISABLE enum void SetValidationDisable(CHECK_DISABLED &disable_data, const ValidationCheckDisables disable_id) { switch (disable_id) { @@ -373,6 +375,11 @@ static std::string GetConfigValue(const char *setting) { return getLayerOption(key.c_str()); } +static void SetConfigValue(const char *setting, const char* value) { + const std::string key(GetSettingKey(setting)); + return setLayerOption(key.c_str(), value); +} + static std::string GetEnvVarValue(const char *setting) { std::string env_var = setting; vvl::ToUpper(env_var); @@ -438,6 +445,8 @@ void ProcessConfigAndEnvSettings(ConfigAndEnvSettings *settings_data) { CreateFilterMessageIdList(data, ",", settings_data->message_filter_list); } else if (name == SETTING_DUPLICATE_MESSAGE_LIMIT) { *settings_data->duplicate_message_limit = cur_setting.data.value32; + } else if ((name == SETTING_DEBUG_PRINTF_UNCACHED_BUFFER) && cur_setting.data.valueBool) { + SetConfigValue(SETTING_DEBUG_PRINTF_UNCACHED_BUFFER, "true"); } else if (name == SETTING_CUSTOM_STYPE_LIST) { if (cur_setting.type == VK_LAYER_SETTING_VALUE_TYPE_STRING_ARRAY_EXT) { std::string data(cur_setting.data.arrayString.pCharArray); diff --git a/tests/framework/layer_validation_tests.h b/tests/framework/layer_validation_tests.h index b3af136f0be..50febd35dab 100644 --- a/tests/framework/layer_validation_tests.h +++ b/tests/framework/layer_validation_tests.h @@ -280,7 +280,7 @@ class VkGpuAssistedLayerTest : public VkLayerTest { class NegativeDebugPrintf : public VkLayerTest { public: - void InitDebugPrintfFramework(); + void InitDebugPrintfFramework(bool use_uncached_buffer = false); protected: }; diff --git a/tests/unit/debug_printf.cpp b/tests/unit/debug_printf.cpp index 042c9afe7a4..3fdef7e3751 100644 --- a/tests/unit/debug_printf.cpp +++ b/tests/unit/debug_printf.cpp @@ -13,7 +13,25 @@ #include "../framework/layer_validation_tests.h" -void NegativeDebugPrintf::InitDebugPrintfFramework() { +class UncachedBufferSetting { + public: + UncachedBufferSetting(const bool use_uncached_buffer /*= false*/) { + setting_value_data.valueBool = use_uncached_buffer; + + strncpy(setting_val.name, "printf_uncached_buffer", sizeof(setting_val.name)); + setting_val.type = VK_LAYER_SETTING_VALUE_TYPE_BOOL_EXT; + setting_val.data = setting_value_data; + setting = {VK_STRUCTURE_TYPE_INSTANCE_LAYER_SETTINGS_EXT, nullptr, 1, &setting_val}; + } + VkLayerSettingsEXT *pnext{&setting}; + + private: + VkLayerSettingValueDataEXT setting_value_data{}; + VkLayerSettingValueEXT setting_val; + VkLayerSettingsEXT setting; +}; + +void NegativeDebugPrintf::InitDebugPrintfFramework(bool use_uncached_buffer) { VkValidationFeatureEnableEXT enables[] = {VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT}; VkValidationFeatureDisableEXT disables[] = { VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT, VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT, @@ -24,6 +42,11 @@ void NegativeDebugPrintf::InitDebugPrintfFramework() { features.pEnabledValidationFeatures = enables; features.pDisabledValidationFeatures = disables; + auto uncached_buffer_setting = UncachedBufferSetting(use_uncached_buffer); + if (use_uncached_buffer) { + features.pNext = uncached_buffer_setting.pnext; + } + InitFramework(m_errorMonitor, &features); } @@ -1063,3 +1086,113 @@ TEST_F(NegativeDebugPrintf, GPLFragmentIndependentSets) { vk::QueueWaitIdle(m_device->m_queue); m_errorMonitor->VerifyFound(); } + +TEST_F(NegativeDebugPrintf, UncachedBuffer) { + TEST_DESCRIPTION("Verify that calls to debugPrintfEXT are received in debug stream"); + SetTargetApiVersion(VK_API_VERSION_1_1); + AddRequiredExtensions(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME); + AddRequiredExtensions(VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME); // It must be supported but if not enabled + // Debug Printf can force enable it. + InitDebugPrintfFramework(true /*use_uncached_buffer*/); + + if (!AreRequiredExtensionsEnabled()) { + GTEST_SKIP() << RequiredExtensionsNotSupported() << " not supported"; + } + + // deviceCoherentMemory feature needs to be supported, but if not specified or enabled, Debug Printf will force enable it. + auto dcm_features = LvlInitStruct(); + auto features2 = GetPhysicalDeviceFeatures2(dcm_features); + if (dcm_features.deviceCoherentMemory == 0) { + GTEST_SKIP() << "deviceCoherentMemory feature not supported"; + } + + ASSERT_NO_FATAL_FAILURE(InitState(nullptr, &dcm_features, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT)); + if (DeviceValidationVersion() < VK_API_VERSION_1_1) { + GTEST_SKIP() << "At least Vulkan version 1.1 is required"; + } + + auto features = m_device->phy().features(); + if (!features.vertexPipelineStoresAndAtomics || !features.fragmentStoresAndAtomics) { + GTEST_SKIP() << "GPU-Assisted printf test requires vertexPipelineStoresAndAtomics and fragmentStoresAndAtomics"; + } + ASSERT_NO_FATAL_FAILURE(InitViewport()); + ASSERT_NO_FATAL_FAILURE(InitRenderTarget()); + + if (IsPlatform(kMockICD)) { + GTEST_SKIP() << "Test not supported by MockICD, GPU-Assisted validation test requires a driver that can draw"; + } + // Make a uniform buffer to be passed to the shader that contains the test number + uint32_t qfi = 0; + VkBufferCreateInfo bci = LvlInitStruct(); + bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + bci.size = 8; + bci.queueFamilyIndexCount = 1; + bci.pQueueFamilyIndices = &qfi; + + VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(float) * 4}; + + const VkPipelineLayoutObj pipeline_layout(m_device, {}, {push_constant_range}); + + float push_constants[4] = {0.0, 1.0, 2.0, 3.0}; + + char const *shader_source = R"glsl( + #version 450 + #extension GL_EXT_debug_printf : enable + layout(push_constant, std430) uniform foo { float x[4]; } constants; + + void main() { + float myfloat = 3.1415f; + gl_Position = vec4(0.0, 0.0, 0.0, 0.0); + if (gl_VertexIndex == 0) { + debugPrintfEXT("Here are three float values %f, %f, %f", 1.0, myfloat, gl_Position.x); + float x = constants.x[0]; + while(x > -1.f) { // infinite loop + x += 0.000001f; + } + debugPrintfEXT("Here is a value that should not be printed %f", x); + } + } + )glsl"; + char const *message = "Here are three float values 1.000000, 3.141500, 0.000000"; + + VkShaderObj vs(this, shader_source, VK_SHADER_STAGE_VERTEX_BIT, SPV_ENV_VULKAN_1_0, SPV_SOURCE_GLSL, nullptr, "main", true); + + VkViewport viewport = m_viewports[0]; + VkRect2D scissors = m_scissors[0]; + + VkSubmitInfo submit_info = LvlInitStruct(); + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &m_commandBuffer->handle(); + + VkPipelineObj pipe(m_device); + pipe.AddShader(&vs); + pipe.AddDefaultColorAttachment(); + pipe.DisableRasterization(); + VkResult err = pipe.CreateVKPipeline(pipeline_layout.handle(), renderPass()); + ASSERT_VK_SUCCESS(err); + + VkCommandBufferBeginInfo begin_info = LvlInitStruct(); + VkCommandBufferInheritanceInfo hinfo = LvlInitStruct(); + begin_info.pInheritanceInfo = &hinfo; + + m_commandBuffer->begin(&begin_info); + m_commandBuffer->BeginRenderPass(m_renderPassBeginInfo); + vk::CmdBindPipeline(m_commandBuffer->handle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipe.handle()); + vk::CmdPushConstants(m_commandBuffer->handle(), pipeline_layout.handle(), VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(push_constants), + push_constants); + vk::CmdSetViewport(m_commandBuffer->handle(), 0, 1, &viewport); + vk::CmdSetScissor(m_commandBuffer->handle(), 0, 1, &scissors); + vk::CmdDraw(m_commandBuffer->handle(), 3, 1, 0, 0); + vk::CmdEndRenderPass(m_commandBuffer->handle()); + m_commandBuffer->end(); + + m_errorMonitor->SetDesiredFailureMsg(kInformationBit, message); + + err = vk::QueueSubmit(m_device->m_queue, 1, &submit_info, VK_NULL_HANDLE); + ASSERT_TRUE((err == VK_SUCCESS) || (err == VK_ERROR_DEVICE_LOST)) << vk_result_string(err); + if (err == VK_SUCCESS) { + err = vk::QueueWaitIdle(m_device->m_queue); + ASSERT_TRUE((err == VK_SUCCESS) || (err == VK_ERROR_DEVICE_LOST)) << vk_result_string(err); + } + m_errorMonitor->VerifyFound(); +} From c0c5d42e390c7866978d19c2657ccc7cc5d9a2e0 Mon Sep 17 00:00:00 2001 From: Dorian R Apanel Date: Mon, 17 Jul 2023 15:20:33 +0200 Subject: [PATCH 2/4] tests: Removed unused variables --- tests/unit/debug_printf.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/unit/debug_printf.cpp b/tests/unit/debug_printf.cpp index 3fdef7e3751..8c56c38e1cd 100644 --- a/tests/unit/debug_printf.cpp +++ b/tests/unit/debug_printf.cpp @@ -1101,7 +1101,7 @@ TEST_F(NegativeDebugPrintf, UncachedBuffer) { // deviceCoherentMemory feature needs to be supported, but if not specified or enabled, Debug Printf will force enable it. auto dcm_features = LvlInitStruct(); - auto features2 = GetPhysicalDeviceFeatures2(dcm_features); + GetPhysicalDeviceFeatures2(dcm_features); if (dcm_features.deviceCoherentMemory == 0) { GTEST_SKIP() << "deviceCoherentMemory feature not supported"; } @@ -1121,13 +1121,6 @@ TEST_F(NegativeDebugPrintf, UncachedBuffer) { if (IsPlatform(kMockICD)) { GTEST_SKIP() << "Test not supported by MockICD, GPU-Assisted validation test requires a driver that can draw"; } - // Make a uniform buffer to be passed to the shader that contains the test number - uint32_t qfi = 0; - VkBufferCreateInfo bci = LvlInitStruct(); - bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - bci.size = 8; - bci.queueFamilyIndexCount = 1; - bci.pQueueFamilyIndices = &qfi; VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(float) * 4}; From cb05cb6e66af3ce56c4ada85ccea4436a359fcb9 Mon Sep 17 00:00:00 2001 From: Dorian R Apanel Date: Wed, 19 Jul 2023 13:01:40 +0200 Subject: [PATCH 3/4] tests: Added VK_TIMEOUT handling --- tests/unit/debug_printf.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/debug_printf.cpp b/tests/unit/debug_printf.cpp index 8c56c38e1cd..fb5263eb4f2 100644 --- a/tests/unit/debug_printf.cpp +++ b/tests/unit/debug_printf.cpp @@ -1182,10 +1182,11 @@ TEST_F(NegativeDebugPrintf, UncachedBuffer) { m_errorMonitor->SetDesiredFailureMsg(kInformationBit, message); err = vk::QueueSubmit(m_device->m_queue, 1, &submit_info, VK_NULL_HANDLE); - ASSERT_TRUE((err == VK_SUCCESS) || (err == VK_ERROR_DEVICE_LOST)) << vk_result_string(err); + // Some drivers return VK_TIMEOUT (spec does not allow it). + ASSERT_TRUE((err == VK_SUCCESS) || (err == VK_ERROR_DEVICE_LOST) || (err == VK_TIMEOUT)) << vk_result_string(err); if (err == VK_SUCCESS) { err = vk::QueueWaitIdle(m_device->m_queue); - ASSERT_TRUE((err == VK_SUCCESS) || (err == VK_ERROR_DEVICE_LOST)) << vk_result_string(err); + ASSERT_TRUE((err == VK_SUCCESS) || (err == VK_ERROR_DEVICE_LOST) || (err == VK_TIMEOUT)) << vk_result_string(err); } m_errorMonitor->VerifyFound(); } From 4b956cfbc4e2e8c402b971c8e7cb077f29e2d697 Mon Sep 17 00:00:00 2001 From: Dorian R Apanel Date: Fri, 21 Jul 2023 11:47:53 +0200 Subject: [PATCH 4/4] layers: Fix printf debug_output_buffer clear --- layers/gpu_validation/debug_printf.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/layers/gpu_validation/debug_printf.cpp b/layers/gpu_validation/debug_printf.cpp index e1efaef7d2d..1cfbd9bc8f8 100644 --- a/layers/gpu_validation/debug_printf.cpp +++ b/layers/gpu_validation/debug_printf.cpp @@ -440,11 +440,10 @@ void DebugPrintf::AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQ // 8 Printf Format String Id // 9 Printf Values Word 0 (optional) // 10 Printf Values Word 1 (optional) - uint32_t expect = debug_output_buffer[1]; + uint32_t expect = debug_output_buffer[spvtools::kDebugOutputSizeOffset]; // Total size of all messages are written by AtomicAdd. Atomics in uncached memory seems to be working in caches anyway // and are not flushed to uncached memory at the end. In that case, expect will contain zero. // As a WA just parse messages using individual sizes (written correctly). - // Can the messages be overridden because of those atomics? if (!expect && !use_uncached_buffer) return; uint32_t index = spvtools::kDebugOutputDataOffset; @@ -554,7 +553,14 @@ void DebugPrintf::AnalyzeAndGenerateMessages(VkCommandBuffer command_buffer, VkQ LogWarning(device, "UNASSIGNED-DEBUG-PRINTF", "WARNING - Debug Printf message was truncated, likely due to a buffer size that was too small for the message"); } - memset(debug_output_buffer, 0, 4 * (debug_output_buffer[spvtools::kDebugOutputSizeOffset] + spvtools::kDebugOutputDataOffset)); + + if (use_uncached_buffer) { + // WA for atomics. + memset(debug_output_buffer, 0, output_buffer_size); + } else { + // Clear only written memory. + memset(debug_output_buffer, 0, sizeof(uint32_t) * (expect + spvtools::kDebugOutputDataOffset)); + } } // For the given command buffer, map its debug data buffers and read their contents for analysis.