ggml_vulkan crashes if running 2 threads which allocate their own context for the same GPU device #1087

Open
philipag opened this issue Jan 26, 2025 · 3 comments

@philipag

I am using Whisper.net, which in turn uses whisper.cpp, which in turn uses ggml.

Running a single thread with the Vulkan backend works perfectly (although it does not saturate the GPU, unlike the CUDA backend; that is a separate issue), so I tried adding a second thread that does everything the first thread does for the same GPU: it allocates its own context and pushes a separate audio stream. This crashes with an invalid memory access, and the two threads' call stacks always look like this:

Thread 4488 — Worker Thread, SpeechToTextJob.serial: 1/2:
    msvcp140d.dll!mtx_do_lock
    [External Code]
    ggml-vulkan-whisper.dll!ggml_pipeline_allocate_descriptor_sets(std::shared_ptr<vk_device_struct> & device={...}) Line 909
    ggml-vulkan-whisper.dll!ggml_backend_vk_graph_compute(ggml_backend * backend=0x0000032c7918ccd0, ggml_cgraph * cgraph=0x0000032bb8c1a138) Line 7589
    ggml-base-whisper.dll!ggml_backend_graph_compute_async(ggml_backend * backend=0x0000032c7918ccd0, ggml_cgraph * cgraph=0x0000032bb8c1a138) Line 333
    ggml-base-whisper.dll!ggml_backend_sched_compute_splits(ggml_backend_sched * sched=0x0000032c72339bb0) Line 1397
    ggml-base-whisper.dll!ggml_backend_sched_graph_compute_async(ggml_backend_sched * sched=0x0000032c72339bb0, ggml_cgraph * graph=0x0000032c7a1a0060) Line 1589
    ggml-base-whisper.dll!ggml_backend_sched_graph_compute(ggml_backend_sched * sched=0x0000032c72339bb0, ggml_cgraph * graph=0x0000032c7a1a0060) Line 1572
    whisper.dll!ggml_graph_compute_helper(ggml_backend_sched * sched=0x0000032c72339bb0, ggml_cgraph * graph=0x0000032c7a1a0060, int n_threads=4) Line 183
    whisper.dll!whisper_encode_internal(whisper_context & wctx={...}, whisper_state & wstate={...}, const int mel_offset=0, const int n_threads=4, bool(*)(void *) abort_callback=0x00007ff87a5f3318, void * abort_callback_data=0x0000000000000001) Line 2350
    whisper.dll!whisper_full_with_state(whisper_context * ctx=0x0000032bb8ad0980, whisper_state * state=0x0000032c72c95d70, whisper_full_params params={...}, const float * samples=0x000002eb0af9fa28, int n_samples=462400) Line 5573
    [External Code]
    Whisper.net.dll!Whisper.net.WhisperProcessor.Process(System.ReadOnlySpan<float> samples = "System.ReadOnlySpan<Single>[462400]", System.DateTime firstSampleTime = "2020-10-01 10:00:28.300", bool audioWasSkipped = false) Line 192
    Whisper.net.dll!Whisper.net.WhisperProcessor.Process(float[] samples = {float[462400]}, System.DateTime firstSampleTime = "2020-10-01 10:00:28.300", bool audioWasSkipped = false) Line 160
    [External Code]

Thread 36004 — Worker Thread, SpeechToTextJob.serial: 2/2 (current thread in the debugger):
    ggml-vulkan-whisper.dll!std::string::size() Line 2346
    ggml-vulkan-whisper.dll!std::hash<std::string>::_Do_hash(const std::string & _Keyval) Line 3354
    ggml-vulkan-whisper.dll!std::_Conditionally_enabled_hash<std::string,1>::operator()(const std::string & _Keyval) Line 2339
    ggml-vulkan-whisper.dll!std::_Uhash_compare<std::string,std::hash<std::string>,std::equal_to<std::string>>::operator()<std::string>(const std::string & _Keyval) Line 151
    ggml-vulkan-whisper.dll!std::unordered_map<std::string,std::weak_ptr<vk_pipeline_struct>,std::hash<std::string>,std::equal_to<std::string>,std::allocator<std::pair<std::string const ,std::weak_ptr<vk_pipeline_struct>>>>::at(const std::string & _Keyval) Line 437
    ggml-vulkan-whisper.dll!ggml_pipeline_allocate_descriptor_sets(std::shared_ptr<vk_device_struct> & device={...}) Line 912
    ggml-vulkan-whisper.dll!ggml_backend_vk_graph_compute(ggml_backend * backend=0x0000032c791b0250, ggml_cgraph * cgraph=0x0000032bb8c20d38) Line 7589
    ggml-base-whisper.dll!ggml_backend_graph_compute_async(ggml_backend * backend=0x0000032c791b0250, ggml_cgraph * cgraph=0x0000032bb8c20d38) Line 333
    ggml-base-whisper.dll!ggml_backend_sched_compute_splits(ggml_backend_sched * sched=0x0000032bb871d490) Line 1397
    ggml-base-whisper.dll!ggml_backend_sched_graph_compute_async(ggml_backend_sched * sched=0x0000032bb871d490, ggml_cgraph * graph=0x0000032cbd850060) Line 1589
    ggml-base-whisper.dll!ggml_backend_sched_graph_compute(ggml_backend_sched * sched=0x0000032bb871d490, ggml_cgraph * graph=0x0000032cbd850060) Line 1572
    whisper.dll!ggml_graph_compute_helper(ggml_backend_sched * sched=0x0000032bb871d490, ggml_cgraph * graph=0x0000032cbd850060, int n_threads=4) Line 183
    whisper.dll!whisper_encode_internal(whisper_context & wctx={...}, whisper_state & wstate={...}, const int mel_offset=0, const int n_threads=4, bool(*)(void *) abort_callback=0x00007ff87a5f3318, void * abort_callback_data=0x0000000000000002) Line 2311
    whisper.dll!whisper_full_with_state(whisper_context * ctx=0x0000032bb8ad0980, whisper_state * state=0x0000032bac961410, whisper_full_params params={...}, const float * samples=0x000002eb0b93b5e0, int n_samples=256000) Line 5573
    [External Code]
    Whisper.net.dll!Whisper.net.WhisperProcessor.Process(System.ReadOnlySpan<float> samples = "System.ReadOnlySpan<Single>[256000]", System.DateTime firstSampleTime = "2020-10-01 10:13:00.000", bool audioWasSkipped = false) Line 192
    Whisper.net.dll!Whisper.net.WhisperProcessor.Process(float[] samples = {float[256000]}, System.DateTime firstSampleTime = "2020-10-01 10:13:00.000", bool audioWasSkipped = false) Line 160
    [External Code]
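To make the pattern concrete, here is a minimal sketch against the whisper.cpp C API directly (rather than through Whisper.net). The model path and the silent audio buffers are placeholders; the structure (one context, a separate state per thread, both computing on the same Vulkan GPU device, as the call stacks above suggest) is the relevant part:

```cpp
// Minimal repro sketch, assuming the whisper.cpp C API is used directly
// instead of Whisper.net. Model path and audio buffers are placeholders.
#include <functional>
#include <thread>
#include <vector>

#include "whisper.h"

static void transcribe(whisper_context * ctx, const std::vector<float> & samples) {
    // Each thread gets its own whisper_state, but both compute on the
    // same Vulkan GPU device inside ggml.
    whisper_state * state = whisper_init_state(ctx);

    whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    whisper_full_with_state(ctx, state, params, samples.data(), (int) samples.size());

    whisper_free_state(state);
}

int main() {
    whisper_context_params cparams = whisper_context_default_params();
    cparams.use_gpu = true; // Vulkan backend when ggml is built with GGML_VULKAN

    whisper_context * ctx =
        whisper_init_from_file_with_params("ggml-base.en.bin" /* placeholder */, cparams);

    std::vector<float> audio_a(462400, 0.0f); // placeholder audio; sizes taken from the traces
    std::vector<float> audio_b(256000, 0.0f);

    // Two threads computing concurrently on the same GPU device -> invalid
    // memory access inside ggml_pipeline_allocate_descriptor_sets.
    std::thread t1(transcribe, ctx, std::cref(audio_a));
    std::thread t2(transcribe, ctx, std::cref(audio_b));
    t1.join();
    t2.join();

    whisper_free(ctx);
    return 0;
}
```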


Running multiple concurrent threads and contexts with the CUDA backend, the CPU backend, or even the Vulkan CPU device works as expected; only the Vulkan GPU backend crashes. If I add a lock so that the two threads never call WhisperProcessor.Process() at the same time, the crash goes away. This happens with the 1.7.3 release from a few weeks ago as well as with whisper.cpp + ggml pulled and built yesterday.
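Expressed against the same C API sketch as above, the lock workaround amounts to a process-wide mutex so only one thread is inside the Vulkan compute at a time (the mutex name and helper are illustrative only, and this of course serializes the GPU work):

```cpp
// Sketch of the workaround: serialize the compute calls with a global mutex
// (the C# equivalent is locking around WhisperProcessor.Process()). This
// avoids the crash, but gives up the intended GPU concurrency.
#include <mutex>
#include <vector>

#include "whisper.h"

static std::mutex g_compute_mutex; // illustrative name

static void transcribe_serialized(whisper_context * ctx, const std::vector<float> & samples) {
    whisper_state * state = whisper_init_state(ctx);
    whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

    {
        // Only one thread at a time may run the Vulkan graph compute.
        std::lock_guard<std::mutex> lock(g_compute_mutex);
        whisper_full_with_state(ctx, state, params, samples.data(), (int) samples.size());
    }

    whisper_free_state(state);
}
```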

@ggerganov
Member

cc @0cc4m

@0cc4m
Collaborator

0cc4m commented Jan 29, 2025

Yeah, this is a known issue (ggerganov/llama.cpp#7575, ggerganov/llama.cpp#11371) that has not yet been fixed.

@ggerganov
Copy link
Member

Ok, no worries!
