ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: d4872518540ec60bb448d1457e79ea42c4e8f724
Parent: b25cf020007042c20ef92a810eaa5514878c986d
Author: Randy Palamar
Date:   Thu, 18 Jun 2026 11:41:33 -0600

vulkan: re-add support for exporting general memory buffers

We need to do this to share with CUDA when that is supported.

Diffstat:
Mbeamformer.c | 4++--
Mbeamformer_core.c | 18++++++++++++++----
Mbeamformer_internal.h | 8++++++--
Mui.c | 2+-
Mvulkan.c | 47++++++++++++++++++++++++++++-------------------
Mvulkan.h | 7+++++++
6 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -222,7 +222,7 @@ beamformer_init(BeamformerInput *input) .flags = VulkanUsageFlag_TransferSource|VulkanUsageFlag_HostReadWrite, .timeline_count = countof(timelines), .timelines_used = timelines, - .label = s8("BeamformedData"), + .label = str8("BeamformedData"), }; vk_buffer_allocate(cs->backlog.buffer, &allocate_info); if (cs->backlog.buffer->size > 0) @@ -263,7 +263,7 @@ beamformer_init(BeamformerInput *input) ctx->shared_memory->capabilities.max_rf_data_size = cs->backlog.buffer->size / BeamformerMaxRawDataFramesInFlight; - ctx->shared_memory->capabilities.cuda = cuda_init != cuda_init_stub; + ctx->shared_memory->capabilities.cuda = cuda_supported(); // TODO(rnp): re-enable hilbert support, with and without cuda ctx->shared_memory->capabilities.hilbert = 0; diff --git a/beamformer_core.c b/beamformer_core.c @@ -101,7 +101,7 @@ beamformer_compute_plan_for_block(BeamformerComputeContext *cc, u32 block, Arena GPUBufferAllocateInfo allocate_info = { .size = sizeof(BeamformerComputeArrayParameters), .flags = VulkanUsageFlag_HostReadWrite, - .label = stream_to_s8(&label), + .label = stream_to_str8(&label), }; vk_buffer_allocate(&result->array_parameters, &allocate_info); assert((result->array_parameters.gpu_pointer & 63) == 0); @@ -154,7 +154,7 @@ beamformer_filter_update(BeamformerFilter *f, BeamformerFilterParameters fp, u32 GPUBufferAllocateInfo allocate_info = { .size = byte_size, .flags = VulkanUsageFlag_HostReadWrite, - .label = label, + .label = str8_from_s8(label), }; vk_buffer_allocate(&f->buffer, &allocate_info); } @@ -913,7 +913,12 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, i64 buffer_size = PING_PONG_BUFFER_SLOTS * round_up_to(cp->rf_size, 64); if (ctx->compute_context.ping_pong_buffer.size < buffer_size) { - GPUBufferAllocateInfo allocate_info = {.size = buffer_size, .label = s8("PingPongBuffer")}; + b32 cuda = cuda_supported(); + GPUBufferAllocateInfo allocate_info = { + .size = buffer_size, + .export = cuda ? &ctx->compute_context.ping_pong_export_handle : 0, + .label = str8("PingPongBuffer"), + }; vk_buffer_allocate(&ctx->compute_context.ping_pong_buffer, &allocate_info); BeamformerShaderResourceInfo shader_resource_infos[] = { @@ -924,7 +929,12 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, }, }; vk_bind_shader_resources(shader_resource_infos, countof(shader_resource_infos)); + // TODO(rnp): figure out how to share with CUDA + // IMPORTANT: on linux the handle is returned to os and should be cleared after import + // see usage of glImportMemoryFdEXT and surrounding code in ui.c for examples + if (cuda) { + } } if (cp->hadamard_order != (i32)cp->acquisition_count) @@ -1593,7 +1603,7 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload) GPUBufferAllocateInfo allocate_info = { .size = countof(rf->upload_complete_values) * rf->active_rf_size, .flags = VulkanUsageFlag_HostReadWrite, - .label = s8("RawRFBuffer"), + .label = str8("RawRFBuffer"), }; vk_buffer_allocate(&rf->buffer, &allocate_info); } diff --git a/beamformer_internal.h b/beamformer_internal.h @@ -112,7 +112,9 @@ typedef struct { VulkanTimeline *timelines_used; u32 timeline_count; - s8 label; + OSHandle *export; + + str8 label; } GPUBufferAllocateInfo; typedef struct { @@ -218,6 +220,7 @@ DEBUG_IMPORT renderdoc_end_frame_capture_fn *end_frame_capture; /////////////////////////////// // NOTE: CUDA Library Bindings +#define cuda_supported() (cuda_init != cuda_init_stub) #define CUDA_INIT_FN(name) void name(u32 *input_dims, u32 *decoded_dims) typedef CUDA_INIT_FN(cuda_init_fn); CUDA_INIT_FN(cuda_init_stub) {} @@ -434,7 +437,8 @@ typedef struct { */ #define PING_PONG_BUFFER_SLOTS (2 + 1) GPUBuffer ping_pong_buffer; - u32 ping_pong_input_index; + OSHandle ping_pong_export_handle; + u32 ping_pong_input_index; f32 processing_progress; b32 processing_compute; diff --git a/ui.c b/ui.c @@ -1567,7 +1567,7 @@ ui_beamformer_frame_view_copy_frame(BeamformerUI *ui, BeamformerFrameView *new, GPUBufferAllocateInfo allocate_info = { .size = frame_size, .flags = VulkanUsageFlag_TransferDestination, - .label = stream_to_s8(&sb), + .label = stream_to_str8(&sb), }; vk_buffer_allocate(&new->copy_buffer, &allocate_info); diff --git a/vulkan.c b/vulkan.c @@ -403,12 +403,12 @@ vk_renderdoc_instance_handle(void) #if BEAMFORMER_DEBUG #define vk_label_object(k, h, label, extra) vk_label_object_(VK_OBJECT_TYPE_##k, (u64)h, label, extra) function void -vk_label_object_(VkObjectType kind, u64 handle, s8 label, s8 extra) +vk_label_object_(VkObjectType kind, u64 handle, str8 label, str8 extra) { local_persist u8 buffer[1024]; Stream sb = arena_stream(arena_from_memory(buffer, sizeof(buffer))); - if (vulkan_config.instance.debug_utils && label.len > 0) { - stream_append_s8s(&sb, label, s8(" ("), extra, s8(")")); + if (vulkan_config.instance.debug_utils && label.length > 0) { + stream_append_s8s(&sb, s8_from_str8(label), s8(" ("), s8_from_str8(extra), s8(")")); stream_append_byte(&sb, 0); if (!sb.errors) { VkDebugUtilsObjectNameInfoEXT object_name_info = { @@ -626,9 +626,9 @@ vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_con vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline); - vk_label_object(PIPELINE, result.pipeline, name, s8("Pipeline")); - vk_label_object(PIPELINE_LAYOUT, result.layout, name, s8("Pipeline Layout")); - vk_label_object(SHADER_MODULE, module, name, s8("Module")); + vk_label_object(PIPELINE, result.pipeline, str8_from_s8(name), str8("Pipeline")); + vk_label_object(PIPELINE_LAYOUT, result.layout, str8_from_s8(name), str8("Pipeline Layout")); + vk_label_object(SHADER_MODULE, module, str8_from_s8(name), str8("Module")); vkDestroyShaderModule(vulkan_context->device, module, 0); } @@ -792,8 +792,8 @@ vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u3 assert(infos[0].kind < countof(extras)); assert(infos[1].kind < countof(extras)); - vk_label_object(PIPELINE, result.pipeline, infos[0].name, s8("Pipeline")); - vk_label_object(PIPELINE_LAYOUT, result.layout, infos[0].name, s8("Pipeline Layout")); + vk_label_object(PIPELINE, result.pipeline, str8_from_s8(infos[0].name), str8("Pipeline")); + vk_label_object(PIPELINE_LAYOUT, result.layout, str8_from_s8(infos[0].name), str8("Pipeline Layout")); //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]); //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]); } @@ -940,7 +940,8 @@ typedef struct { u32 queue_family_count; u32 queue_family_indices[VulkanTimeline_Count]; VkIndexType index_type; - s8 label; + OSHandle *export; + str8 label; } VulkanBufferAllocateInfo; function b32 @@ -976,8 +977,16 @@ vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai) if (ai->index_type != VK_INDEX_TYPE_NONE_KHR) buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + VkExternalMemoryBufferCreateInfo external_memory_buffer_create_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + + if (ai->export) buffer_create_info.pNext = &external_memory_buffer_create_info; + vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer); - vk_label_object(BUFFER, vb->buffer, ai->label, s8("Buffer")); + vk_label_object(BUFFER, vb->buffer, ai->label, str8("Buffer")); VkMemoryRequirements memory_requirements; vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements); @@ -1005,14 +1014,14 @@ vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai) b32 result = 0; // TODO(rnp): this may fail if the allocation is too big for the BAR size // it needs to handled properly - if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, 0)) { + if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, ai->export)) { result = 1; ai->gpu_buffer->size = size; vb->memory_size = size; vb->index_type = ai->index_type; - vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, s8("Memory")); + vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, str8("Memory")); if (host_read_write) vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer); @@ -1824,15 +1833,15 @@ vk_load_descriptor_block(void) static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), ""); vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets); - vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, s8("Beamformer Resources"), s8("Pool")); + vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, str8("Beamformer Resources"), str8("Pool")); DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) { Arena scratch = vk->arena; for EachElement(vk->descriptor_sets, it) { Stream sb = arena_stream(scratch); stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s")); - vk_label_object(DESCRIPTOR_SET, vk->descriptor_sets[it], stream_to_s8(&sb), s8("Set")); - vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_s8(&sb), s8("Set Layout")); + vk_label_object(DESCRIPTOR_SET, vk->descriptor_sets[it], stream_to_str8(&sb), str8("Set")); + vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_str8(&sb), str8("Set Layout")); } } @@ -2121,7 +2130,7 @@ vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 m .size = (u64)size, .flags = VulkanUsageFlag_HostReadWrite, .index_type = index_type, - .label = label, + .label = str8_from_s8(label), .queue_family_count = 1, .queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family, }; @@ -2274,9 +2283,9 @@ vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples, }; vkCreateImageView(vk->device, &image_view_info, 0, &vi->view); - vk_label_object(IMAGE, vi->image, label, s8("Image")); - vk_label_object(IMAGE_VIEW, vi->view, label, s8("Image View")); - vk_label_object(DEVICE_MEMORY, vi->memory, label, s8("Memory")); + vk_label_object(IMAGE, vi->image, str8_from_s8(label), str8("Image")); + vk_label_object(IMAGE_VIEW, vi->view, str8_from_s8(label), str8("Image View")); + vk_label_object(DEVICE_MEMORY, vi->memory, str8_from_s8(label), str8("Memory")); } else { vkDestroyImage(vk->device, vi->image, 0); vk_entity_release(e); diff --git a/vulkan.h b/vulkan.h @@ -120,6 +120,7 @@ typedef enum { VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2 = 1000059002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2 = 1000059006, VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO = 1000060000, + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO = 1000072000, VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO = 1000072001, VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO = 1000072002, VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR = 1000073003, @@ -2894,6 +2895,12 @@ typedef struct { VkStructureType sType; const void * pNext; VkExternalMemoryHandleTypeFlags handleTypes; +} VkExternalMemoryBufferCreateInfo; + +typedef struct { + VkStructureType sType; + const void * pNext; + VkExternalMemoryHandleTypeFlags handleTypes; } VkExternalMemoryImageCreateInfo; typedef struct {