beamformer_internal.h (17002B)
1 /* See LICENSE for license details. */ 2 #ifndef BEAMFORMER_INTERNAL_H 3 #define BEAMFORMER_INTERNAL_H 4 5 #include "beamformer.h" 6 7 #include "util.h" 8 #include "opengl.h" 9 10 #include "generated/beamformer.meta.c" 11 #include "generated/beamformer_shaders.c" 12 13 #include "external/raylib/src/raylib.h" 14 #include "external/raylib/src/rlgl.h" 15 16 #define beamformer_info(s) s8("[info] " s "\n") 17 18 #define os_path_separator() (s8){.data = &os_system_info()->path_separator_byte, .len = 1} 19 20 typedef struct { u64 value[1]; } VulkanHandle; 21 22 typedef enum { 23 VulkanTimeline_Graphics, 24 VulkanTimeline_Compute, 25 VulkanTimeline_Transfer, 26 VulkanTimeline_Count, 27 } VulkanTimeline; 28 29 typedef enum { 30 VulkanShaderKind_Vertex, 31 VulkanShaderKind_Mesh, 32 VulkanShaderKind_Fragment, 33 VulkanShaderKind_Compute, 34 VulkanShaderKind_Count, 35 } VulkanShaderKind; 36 37 typedef enum { 38 VulkanImageUsage_None, 39 VulkanImageUsage_Colour, 40 VulkanImageUsage_DepthStencil, 41 VulkanImageUsage_Count, 42 } VulkanImageUsage; 43 44 typedef enum { 45 VulkanUsageFlag_ImageSampling = 1 << 0, 46 VulkanUsageFlag_HostReadWrite = 1 << 1, // NOTE: not valid on images 47 /* NOTE: uses: 48 * - image-image copy operations 49 * - buffer-buffer copy operations 50 */ 51 VulkanUsageFlag_TransferSource = 1 << 2, 52 VulkanUsageFlag_TransferDestination = 1 << 3, 53 } VulkanUsageFlags; 54 55 typedef struct { 56 VulkanShaderKind kind; 57 s8 text; 58 s8 name; 59 } VulkanPipelineCreateInfo; 60 61 typedef struct { 62 VulkanHandle handle; 63 u64 gpu_pointer; 64 i64 size; 65 66 // NOTE: only used for render models 67 u64 index_count; 68 } GPUBuffer; 69 70 typedef struct { 71 VulkanHandle image; 72 u32 width; 73 u32 height; 74 u32 samples; 75 u32 mip_map_levels; 76 // TODO(rnp): this is only here for importing from OpenGL, move it back into handle later 77 u64 memory_size; 78 } GPUImage; 79 80 typedef enum { 81 GPUVendor_AMD = 0x1002, 82 GPUVendor_NVIDIA = 0x10DE, 83 GPUVendor_Qualcomm = 0x5143, 84 GPUVendor_Intel = 0x8086, 85 } GPUVendor; 86 87 typedef struct { 88 s8 name; 89 GPUVendor vendor; 90 91 f32 timestamp_period_ns; 92 93 u32 max_compute_shared_memory_size; 94 u16 max_msaa_samples; 95 u16 subgroup_size; 96 97 b32 cooperative_matrix; 98 99 u32 max_image_dimension_2D; 100 // NOTE(rnp): vulkan compute will output to a buffer so this won't be relevant 101 u32 max_image_dimension_3D; 102 103 u64 gpu_heap_size; 104 u64 gpu_heap_used; 105 } GPUInfo; 106 107 typedef struct { 108 i64 size; 109 VulkanUsageFlags flags; 110 111 // NOTE(rnp): only required if buffer will be used on multiple timelines 112 VulkanTimeline *timelines_used; 113 u32 timeline_count; 114 115 OSHandle *export; 116 117 str8 label; 118 } GPUBufferAllocateInfo; 119 120 typedef struct { 121 GPUBuffer *gpu_buffer; 122 u64 offset; 123 u64 size; 124 } GPUMemoryBarrierInfo; 125 126 typedef struct { 127 GPUBuffer model; 128 u32 vertex_count; 129 u32 normals_offset; 130 } RenderModel; 131 132 typedef struct { 133 BeamformerShaderResourceKind kind; 134 VulkanHandle handle; 135 u32 slot; 136 } BeamformerShaderResourceInfo; 137 138 #include "threads.c" 139 #include "util_os_ui.c" 140 #include "util_os.c" 141 142 /////////////////////////// 143 // NOTE: vulkan layer API 144 DEBUG_IMPORT void vk_load(OSLibrary vulkan, Arena *memory, Stream *error); 145 146 DEBUG_IMPORT GPUInfo *vk_gpu_info(void); 147 148 DEBUG_IMPORT void vk_buffer_allocate(GPUBuffer *, GPUBufferAllocateInfo *info); 149 DEBUG_IMPORT void vk_buffer_release(GPUBuffer *); 150 DEBUG_IMPORT void vk_buffer_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal); 151 DEBUG_IMPORT void vk_buffer_range_download(void *output, GPUBuffer *, u64 source_offset, u64 size, b32 non_temporal); 152 DEBUG_IMPORT u64 vk_round_up_to_sync_size(u64, u64 min); 153 154 // NOTE: images are 2D only, any other use case should just use a buffer and index in the shader 155 DEBUG_IMPORT void vk_image_allocate(GPUImage *, u32 width, u32 height, u32 mips, u32 samples, VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label); 156 DEBUG_IMPORT void vk_image_release(GPUImage *); 157 158 DEBUG_IMPORT void vk_render_model_allocate(GPUBuffer *, void *indices, u64 index_count, u64 model_size, s8 label); 159 DEBUG_IMPORT void vk_render_model_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal); 160 DEBUG_IMPORT void vk_render_model_release(GPUBuffer *); 161 162 DEBUG_IMPORT void vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count); 163 164 /* NOTE: Pipelines do not have bindings. Data should be passed using push constants. 165 * In particular the push constants should contain pointers to gpu memory using the 166 * BufferDeviceAddress extension. */ 167 // TODO(rnp): change this to accept SPIR-V directly and accept BakeParameters as specialization data 168 DEBUG_IMPORT VulkanHandle vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size); 169 DEBUG_IMPORT b32 vk_pipeline_valid(VulkanHandle); 170 DEBUG_IMPORT void vk_pipeline_release(VulkanHandle); 171 172 DEBUG_IMPORT b32 vk_buffer_needs_sync(GPUBuffer *); 173 174 DEBUG_IMPORT VulkanHandle vk_create_semaphore(OSHandle *export); 175 176 DEBUG_IMPORT b32 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns); 177 DEBUG_IMPORT u64 vk_host_signal_timeline(VulkanTimeline timeline); 178 179 DEBUG_IMPORT VulkanHandle vk_command_begin(VulkanTimeline timeline); 180 DEBUG_IMPORT void vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline); 181 DEBUG_IMPORT void vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count); 182 DEBUG_IMPORT void vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch); 183 DEBUG_IMPORT void vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values); 184 DEBUG_IMPORT void vk_command_timestamp(VulkanHandle command); 185 DEBUG_IMPORT void vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value); 186 // NOTE: extra semaphores only exist for synchronization with OpenGL and will be removed in the future 187 DEBUG_IMPORT u64 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore); 188 189 DEBUG_IMPORT void vk_command_begin_rendering(VulkanHandle command, GPUImage *restrict colour, GPUImage *restrict depth, GPUImage *restrict resolve); 190 DEBUG_IMPORT void vk_command_draw(VulkanHandle command, GPUBuffer *model); 191 DEBUG_IMPORT void vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset); 192 DEBUG_IMPORT void vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth); 193 DEBUG_IMPORT void vk_command_end_rendering(VulkanHandle command); 194 195 DEBUG_IMPORT void vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, GPUBuffer *restrict source, u64 source_offset, i64 size); 196 197 // NOTE: returns array of valid timestamps + 1, first element is the count. 198 // Calling thread may stall until results available. 199 DEBUG_IMPORT u64 * vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena); 200 201 #if BEAMFORMER_RENDERDOC_HOOKS 202 DEBUG_IMPORT void * vk_renderdoc_instance_handle(void); 203 204 DEBUG_IMPORT renderdoc_start_frame_capture_fn *start_frame_capture; 205 DEBUG_IMPORT renderdoc_set_capture_path_template_fn *set_capture_path_template; 206 DEBUG_IMPORT renderdoc_end_frame_capture_fn *end_frame_capture; 207 #define start_renderdoc_capture() do { \ 208 if (set_capture_path_template) set_capture_path_template("captures/ogl.rdc"); \ 209 if (start_frame_capture) start_frame_capture(vk_renderdoc_instance_handle(), 0); \ 210 } while(0) 211 #define end_renderdoc_capture() if (end_frame_capture) end_frame_capture(vk_renderdoc_instance_handle(), 0) 212 #define renderdoc_attached(...) (start_frame_capture != 0) 213 214 #else 215 #define start_renderdoc_capture(...) 216 #define end_renderdoc_capture(...) 217 #define renderdoc_attached(...) (0) 218 #endif 219 220 /////////////////////////////// 221 // NOTE: CUDA Library Bindings 222 223 #define cuda_supported() (cuda_init != cuda_init_stub) 224 #define CUDA_INIT_FN(name) void name(u32 *input_dims, u32 *decoded_dims) 225 typedef CUDA_INIT_FN(cuda_init_fn); 226 CUDA_INIT_FN(cuda_init_stub) {} 227 228 #define CUDA_REGISTER_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo) 229 typedef CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_fn); 230 CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_stub) {} 231 232 #define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx) 233 typedef CUDA_HILBERT_FN(cuda_hilbert_fn); 234 CUDA_HILBERT_FN(cuda_hilbert_stub) {} 235 236 #define CUDA_SET_CHANNEL_MAPPING_FN(name) void name(i16 *channel_mapping) 237 typedef CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_fn); 238 CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_stub) {} 239 240 #define CUDALibraryProcedureList \ 241 X(hilbert, "cuda_hilbert") \ 242 X(init, "init_cuda_configuration") \ 243 X(register_buffers, "register_cuda_buffers") \ 244 X(set_channel_mapping, "cuda_set_channel_mapping") 245 246 #define X(name, ...) DEBUG_IMPORT cuda_## name ##_fn *cuda_## name; 247 CUDALibraryProcedureList 248 #undef X 249 250 ///////////////////////////////////// 251 // NOTE: Core Beamformer Definitions 252 253 #include "beamformer_parameters.h" 254 #include "beamformer_shared_memory.c" 255 256 typedef struct { 257 BeamformerFilterParameters parameters; 258 f32 time_delay; 259 i32 length; 260 GPUBuffer buffer; 261 } BeamformerFilter; 262 263 // X(kind, format, elements) 264 #define BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST \ 265 X(Hadamard, f16, BeamformerMaxChannelCount * BeamformerMaxChannelCount) \ 266 X(FocalVectors, v2, BeamformerMaxChannelCount) \ 267 X(SparseElements, i16, BeamformerMaxChannelCount) \ 268 X(TransmitReceiveOrientations, u16, BeamformerMaxChannelCount) \ 269 270 typedef enum { 271 #define X(k, ...) BeamformerComputeArrayParameterKind_##k, 272 BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST 273 #undef X 274 BeamformerComputeArrayParameterKind_Count 275 } BeamformerComputeArrayParameterKind; 276 277 // NOTE(rnp): only used to calculate offsets, never used directly 278 #define X(name, type, elements) alignas(64) type name[elements]; 279 typedef struct {BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST} BeamformerComputeArrayParameters; 280 #undef X 281 282 typedef struct { 283 uv3 layout; 284 uv3 dispatch; 285 BeamformerDataKind input_data_kind; 286 BeamformerDataKind output_data_kind; 287 BeamformerShaderBakeParameters bake; 288 } BeamformerShaderDescriptor; 289 290 typedef struct BeamformerComputePlan BeamformerComputePlan; 291 struct BeamformerComputePlan { 292 BeamformerComputePipeline pipeline; 293 294 VulkanHandle vulkan_pipelines[BeamformerMaxComputeShaderStages]; 295 296 u32 first_image_shader_index; 297 u32 channel_count; 298 u32 raw_channel_byte_stride; 299 300 u32 dirty_programs; 301 302 BeamformerAcquisitionKind acquisition_kind; 303 u32 acquisition_count; 304 305 u32 rf_size; 306 i32 hadamard_order; 307 b32 iq_pipeline; 308 309 m4 voxel_transform; 310 m4 ui_voxel_transform; 311 312 iv3 output_points; 313 i32 average_frames; 314 315 // TODO(rnp): specialization constants 316 v2 xdc_element_pitch; 317 m4 xdc_transform; 318 // TODO(rnp): probably just compute this everytime 319 m4 das_voxel_transform; 320 321 GPUBuffer array_parameters; 322 323 BeamformerFilter filters[BeamformerFilterSlots]; 324 325 u128 shader_hashes[BeamformerMaxComputeShaderStages]; 326 BeamformerShaderDescriptor shader_descriptors[BeamformerMaxComputeShaderStages]; 327 328 BeamformerComputePlan *next; 329 }; 330 331 typedef struct { 332 u64 upload_complete_values[BeamformerMaxRawDataFramesInFlight]; 333 u64 compute_complete_values[BeamformerMaxRawDataFramesInFlight]; 334 335 GPUBuffer buffer; 336 337 u32 active_rf_size; 338 339 u64 timestamp; 340 341 u64 insertion_index; 342 u64 compute_index; 343 } BeamformerRFBuffer; 344 345 typedef struct { 346 BeamformerComputeStatsTable table; 347 f32 average_times[BeamformerShaderKind_Count]; 348 349 u64 last_rf_timer_count; 350 f32 rf_time_delta_average; 351 352 u32 latest_frame_index; 353 u32 latest_rf_index; 354 } ComputeShaderStats; 355 356 /* TODO(rnp): maybe this also gets used for CPU timing info as well */ 357 typedef enum { 358 ComputeTimingInfoKind_ComputeFrameBegin, 359 ComputeTimingInfoKind_ComputeFrameEnd, 360 ComputeTimingInfoKind_Shader, 361 ComputeTimingInfoKind_RF_Data, 362 } ComputeTimingInfoKind; 363 364 typedef struct { 365 u64 timer_count; 366 ComputeTimingInfoKind kind; 367 union { 368 struct { 369 static_assert(BeamformerShaderKind_Count <= U16_MAX, ""); 370 u16 shader; 371 u16 shader_slot; 372 }; 373 }; 374 } ComputeTimingInfo; 375 376 typedef struct { 377 u32 write_index; 378 u32 read_index; 379 b32 compute_frame_active; 380 381 u32 in_flight_shader_count; 382 BeamformerShaderKind in_flight_shader_ids[BeamformerMaxComputeShaderStages]; 383 384 ComputeTimingInfo buffer[4096]; 385 } ComputeTimingTable; 386 387 typedef struct { 388 BeamformerRFBuffer *rf_buffer; 389 BeamformerSharedMemory *shared_memory; 390 i64 shared_memory_size; 391 ComputeTimingTable *compute_timing_table; 392 i32 *compute_worker_sync; 393 } BeamformerUploadThreadContext; 394 395 typedef struct { 396 u64 buffer_offset; 397 u64 timeline_valid_value; 398 399 /* NOTE: for use when displaying either prebeamformed frames or on the current frame 400 * when we intend to recompute on the next frame */ 401 m4 voxel_transform; 402 403 iv3 points; 404 405 u32 id; 406 u32 compound_count; 407 BeamformerDataKind data_kind; 408 BeamformerAcquisitionKind acquisition_kind; 409 BeamformerViewPlaneTag view_plane_tag; 410 } BeamformerFrame; 411 412 /* NOTE(rnp): backing storage for beamformed frames. The amount of backlog frames 413 * is dependant on the currently requested output size. */ 414 typedef struct { 415 GPUBuffer buffer[1]; 416 417 u64 next_offset; 418 u64 counter; 419 420 BeamformerFrame frames[BeamformerMaxBacklogFrames]; 421 } BeamformerFrameBacklog; 422 423 typedef struct { 424 BeamformerRFBuffer rf_buffer; 425 426 BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlocks]; 427 BeamformerComputePlan *compute_plan_freelist; 428 429 VulkanHandle compute_internal_pipelines[BeamformerShaderKind_ComputeInternalCount]; 430 431 /* NOTE(rnp): used to ping pong data between compute stages. 432 * 433 * Allocate one extra slot for DAS output to allow overlap with the next 434 * channel chunk batch. To obtain optimal overlap we need 2 extra slots 435 * and we need to ping pong submissions between queues. This is not 436 * implemented so we only do 1 extra slot for now. 437 */ 438 #define PING_PONG_BUFFER_SLOTS (2 + 1) 439 GPUBuffer ping_pong_buffer; 440 OSHandle ping_pong_export_handle; 441 u32 ping_pong_input_index; 442 443 f32 processing_progress; 444 b32 processing_compute; 445 446 BeamformerFrameBacklog backlog; 447 } BeamformerComputeContext; 448 449 typedef struct { 450 OSThread handle; 451 452 Arena arena; 453 iptr user_context; 454 i32 sync_variable; 455 b32 awake; 456 } GLWorkerThreadContext; 457 458 typedef enum { 459 BeamformerState_Uninitialized = 0, 460 BeamformerState_Running, 461 BeamformerState_ShouldClose, 462 BeamformerState_Terminated, 463 } BeamformerState; 464 465 typedef struct { 466 BeamformerState state; 467 468 iv2 window_size; 469 470 Arena arena; 471 Arena ui_backing_store; 472 void *ui; 473 u32 ui_dirty_parameter_blocks; 474 475 u64 frame_timestamp; 476 477 Stream error_stream; 478 479 BeamformerSharedMemory *shared_memory; 480 i64 shared_memory_size; 481 482 BeamformerFrame *latest_frame; 483 484 // TODO(rnp): track elsewhere 485 b32 render_shader_updated; 486 487 /* NOTE: this will only be used when we are averaging */ 488 u32 averaged_frame_index; 489 BeamformerFrame averaged_frames[2]; 490 491 GLWorkerThreadContext upload_worker; 492 GLWorkerThreadContext compute_worker; 493 494 BeamformerComputeContext compute_context; 495 496 ComputeShaderStats compute_shader_stats[1]; 497 ComputeTimingTable compute_timing_table[1]; 498 499 BeamformWorkQueue beamform_work_queue[1]; 500 501 // TODO(rnp): this should go to the UI eventually 502 OSWindow main_window; 503 } BeamformerCtx; 504 #define BeamformerContextMemory(m) (BeamformerCtx *)align_pointer_up((m), alignof(BeamformerCtx)); 505 506 typedef enum { 507 BeamformerFileReloadKind_ComputeInternalShader, 508 BeamformerFileReloadKind_ComputeShader, 509 BeamformerFileReloadKind_RenderShader, 510 } BeamformerFileReloadKind; 511 512 typedef struct { 513 BeamformerShaderKind shader; 514 VulkanHandle * pipeline; 515 } BeamformerShaderReloadData; 516 517 typedef struct { 518 BeamformerShaderKind shader; 519 VulkanShaderKind shader_kind; 520 521 // NOTE(rnp): based on BakeShaders compile time value 522 s8 filename_or_data; 523 524 BeamformerShaderDescriptor *shader_descriptor; 525 526 uv3 layout; 527 } BeamformerShaderReloadInfo; 528 529 typedef struct { 530 BeamformerFileReloadKind kind; 531 union { 532 BeamformerShaderReloadData shader_reload; 533 }; 534 } BeamformerFileReloadContext; 535 536 #define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(BeamformerCtx *ctx, Arena *arena) 537 typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn); 538 539 #define BEAMFORMER_RF_UPLOAD_FN(name) void name(BeamformerUploadThreadContext *ctx) 540 typedef BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload_fn); 541 542 #define BEAMFORMER_DEBUG_UI_DEINIT_FN(name) void name(BeamformerCtx *ctx) 543 typedef BEAMFORMER_DEBUG_UI_DEINIT_FN(beamformer_debug_ui_deinit_fn); 544 545 #endif /* BEAMFORMER_INTERNAL_H */