vulkan.c (102619B)
1 /* See LICENSE for license details. */ 2 // TODO(rnp) 3 // [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays 4 // [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float 5 // [ ]: VK_KHR_robustness2 probably shouldn't be required but it also might not matter 6 7 #include "beamformer_internal.h" 8 #include "vulkan.h" 9 #include "external/glslang/glslang/Include/glslang_c_interface.h" 10 11 #define ForceSingleQueue (0) 12 13 #define glslang_info(s) s8("[glslang] " s) 14 #define vulkan_info(s) s8("[vulkan] " s) 15 16 #define ValidVulkanHandle(h) ((h).value[0] != 0) 17 18 #define MaxCommandBuffersInFlight BeamformerMaxRawDataFramesInFlight 19 #define MaxCommandBufferTimestamps (1024) 20 21 typedef enum { 22 VulkanQueueKind_Graphics, 23 VulkanQueueKind_Compute, 24 VulkanQueueKind_Transfer, 25 VulkanQueueKind_Count, 26 } VulkanQueueKind; 27 28 typedef enum { 29 VulkanMemoryKind_Device, 30 VulkanMemoryKind_BAR, 31 VulkanMemoryKind_Host, 32 VulkanMemoryKind_Count, 33 } VulkanMemoryKind; 34 35 typedef struct { 36 VkDeviceMemory memory; 37 VkBuffer buffer; 38 u64 memory_size; 39 40 void * host_pointer; 41 42 VulkanMemoryKind memory_kind; 43 44 // NOTE: only used when the buffer is backing a VulkanRenderModel. 45 VkIndexType index_type; 46 } VulkanBuffer; 47 48 typedef struct { 49 VkDeviceMemory memory; 50 VkImage image; 51 VkImageView view; 52 } VulkanImage; 53 54 typedef struct { 55 VkPipeline pipeline; 56 VkPipelineLayout layout; 57 VkShaderStageFlags stage_flags; 58 } VulkanPipeline; 59 60 typedef struct { 61 VkSemaphore semaphore; 62 u64 value; 63 } VulkanSemaphore; 64 65 typedef struct { 66 VulkanTimeline timeline; 67 u32 buffer_index; 68 69 // NOTE(rnp): since there may not be QueueKind_Count queues, when putting values into this 70 // array you must be careful to map through the queue_indices array in the vulkan_context. 71 u64 in_flight_wait_values[VulkanQueueKind_Count]; 72 } VulkanCommandBuffer; 73 74 typedef enum { 75 VulkanEntityKind_Buffer, 76 VulkanEntityKind_CommandBuffer, 77 VulkanEntityKind_Image, 78 VulkanEntityKind_Pipeline, 79 VulkanEntityKind_RenderModel, 80 VulkanEntityKind_Semaphore, 81 } VulkanEntityKind; 82 83 typedef struct VulkanEntity VulkanEntity; 84 struct VulkanEntity { 85 VulkanEntity * next; 86 VulkanEntityKind kind; 87 union { 88 VulkanBuffer buffer; 89 VulkanCommandBuffer command_buffer; 90 VulkanImage image; 91 VulkanPipeline pipeline; 92 VulkanSemaphore semaphore; 93 } as; 94 }; 95 96 typedef alignas(64) struct { 97 i32 lock; 98 99 u16 queue_family; 100 u16 queue_index; 101 VkQueue queue; 102 103 VulkanSemaphore timeline_semaphore; 104 105 VkPipelineStageFlags2 pipeline_stage_flags; 106 } VulkanQueue; 107 static_assert(alignof(VulkanQueue) == 64, "VulkanQueue must be placed on its own cacheline"); 108 109 typedef alignas(64) struct { 110 i32 lock; 111 u32 next_index; 112 113 VulkanPipeline *bound_pipeline; 114 115 VkCommandPool handle; 116 VkQueryPool query_pool; 117 VkCommandBuffer buffers[MaxCommandBuffersInFlight]; 118 119 u64 submission_values[MaxCommandBuffersInFlight]; 120 u32 queries_occupied[MaxCommandBuffersInFlight]; 121 } VulkanCommandPool; 122 123 typedef struct { 124 Arena arena; 125 i32 arena_lock; 126 127 VkInstance handle; 128 VkDevice device; 129 VkPhysicalDevice physical_device; 130 131 VkDescriptorPool descriptor_pool; 132 VkDescriptorSetLayout descriptor_set_layouts[BeamformerShaderResourceKind_Count]; 133 VkDescriptorSet descriptor_sets[BeamformerShaderResourceKind_Count]; 134 // NOTE(rnp): must store these if we want to allow partial updates easily 135 VkDescriptorBufferInfo descriptor_buffer_infos[BeamformerShaderBufferSlot_Count]; 136 137 // NOTE(rnp): fallback for when a shader fails to compile 138 VulkanPipeline default_compute_pipeline; 139 VulkanPipeline default_graphics_pipeline; 140 141 GPUInfo gpu_info; 142 143 struct { 144 u64 max_allocation_size; 145 u64 non_coherent_atom_size; 146 u8 gpu_heap_index; 147 i8 memory_type_indices[VulkanMemoryKind_Count]; 148 b8 memory_host_coherent[VulkanMemoryKind_Count]; 149 static_assert(VK_MAX_MEMORY_HEAPS < I8_MAX, ""); 150 static_assert(VK_MAX_MEMORY_TYPES < U8_MAX, ""); 151 } memory_info; 152 153 VulkanCommandPool * command_pools[VulkanTimeline_Count]; 154 VulkanQueue * queues[VulkanQueueKind_Count]; 155 // NOTE(rnp): there are a few places in the code where simply going through the queues map 156 // is not sufficient. those places need to know of the unique queues which unique queue 157 // is being referred to. that code uses this map instead. 158 u16 queue_indices[VulkanQueueKind_Count]; 159 u16 unique_queues; 160 161 VkFormat swap_chain_image_format; 162 VkFormat depth_stencil_format; 163 164 VulkanEntity * entity_freelist; 165 Arena entity_arena; 166 i32 entity_lock; 167 } VulkanContext; 168 169 read_only global const char *vk_required_instance_extensions[] = { 170 }; 171 172 #if OS_WINDOWS 173 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \ 174 X("VK_KHR_external_memory_win32") \ 175 X("VK_KHR_external_semaphore_win32") \ 176 177 #else 178 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \ 179 X("VK_KHR_external_memory_fd") \ 180 X("VK_KHR_external_semaphore_fd") \ 181 182 #endif 183 184 #define VK_REQUIRED_DEVICE_EXTENSIONS_LIST \ 185 X("VK_KHR_16bit_storage") \ 186 X("VK_KHR_external_memory") \ 187 X("VK_KHR_external_semaphore") \ 188 X("VK_KHR_robustness2") \ 189 X("VK_KHR_storage_buffer_storage_class") \ 190 X("VK_KHR_timeline_semaphore") \ 191 VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST 192 193 #define X(str) s8_comp(str), 194 read_only global s8 vk_required_device_extensions[] = {VK_REQUIRED_DEVICE_EXTENSIONS_LIST}; 195 #undef X 196 197 #define VK_OPTIONAL_DEVICE_EXTENSIONS_LIST \ 198 X(VK_KHR, cooperative_matrix) \ 199 200 #define X(p, s, ...) s8_comp(#p "_" #s), 201 read_only global s8 vk_optional_device_extensions[] = {VK_OPTIONAL_DEVICE_EXTENSIONS_LIST}; 202 #undef X 203 204 #define VK_REQUIRED_PHYSICAL_FEATURES \ 205 X(shaderInt16) \ 206 X(shaderInt64) \ 207 208 #define VK_REQUIRED_PHYSICAL_11_FEATURES \ 209 X(storageBuffer16BitAccess) \ 210 211 #define VK_REQUIRED_PHYSICAL_12_FEATURES \ 212 X(bufferDeviceAddress) \ 213 X(shaderFloat16) \ 214 X(timelineSemaphore) \ 215 X(vulkanMemoryModel) \ 216 217 #define VK_REQUIRED_PHYSICAL_13_FEATURES \ 218 X(dynamicRendering) \ 219 X(synchronization2) \ 220 221 #define VK_DEBUG_EXTENSIONS \ 222 X(VK_KHR, shader_non_semantic_info) \ 223 X(VK_KHR, shader_relaxed_extended_instruction) \ 224 225 #define X(p, s, ...) s8_comp(#p "_" #s), 226 read_only global s8 vk_debug_extensions[] = {VK_DEBUG_EXTENSIONS}; 227 #undef X 228 229 #define VK_INSTANCE_DEBUG_EXTENSIONS_LIST \ 230 X(VK_EXT, debug_utils) \ 231 232 #define X(p, s, ...) s8_comp(#p "_" #s), 233 read_only global s8 vk_instance_debug_extensions[] = {VK_INSTANCE_DEBUG_EXTENSIONS_LIST}; 234 #undef X 235 236 #if BEAMFORMER_DEBUG 237 #define VK_VALIDATION_LAYERS_LIST \ 238 X(KHRONOS, validation) \ 239 240 #else 241 #define VK_VALIDATION_LAYERS_LIST 242 #endif 243 244 read_only global str8 vk_validation_layers[] = { 245 #define X(vendor, name, ...) str8_comp("VK_LAYER_" #vendor "_" #name), 246 VK_VALIDATION_LAYERS_LIST 247 #undef X 248 }; 249 250 global struct { 251 u32 driver_api_version; 252 union { 253 struct { 254 #define X(_, name, ...) b8 name; 255 VK_OPTIONAL_DEVICE_EXTENSIONS_LIST 256 #undef X 257 }; 258 b8 E[countof(vk_optional_device_extensions)]; 259 } optional; 260 261 union { 262 struct { 263 #define X(_, name, ...) b8 name; 264 VK_DEBUG_EXTENSIONS 265 #undef X 266 }; 267 b8 E[countof(vk_debug_extensions)]; 268 } debug; 269 270 union { 271 struct { 272 #define X(_, name, ...) b8 name; 273 VK_INSTANCE_DEBUG_EXTENSIONS_LIST 274 #undef X 275 }; 276 b8 E[countof(vk_instance_debug_extensions)]; 277 } instance; 278 279 #if BEAMFORMER_DEBUG 280 struct { 281 union { 282 struct { 283 #define X(_, name, ...) b8 name; 284 VK_VALIDATION_LAYERS_LIST 285 #undef X 286 }; 287 b8 E[countof(vk_validation_layers)]; 288 } enabled; 289 290 union { 291 struct { 292 #define X(_, name, ...) u32 name; 293 VK_VALIDATION_LAYERS_LIST 294 #undef X 295 }; 296 u32 E[countof(vk_validation_layers)]; 297 } version; 298 } layers; 299 #endif 300 } vulkan_config; 301 302 #define MAX_ENABLED_EXTENSIONS ( countof(vk_required_device_extensions) \ 303 + countof(vk_optional_device_extensions) \ 304 + countof(vk_debug_extensions) \ 305 ) 306 307 global VulkanContext vulkan_context[1]; 308 309 /* NOTE(rnp): the idea here is to set reasonable development constraints. 310 * They should probably not match one to one with the maximums of the dev 311 * machine's hardware. Instead these are here to cause compile time failure 312 * for features which are not expected to work everywhere. */ 313 global glslang_resource_t glslc_resource_constraints[1] = {{ 314 .max_compute_work_group_count_x = 65535, 315 .max_compute_work_group_count_y = 65535, 316 .max_compute_work_group_count_z = 65535, 317 .max_compute_work_group_size_x = 1024, 318 .max_compute_work_group_size_y = 1024, 319 .max_compute_work_group_size_z = 1024, 320 321 // NOTE: taken from glslang defaults 322 .max_lights = 32, 323 .max_clip_planes = 6, 324 .max_texture_units = 32, 325 .max_texture_coords = 32, 326 .max_vertex_attribs = 64, 327 .max_vertex_uniform_components = 4096, 328 .max_varying_floats = 64, 329 .max_vertex_texture_image_units = 32, 330 .max_combined_texture_image_units = 80, 331 .max_texture_image_units = 32, 332 .max_fragment_uniform_components = 4096, 333 .max_draw_buffers = 32, 334 .max_vertex_uniform_vectors = 128, 335 .max_varying_vectors = 8, 336 .max_fragment_uniform_vectors = 16, 337 .max_vertex_output_vectors = 16, 338 .max_fragment_input_vectors = 15, 339 .min_program_texel_offset = -8, 340 .max_program_texel_offset = 7, 341 .max_clip_distances = 8, 342 .max_compute_uniform_components = 1024, 343 .max_compute_texture_image_units = 16, 344 .max_compute_image_uniforms = 8, 345 .max_compute_atomic_counters = 8, 346 .max_compute_atomic_counter_buffers = 1, 347 .max_varying_components = 60, 348 .max_vertex_output_components = 64, 349 .max_fragment_input_components = 128, 350 .max_image_units = 8, 351 .max_combined_image_units_and_fragment_outputs = 8, 352 .max_combined_shader_output_resources = 8, 353 .max_image_samples = 0, 354 .max_vertex_image_uniforms = 0, 355 .max_fragment_image_uniforms = 8, 356 .max_combined_image_uniforms = 8, 357 .max_viewports = 16, 358 .max_vertex_atomic_counters = 0, 359 .max_fragment_atomic_counters = 8, 360 .max_combined_atomic_counters = 8, 361 .max_atomic_counter_bindings = 1, 362 .max_vertex_atomic_counter_buffers = 0, 363 .max_fragment_atomic_counter_buffers = 1, 364 .max_combined_atomic_counter_buffers = 1, 365 .max_atomic_counter_buffer_size = 16384, 366 .max_transform_feedback_buffers = 4, 367 .max_transform_feedback_interleaved_components = 64, 368 .max_cull_distances = 8, 369 .max_combined_clip_and_cull_distances = 8, 370 .max_samples = 4, 371 .max_mesh_output_vertices_ext = 256, 372 .max_mesh_output_primitives_ext = 256, 373 .max_mesh_work_group_size_x_ext = 128, 374 .max_mesh_work_group_size_y_ext = 128, 375 .max_mesh_work_group_size_z_ext = 128, 376 .max_task_work_group_size_x_ext = 128, 377 .max_task_work_group_size_y_ext = 128, 378 .max_task_work_group_size_z_ext = 128, 379 .max_mesh_view_count_ext = 4, 380 .max_dual_source_draw_buffers_ext = 1, 381 382 .limits = { 383 .non_inductive_for_loops = 1, 384 .while_loops = 1, 385 .do_while_loops = 1, 386 .general_uniform_indexing = 1, 387 .general_attribute_matrix_vector_indexing = 1, 388 .general_varying_indexing = 1, 389 .general_sampler_indexing = 1, 390 .general_variable_indexing = 1, 391 .general_constant_matrix_vector_indexing = 1, 392 }, 393 }}; 394 395 #if BEAMFORMER_RENDERDOC_HOOKS 396 DEBUG_IMPORT void * 397 vk_renderdoc_instance_handle(void) 398 { 399 return *((void **)vulkan_context->handle); 400 } 401 #endif 402 403 #if BEAMFORMER_DEBUG 404 #define vk_label_object(k, h, label, extra) vk_label_object_(VK_OBJECT_TYPE_##k, (u64)h, label, extra) 405 function void 406 vk_label_object_(VkObjectType kind, u64 handle, str8 label, str8 extra) 407 { 408 local_persist u8 buffer[1024]; 409 Stream sb = arena_stream(arena_from_memory(buffer, sizeof(buffer))); 410 if (vulkan_config.instance.debug_utils && label.length > 0) { 411 stream_append_s8s(&sb, s8_from_str8(label), s8(" ("), s8_from_str8(extra), s8(")")); 412 stream_append_byte(&sb, 0); 413 if (!sb.errors) { 414 VkDebugUtilsObjectNameInfoEXT object_name_info = { 415 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, 416 .objectType = kind, 417 .objectHandle = handle, 418 .pObjectName = (char *)sb.data, 419 }; 420 vkSetDebugUtilsObjectNameEXT(vulkan_context->device, &object_name_info); 421 } 422 } 423 } 424 #else 425 #define vk_label_object(...) 426 #define vk_label_object_(...) 427 #endif 428 429 function VulkanEntity * 430 vk_entity_allocate(VulkanEntityKind kind) 431 { 432 VulkanEntity *result = 0; 433 DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock)) 434 { 435 result = SLLPopFreelist(vulkan_context->entity_freelist); 436 if (!result) result = push_array_no_zero(&vulkan_context->entity_arena, VulkanEntity, 1); 437 } 438 439 zero_struct(result); 440 result->kind = kind; 441 return result; 442 } 443 444 function void 445 vk_entity_release(VulkanEntity *entity) 446 { 447 DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock)) 448 { 449 SLLStackPush(vulkan_context->entity_freelist, entity, next); 450 } 451 } 452 453 function void * 454 vk_entity_data(VulkanHandle h, VulkanEntityKind kind) 455 { 456 VulkanEntity *e = (VulkanEntity *)h.value[0]; 457 assert(ValidVulkanHandle(h) && e->kind == kind); 458 return &e->as; 459 } 460 461 function VkCommandBuffer 462 vk_command_buffer(VulkanHandle h) 463 { 464 VulkanCommandBuffer *vcb = vk_entity_data(h, VulkanEntityKind_CommandBuffer); 465 VulkanCommandPool *vcp = vulkan_context->command_pools[vcb->timeline]; 466 VkCommandBuffer result = vcp->buffers[vcb->buffer_index]; 467 return result; 468 } 469 470 #define glslang_log(a, ...) glslang_log_(a, arg_list(s8, __VA_ARGS__)) 471 function void 472 glslang_log_(Arena arena, s8 *items, uz count) 473 { 474 Stream sb = arena_stream(arena); 475 stream_append_s8(&sb, glslang_info("")); 476 stream_append_s8s_(&sb, items, count); 477 if (sb.data[sb.widx - 1] != '\n') stream_append_byte(&sb, '\n'); 478 os_console_log(sb.data, sb.widx); 479 } 480 481 function s8 482 glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name) 483 { 484 /* NOTE(rnp): glslang's garbage c interface doesn't expose internal usage of strings with length */ 485 assert(shader_text.data[shader_text.len] == 0); 486 487 glslang_input_t input = { 488 .language = GLSLANG_SOURCE_GLSL, 489 .stage = kind, 490 .client = GLSLANG_CLIENT_VULKAN, 491 .client_version = GLSLANG_TARGET_VULKAN_1_4, 492 .target_language = GLSLANG_TARGET_SPV, 493 .target_language_version = GLSLANG_TARGET_SPV_1_6, 494 .code = (c8 *)shader_text.data, 495 .default_version = 460, 496 .default_profile = GLSLANG_NO_PROFILE, 497 .force_default_version_and_profile = 0, 498 .forward_compatible = 0, 499 .messages = GLSLANG_MSG_DEFAULT_BIT, 500 .resource = glslc_resource_constraints, 501 }; 502 glslang_shader_t *shader = glslang_shader_create(&input); 503 504 s8 error = {0}; 505 if (glslang_shader_preprocess(shader, &input)) { 506 if (!glslang_shader_parse(shader, &input)) 507 error = s8("parsing failed"); 508 } else { 509 error = s8("preprocessing failed"); 510 } 511 512 if (error.len) { 513 glslang_log(*arena, name, s8(": "), error, s8("\n"), 514 c_str_to_s8((c8 *)glslang_shader_get_info_log(shader)), 515 c_str_to_s8((c8 *)glslang_shader_get_info_debug_log(shader))); 516 glslang_shader_delete(shader); 517 shader = 0; 518 } 519 520 s8 result = {0}; 521 if (shader) { 522 glslang_program_t *program = glslang_program_create(); 523 glslang_program_add_shader(program, shader); 524 i32 messages = GLSLANG_MSG_DEBUG_INFO_BIT|GLSLANG_MSG_SPV_RULES_BIT|GLSLANG_MSG_VULKAN_RULES_BIT; 525 if (glslang_program_link(program, messages)) { 526 glslang_spv_options_t options = {.validate = 1,}; 527 528 if (vulkan_config.debug.shader_non_semantic_info && 529 vulkan_config.debug.shader_relaxed_extended_instruction) 530 { 531 options.generate_debug_info = 1; 532 options.emit_nonsemantic_shader_debug_info = 1; 533 options.emit_nonsemantic_shader_debug_source = 1; 534 } 535 536 glslang_program_add_source_text(program, kind, (c8 *)shader_text.data, shader_text.len); 537 glslang_program_SPIRV_generate_with_options(program, kind, &options); 538 539 u32 words = glslang_program_SPIRV_get_size(program); 540 result.data = (u8 *)push_array(arena, u32, words); 541 result.len = words * sizeof(u32); 542 glslang_program_SPIRV_get(program, (u32 *)result.data); 543 544 s8 spirv_msg = c_str_to_s8((c8 *)glslang_program_SPIRV_get_messages(program)); 545 if (spirv_msg.len) glslang_log(*arena, name, s8(": spirv info: "), spirv_msg); 546 } else { 547 glslang_log(*arena, name, s8(": shader linking failed\n"), 548 c_str_to_s8((c8 *)glslang_program_get_info_log(program)), 549 c_str_to_s8((c8 *)glslang_program_get_info_debug_log(program))); 550 } 551 glslang_shader_delete(shader); 552 glslang_program_delete(program); 553 } 554 555 return result; 556 } 557 558 function u32 559 vk_shader_kind_to_glslang_shader_kind(u32 kind) 560 { 561 u32 result = ctz_u64(kind); 562 return result; 563 } 564 565 function VkShaderModule 566 vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name) 567 { 568 VkShaderModule result = {0}; 569 s8 spirv = glsl_to_spirv(&arena, vk_shader_kind_to_glslang_shader_kind(kind), text, name); 570 VkShaderModuleCreateInfo create_info = { 571 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 572 .codeSize = (uz)spirv.len, 573 .pCode = (u32 *)spirv.data, 574 }; 575 if (spirv.len > 0) vkCreateShaderModule(vulkan_context->device, &create_info, 0, &result); 576 577 return result; 578 } 579 580 function VkShaderStageFlags 581 vk_stage_flags_from_shader_kind(VulkanShaderKind kind) 582 { 583 read_only local_persist VkShaderStageFlags map[VulkanShaderKind_Count + 1] = { 584 [VulkanShaderKind_Vertex] = VK_SHADER_STAGE_VERTEX_BIT, 585 [VulkanShaderKind_Mesh] = VK_SHADER_STAGE_MESH_BIT_EXT, 586 [VulkanShaderKind_Fragment] = VK_SHADER_STAGE_FRAGMENT_BIT, 587 [VulkanShaderKind_Compute] = VK_SHADER_STAGE_COMPUTE_BIT, 588 [VulkanShaderKind_Count] = 0, 589 }; 590 VkShaderStageFlags result = map[Clamp((u32)kind, 0, VulkanShaderKind_Count)]; 591 return result; 592 } 593 594 function VulkanPipeline 595 vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_constants_size) 596 { 597 VulkanPipeline result = {.stage_flags = VK_SHADER_STAGE_COMPUTE_BIT}; 598 VkShaderModule module = vk_compile_shader_module(arena, VK_SHADER_STAGE_COMPUTE_BIT, text, name); 599 if (module) { 600 VkPushConstantRange push_constant_range = { 601 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 602 .offset = 0, 603 .size = push_constants_size, 604 }; 605 606 VkPipelineLayoutCreateInfo pipeline_layout_create_info = { 607 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 608 .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), 609 .pSetLayouts = vulkan_context->descriptor_set_layouts, 610 .pushConstantRangeCount = push_constants_size ? 1 : 0, 611 .pPushConstantRanges = push_constants_size ? &push_constant_range : 0, 612 }; 613 614 vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_create_info, 0, &result.layout); 615 616 VkComputePipelineCreateInfo pipeline_create_info = { 617 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 618 .layout = result.layout, 619 .stage = { 620 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 621 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 622 .module = module, 623 .pName = "main", 624 }, 625 }; 626 627 vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline); 628 629 vk_label_object(PIPELINE, result.pipeline, str8_from_s8(name), str8("Pipeline")); 630 vk_label_object(PIPELINE_LAYOUT, result.layout, str8_from_s8(name), str8("Pipeline Layout")); 631 vk_label_object(SHADER_MODULE, module, str8_from_s8(name), str8("Module")); 632 633 vkDestroyShaderModule(vulkan_context->device, module, 0); 634 } 635 if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline; 636 637 return result; 638 } 639 640 function VulkanPipeline 641 vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size) 642 { 643 assume(count == 2); 644 645 VulkanPipeline result = {0}; 646 VkShaderModule modules[2]; 647 648 modules[0] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[0].kind), 649 infos[0].text, infos[0].name); 650 modules[1] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[1].kind), 651 infos[1].text, infos[1].name); 652 if (modules[0] && modules[1]) { 653 result.stage_flags = vk_stage_flags_from_shader_kind(infos[0].kind) 654 | vk_stage_flags_from_shader_kind(infos[1].kind); 655 656 VkPushConstantRange pcr = { 657 .stageFlags = result.stage_flags, 658 .offset = 0, 659 .size = push_constants_size, 660 }; 661 662 VkPipelineLayoutCreateInfo pipeline_layout_info = { 663 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 664 .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), 665 .pSetLayouts = vulkan_context->descriptor_set_layouts, 666 .pushConstantRangeCount = push_constants_size ? 1 : 0, 667 .pPushConstantRanges = push_constants_size ? &pcr : 0, 668 }; 669 670 vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_info, 0, &result.layout); 671 672 VkPipelineShaderStageCreateInfo shader_stage_create_infos[2] = { 673 { 674 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 675 .stage = vk_stage_flags_from_shader_kind(infos[0].kind), 676 .module = modules[0], 677 .pName = "main", 678 }, 679 { 680 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 681 .stage = vk_stage_flags_from_shader_kind(infos[1].kind), 682 .module = modules[1], 683 .pName = "main", 684 }, 685 }; 686 687 VkPipelineVertexInputStateCreateInfo vertex_input_info = { 688 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 689 }; 690 691 VkPipelineInputAssemblyStateCreateInfo input_assembly_info = { 692 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 693 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 694 }; 695 696 VkPipelineViewportStateCreateInfo viewport_info = { 697 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, 698 .viewportCount = 1, 699 .scissorCount = 1, 700 }; 701 702 VkPipelineRasterizationStateCreateInfo rasterization_info = { 703 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 704 .polygonMode = VK_POLYGON_MODE_FILL, 705 .lineWidth = 1.0f, 706 .cullMode = VK_CULL_MODE_BACK_BIT, 707 .frontFace = VK_FRONT_FACE_CLOCKWISE, 708 }; 709 710 VkPipelineMultisampleStateCreateInfo multisampling_info = { 711 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 712 .rasterizationSamples = vulkan_context->gpu_info.max_msaa_samples, 713 }; 714 715 VkPipelineDepthStencilStateCreateInfo depth_test_create_info = { 716 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 717 .depthTestEnable = 1, 718 .depthWriteEnable = 1, 719 .depthCompareOp = VK_COMPARE_OP_LESS, 720 .depthBoundsTestEnable = 1, 721 .stencilTestEnable = 0, 722 .front = {0}, 723 .back = {0}, 724 .minDepthBounds = 0.0f, 725 .maxDepthBounds = 1.0f, 726 }; 727 728 u32 colour_mask = VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT; 729 VkPipelineColorBlendAttachmentState blend_state = { 730 .colorWriteMask = colour_mask, 731 .blendEnable = 1, 732 .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA, 733 .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, 734 .colorBlendOp = VK_BLEND_OP_ADD, 735 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, 736 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, 737 .alphaBlendOp = VK_BLEND_OP_ADD, 738 }; 739 740 VkPipelineColorBlendStateCreateInfo colour_blend_state_create = { 741 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 742 .logicOpEnable = 0, 743 .logicOp = VK_LOGIC_OP_COPY, 744 .attachmentCount = 1, 745 .pAttachments = &blend_state, 746 }; 747 748 VkDynamicState dynamic_states[] = { 749 VK_DYNAMIC_STATE_VIEWPORT, 750 VK_DYNAMIC_STATE_SCISSOR, 751 }; 752 753 VkPipelineDynamicStateCreateInfo dynamic_state_info = { 754 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 755 .dynamicStateCount = countof(dynamic_states), 756 .pDynamicStates = dynamic_states, 757 }; 758 759 //VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_SRGB; 760 VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_UNORM; 761 VkPipelineRenderingCreateInfo rendering_create_info = { 762 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, 763 .colorAttachmentCount = 1, 764 .pColorAttachmentFormats = &colour_attachment_format, 765 .depthAttachmentFormat = vulkan_context->depth_stencil_format, 766 .stencilAttachmentFormat = vulkan_context->depth_stencil_format, 767 }; 768 769 VkGraphicsPipelineCreateInfo pci = { 770 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 771 .pNext = &rendering_create_info, 772 .stageCount = countof(shader_stage_create_infos), 773 .pStages = shader_stage_create_infos, 774 .pVertexInputState = &vertex_input_info, 775 .pInputAssemblyState = &input_assembly_info, 776 .pViewportState = &viewport_info, 777 .pRasterizationState = &rasterization_info, 778 .pMultisampleState = &multisampling_info, 779 .pDepthStencilState = &depth_test_create_info, 780 .pColorBlendState = &colour_blend_state_create, 781 .pDynamicState = &dynamic_state_info, 782 .layout = result.layout, 783 }; 784 785 vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline); 786 787 s8 extras[] = { 788 [VulkanShaderKind_Vertex] = s8_comp("Vertex Module"), 789 [VulkanShaderKind_Mesh] = s8_comp("Mesh Module"), 790 [VulkanShaderKind_Fragment] = s8_comp("Fragment Module"), 791 }; 792 assert(infos[0].kind < countof(extras)); 793 assert(infos[1].kind < countof(extras)); 794 795 vk_label_object(PIPELINE, result.pipeline, str8_from_s8(infos[0].name), str8("Pipeline")); 796 vk_label_object(PIPELINE_LAYOUT, result.layout, str8_from_s8(infos[0].name), str8("Pipeline Layout")); 797 //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]); 798 //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]); 799 } 800 801 if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0); 802 if (modules[1]) vkDestroyShaderModule(vulkan_context->device, modules[1], 0); 803 804 if (result.pipeline == 0) result = vulkan_context->default_graphics_pipeline; 805 806 return result; 807 } 808 809 function VulkanSemaphore 810 vk_make_semaphore(OSHandle *export) 811 { 812 VulkanContext *vk = vulkan_context; 813 814 VkSemaphoreCreateInfo sci = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; 815 VkExportSemaphoreCreateInfo esci = { 816 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, 817 .handleTypes = OS_WINDOWS ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT 818 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 819 }; 820 VkSemaphoreTypeCreateInfo stc = { 821 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, 822 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, 823 }; 824 825 if (export) sci.pNext = &esci; 826 else sci.pNext = &stc; 827 828 VulkanSemaphore result = {0}; 829 830 vkCreateSemaphore(vk->device, &sci, 0, &result.semaphore); 831 832 if (export) { 833 if (OS_WINDOWS) { 834 VkSemaphoreGetWin32HandleInfoKHR ghi = { 835 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, 836 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, 837 .semaphore = result.semaphore, 838 }; 839 void *handle; 840 vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle); 841 export->value[0] = (u64)handle; 842 } else { 843 VkSemaphoreGetFdInfoKHR ghi = { 844 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, 845 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 846 .semaphore = result.semaphore, 847 }; 848 i32 handle; 849 vkGetSemaphoreFdKHR(vk->device, &ghi, &handle); 850 export->value[0] = (u64)handle; 851 } 852 } 853 854 return result; 855 } 856 857 function void 858 vk_release_memory(VkDeviceMemory memory, u64 size) 859 { 860 VulkanContext *vk = vulkan_context; 861 vkFreeMemory(vk->device, memory, 0); 862 atomic_add_u64(&vk->gpu_info.gpu_heap_used, -size); 863 } 864 865 function b32 866 vk_allocate_memory(VkDeviceMemory *memory, u64 size, VulkanMemoryKind kind, VkMemoryAllocateFlags flags, 867 VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, OSHandle *export) 868 { 869 VulkanContext *vk = vulkan_context; 870 871 VkExportMemoryAllocateInfo export_info = { 872 .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, 873 .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 874 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 875 }; 876 877 VkMemoryAllocateFlagsInfo memory_allocate_flags_info = { 878 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, 879 .flags = flags, 880 .pNext = dedicated_allocate_info, 881 }; 882 883 if (export) { 884 export_info.pNext = dedicated_allocate_info; 885 memory_allocate_flags_info.pNext = &export_info; 886 } 887 888 VkMemoryAllocateInfo memory_allocate_info = { 889 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, 890 .allocationSize = size, 891 .memoryTypeIndex = vk->memory_info.memory_type_indices[kind], 892 .pNext = &memory_allocate_flags_info, 893 }; 894 895 b32 result = vkAllocateMemory(vk->device, &memory_allocate_info, 0, memory) == VK_SUCCESS; 896 if (result) { 897 atomic_add_u64(&vk->gpu_info.gpu_heap_used, memory_allocate_info.allocationSize); 898 899 if (export) { 900 if (OS_WINDOWS) { 901 VkMemoryGetWin32HandleInfoKHR handle_info = { 902 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, 903 .memory = *memory, 904 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, 905 }; 906 void *handle; 907 vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle); 908 export->value[0] = (u64)handle; 909 } else { 910 VkMemoryGetFdInfoKHR fd_info = { 911 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, 912 .memory = *memory, 913 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 914 }; 915 i32 fd; 916 vkGetMemoryFdKHR(vk->device, &fd_info, &fd); 917 export->value[0] = (u64)fd; 918 } 919 } 920 } 921 return result; 922 } 923 924 function u32 925 vk_index_size(VkIndexType type) 926 { 927 u32 result = 0; 928 switch (type) { 929 case VK_INDEX_TYPE_UINT16:{ result = 2; }break; 930 case VK_INDEX_TYPE_UINT32:{ result = 4; }break; 931 InvalidDefaultCase; 932 } 933 return result; 934 } 935 936 typedef struct { 937 GPUBuffer *gpu_buffer; 938 u64 size; 939 VulkanUsageFlags flags; 940 u32 queue_family_count; 941 u32 queue_family_indices[VulkanTimeline_Count]; 942 VkIndexType index_type; 943 OSHandle *export; 944 str8 label; 945 } VulkanBufferAllocateInfo; 946 947 function b32 948 vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai) 949 { 950 VulkanContext *vk = vulkan_context; 951 952 // TODO(rnp): this probably should be handled, its usually 4GB. likely 953 // need to chain multiple allocations and handle it in shader code 954 u64 clamp_size = vk->memory_info.max_allocation_size & ~(vk->memory_info.non_coherent_atom_size - 1); 955 956 // NOTE(rnp): renderdoc can't handle buffers that are too close to the allocation size limit 957 if (renderdoc_attached()) 958 clamp_size -= MB(8); 959 960 u64 size = Min(ai->size, clamp_size); 961 962 VkBufferCreateInfo buffer_create_info = { 963 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 964 .usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT|VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 965 .size = size, 966 .sharingMode = ai->queue_family_count > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, 967 .queueFamilyIndexCount = ai->queue_family_count, 968 .pQueueFamilyIndices = ai->queue_family_indices, 969 }; 970 971 if (ai->flags & VulkanUsageFlag_TransferSource) 972 buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; 973 974 if (ai->flags & VulkanUsageFlag_TransferDestination) 975 buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; 976 977 if (ai->index_type != VK_INDEX_TYPE_NONE_KHR) 978 buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; 979 980 VkExternalMemoryBufferCreateInfo external_memory_buffer_create_info = { 981 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, 982 .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 983 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 984 }; 985 986 if (ai->export) buffer_create_info.pNext = &external_memory_buffer_create_info; 987 988 vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer); 989 vk_label_object(BUFFER, vb->buffer, ai->label, str8("Buffer")); 990 991 VkMemoryRequirements memory_requirements; 992 vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements); 993 994 assert((u64)size <= memory_requirements.size); 995 size = memory_requirements.size; 996 997 VkMemoryDedicatedAllocateInfo dedicated_allocate_info = { 998 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 999 .buffer = vb->buffer, 1000 }; 1001 1002 /* NOTE(rnp): to create a CPU writable buffer: 1003 * 1. try to allocate and map the entire buffer 1004 * - this may fail if the buffer is bigger than the BAR size 1005 * (unknowable from vulkan), or the memory space has become 1006 * too fragmented (unlikely) 1007 * 2. if allocation or mapping fails we must chain a host buffer 1008 * for staging. If this happens in practice we should add 1009 * the ability to import an existing external allocation 1010 */ 1011 b32 host_read_write = (ai->flags & VulkanUsageFlag_HostReadWrite) != 0; 1012 vb->memory_kind = host_read_write ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device; 1013 1014 b32 result = 0; 1015 // TODO(rnp): this may fail if the allocation is too big for the BAR size 1016 // it needs to handled properly 1017 if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, ai->export)) { 1018 result = 1; 1019 ai->gpu_buffer->size = size; 1020 vb->memory_size = size; 1021 1022 vb->index_type = ai->index_type; 1023 1024 vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, str8("Memory")); 1025 1026 if (host_read_write) 1027 vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer); 1028 1029 vkBindBufferMemory(vk->device, vb->buffer, vb->memory, 0); 1030 VkBufferDeviceAddressInfo buffer_device_address_info = { 1031 .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, 1032 .buffer = vb->buffer, 1033 }; 1034 ai->gpu_buffer->gpu_pointer = vkGetBufferDeviceAddress(vk->device, &buffer_device_address_info); 1035 } 1036 return result; 1037 } 1038 1039 function void 1040 vk_load_instance(Arena arena, Stream *err) 1041 { 1042 #define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(0, #name); 1043 VkBaseProcedureList 1044 #undef X 1045 1046 u32 enabled_validation_layers_count = 0; 1047 const char *enabled_validation_layers[countof(vk_validation_layers)]; 1048 1049 u32 enabled_instance_extensions_count = 0; 1050 const char *enabled_instance_extensions[countof(vk_required_instance_extensions) + countof(vk_instance_debug_extensions)]; 1051 1052 static_assert(countof(vk_required_instance_extensions) == 0, ""); 1053 //for EachElement(vk_required_instance_extensions, it) 1054 // enabled_instance_extensions[enabled_instance_extensions_count++] = vk_required_instance_extensions[it]; 1055 1056 #if BEAMFORMER_DEBUG 1057 { 1058 u32 layer_count = 0; 1059 vkEnumerateInstanceLayerProperties(&layer_count, 0); 1060 1061 VkLayerProperties *layers = push_array(&arena, VkLayerProperties, layer_count); 1062 str8 *layer_str8s = push_array(&arena, str8, layer_count); 1063 vkEnumerateInstanceLayerProperties(&layer_count, layers); 1064 1065 for (u32 i = 0; i < layer_count; i++) 1066 layer_str8s[i] = str8_from_c_str(layers[i].layerName); 1067 1068 for EachElement(vk_validation_layers, it) { 1069 for(u32 i = 0; i < layer_count; i++) { 1070 if (str8_equal(vk_validation_layers[it], layer_str8s[i])) { 1071 u32 index = enabled_validation_layers_count++; 1072 enabled_validation_layers[index] = (char *)vk_validation_layers[it].data; 1073 vulkan_config.layers.enabled.E[it] = 1; 1074 vulkan_config.layers.version.E[it] = layers[i].specVersion; 1075 break; 1076 } 1077 } 1078 } 1079 1080 if (countof(vk_validation_layers) != enabled_validation_layers_count) { 1081 i32 missing_count = countof(vk_validation_layers) - enabled_validation_layers_count; 1082 stream_append_s8s(err, vulkan_info("missing validation layer"), 1083 missing_count > 1 ? s8("s:") : s8(":"), s8("\n")); 1084 1085 for EachElement(vk_validation_layers, it) 1086 if (vulkan_config.layers.enabled.E[it] == 0) 1087 stream_append_s8s(err, s8(" "), s8_from_str8(vk_validation_layers[it]), s8("\n")); 1088 } 1089 1090 u32 instance_extension_count = 0; 1091 vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, 0); 1092 1093 VkExtensionProperties *instance_extensions = push_array(&arena, VkExtensionProperties, instance_extension_count); 1094 s8 *instance_ext_s8s = push_array(&arena, s8, instance_extension_count); 1095 vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, instance_extensions); 1096 for EachIndex(instance_extension_count, it) 1097 instance_ext_s8s[it] = c_str_to_s8(instance_extensions[it].extensionName); 1098 1099 for EachElement(vk_instance_debug_extensions, it) { 1100 for EachIndex(instance_extension_count, i) { 1101 if (s8_equal(vk_instance_debug_extensions[it], instance_ext_s8s[i])) { 1102 u32 index = enabled_instance_extensions_count++; 1103 enabled_instance_extensions[index] = (char *)vk_instance_debug_extensions[it].data; 1104 vulkan_config.instance.E[it] = 1; 1105 break; 1106 } 1107 } 1108 } 1109 } 1110 #endif 1111 1112 VkApplicationInfo app_info = { 1113 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, 1114 .pApplicationName = BEAMFORMER_NAME_STRING, 1115 .applicationVersion = 0, 1116 .pEngineName = "No Engine", 1117 .engineVersion = 0, 1118 .apiVersion = VK_MAKE_API_VERSION(1, 3, 0, 0), 1119 }; 1120 1121 VkInstanceCreateInfo instance_create_info = { 1122 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, 1123 .pApplicationInfo = &app_info, 1124 .ppEnabledExtensionNames = enabled_instance_extensions, 1125 .enabledExtensionCount = enabled_instance_extensions_count, 1126 .ppEnabledLayerNames = enabled_validation_layers, 1127 .enabledLayerCount = enabled_validation_layers_count, 1128 }; 1129 1130 #if 0 && BEAMFORMER_DEBUG 1131 VkValidationFeatureEnableEXT validation_feature_enables[] = { 1132 VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, 1133 VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, 1134 VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT, 1135 VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, 1136 }; 1137 1138 VkValidationFeaturesEXT validation_features = { 1139 .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, 1140 .enabledValidationFeatureCount = countof(validation_feature_enables), 1141 .pEnabledValidationFeatures = validation_feature_enables, 1142 }; 1143 1144 instance_create_info.pNext = &validation_features; 1145 #endif 1146 1147 vkCreateInstance(&instance_create_info, 0, &vulkan_context->handle); 1148 1149 #define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(vulkan_context->handle, #name); 1150 VkInstanceProcedureList 1151 #undef X 1152 } 1153 1154 function void 1155 vk_load_physical_device(Arena arena, Stream *err) 1156 { 1157 VulkanContext *vk = vulkan_context; 1158 1159 u32 device_count; 1160 vkEnumeratePhysicalDevices(vk->handle, &device_count, 0); 1161 1162 VkPhysicalDevice *devices = push_array(&arena, typeof(*devices), device_count); 1163 vkEnumeratePhysicalDevices(vk->handle, &device_count, devices); 1164 1165 i32 best_index = -1, best_score = -1; 1166 for (u32 i = 0; i < device_count; i++) { 1167 Arena scratch = arena; 1168 VkPhysicalDeviceProperties2 *dp = push_struct(&scratch, typeof(*dp)); 1169 dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; 1170 vkGetPhysicalDeviceProperties2(devices[i], dp); 1171 1172 i32 score = 0; 1173 if (dp->properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) 1174 score++; 1175 1176 if (score > best_score) { 1177 best_score = score; 1178 best_index = (i32)i; 1179 } 1180 } 1181 1182 vk->physical_device = best_index >= 0 ? devices[best_index] : 0; 1183 if (!vk->physical_device) 1184 fatal(vulkan_info("failed to find a suitable GPU\n")); 1185 1186 VkPhysicalDeviceProperties2 dp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; 1187 VkPhysicalDeviceVulkan11Properties v11p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES}; 1188 dp.pNext = &v11p; 1189 1190 vkGetPhysicalDeviceProperties2(vk->physical_device, &dp); 1191 1192 stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n")); 1193 stream_append_s8(err, vulkan_info("Vulkan Version: ")); 1194 { 1195 u32 dv = dp.properties.apiVersion; 1196 stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv)); 1197 } 1198 1199 { 1200 Arena scratch = arena; 1201 u32 extension_count = 0; 1202 vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, 0); 1203 VkExtensionProperties *extensions = push_array(&scratch, VkExtensionProperties, extension_count); 1204 vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, extensions); 1205 1206 s8 *ext_str8s = push_array(&scratch, s8, extension_count); 1207 for (u32 index = 0; index < extension_count; index++) 1208 ext_str8s[index] = c_str_to_s8(extensions[index].extensionName); 1209 1210 b8 *supported = push_array(&scratch, b8, countof(vk_required_device_extensions)); 1211 for EachIndex(extension_count, index) 1212 for EachElement(vk_required_device_extensions, it) 1213 supported[it] |= s8_equal(vk_required_device_extensions[it], ext_str8s[index]); 1214 1215 u32 supported_count = 0; 1216 for EachElement(vk_required_device_extensions, it) 1217 supported_count += supported[it]; 1218 1219 u32 missing_count = countof(vk_required_device_extensions) - supported_count; 1220 if (missing_count) { 1221 stream_append_s8s(err, vulkan_info("fatal error: missing required device extension"), 1222 missing_count > 1 ? s8("s") : s8(""), s8(":\n")); 1223 for EachElement(vk_required_device_extensions, it) { 1224 if (!supported[it]) { 1225 s8 name = vk_required_device_extensions[it]; 1226 stream_append_s8s(err, vulkan_info(" "), name, s8("\n")); 1227 } 1228 } 1229 fatal(stream_to_s8(err)); 1230 } 1231 1232 for EachIndex(extension_count, index) 1233 for EachElement(vk_optional_device_extensions, it) 1234 vulkan_config.optional.E[it] |= s8_equal(vk_optional_device_extensions[it], ext_str8s[index]); 1235 1236 #if BEAMFORMER_DEBUG 1237 for EachIndex(extension_count, index) 1238 for EachElement(vk_debug_extensions, it) 1239 vulkan_config.debug.E[it] |= s8_equal(vk_debug_extensions[it], ext_str8s[index]); 1240 #endif 1241 } 1242 1243 { 1244 VkPhysicalDeviceFeatures2 df = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; 1245 VkPhysicalDeviceVulkan11Features v11f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES}; 1246 VkPhysicalDeviceVulkan12Features v12f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES}; 1247 VkPhysicalDeviceVulkan13Features v13f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES}; 1248 df.pNext = &v11f; 1249 v11f.pNext = &v12f; 1250 v12f.pNext = &v13f; 1251 vkGetPhysicalDeviceFeatures2(vk->physical_device, &df); 1252 1253 { 1254 b32 all_supported = 1; 1255 #define X(name, ...) all_supported &= df.features.name; 1256 VK_REQUIRED_PHYSICAL_FEATURES 1257 #undef X 1258 1259 if (!all_supported) { 1260 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1261 #define X(name, ...) if (!df.features.name) stream_append_s8(err, s8(" " #name "\n")); 1262 VK_REQUIRED_PHYSICAL_FEATURES 1263 #undef X 1264 fatal(stream_to_s8(err)); 1265 } 1266 } 1267 1268 { 1269 b32 all_supported = 1; 1270 #define X(name, ...) all_supported &= v11f.name; 1271 VK_REQUIRED_PHYSICAL_11_FEATURES 1272 #undef X 1273 1274 if (!all_supported) { 1275 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1276 #define X(name, ...) if (!v11f.name) stream_append_s8(err, s8(" " #name "\n")); 1277 VK_REQUIRED_PHYSICAL_11_FEATURES 1278 #undef X 1279 fatal(stream_to_s8(err)); 1280 } 1281 } 1282 1283 { 1284 b32 all_supported = 1; 1285 #define X(name, ...) all_supported &= v12f.name; 1286 VK_REQUIRED_PHYSICAL_12_FEATURES 1287 #undef X 1288 1289 if (!all_supported) { 1290 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1291 #define X(name, ...) if (!v12f.name) stream_append_s8(err, s8(" " #name "\n")); 1292 VK_REQUIRED_PHYSICAL_12_FEATURES 1293 #undef X 1294 fatal(stream_to_s8(err)); 1295 } 1296 } 1297 1298 { 1299 b32 all_supported = 1; 1300 #define X(name, ...) all_supported &= v13f.name; 1301 VK_REQUIRED_PHYSICAL_13_FEATURES 1302 #undef X 1303 1304 if (!all_supported) { 1305 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1306 #define X(name, ...) if (!v13f.name) stream_append_s8(err, s8(" " #name "\n")); 1307 VK_REQUIRED_PHYSICAL_13_FEATURES 1308 #undef X 1309 fatal(stream_to_s8(err)); 1310 } 1311 } 1312 1313 if (vulkan_config.optional.cooperative_matrix) { 1314 Arena scratch = arena; 1315 u32 property_count = 0; 1316 vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, 0); 1317 1318 VkCooperativeMatrixPropertiesKHR *mat = push_array(&scratch, VkCooperativeMatrixPropertiesKHR, property_count); 1319 1320 // NOTE(rnp): validation layer stupidity 1321 for EachIndex(property_count, it) 1322 mat[it].sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR; 1323 1324 vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, mat); 1325 b32 supported = 0; 1326 // TODO(rnp): for now the requirements are hardcoded, it is possible to support a couple 1327 // variations if needed. 1328 for EachIndex(property_count, it) { 1329 b32 match = 1; 1330 supported &= mat[it].scope == VK_SCOPE_SUBGROUP_KHR; 1331 1332 supported &= mat[it].MSize == 16; 1333 supported &= mat[it].NSize == 16; 1334 supported &= mat[it].KSize == 16; 1335 1336 supported &= mat[it].AType == VK_COMPONENT_TYPE_FLOAT16_KHR; 1337 supported &= mat[it].BType == VK_COMPONENT_TYPE_FLOAT16_KHR; 1338 supported &= mat[it].CType == VK_COMPONENT_TYPE_FLOAT32_KHR; 1339 supported &= mat[it].ResultType == VK_COMPONENT_TYPE_FLOAT32_KHR; 1340 1341 supported |= match; 1342 } 1343 vk->gpu_info.cooperative_matrix = supported; 1344 } 1345 } 1346 1347 VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2}; 1348 vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp); 1349 1350 VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties; 1351 1352 // NOTE(rnp): vulkan spec says that highest performance memory types must 1353 // come first. just take the first one found. 1354 1355 for (u32 i = 0; i < bmp->memoryHeapCount; i++) { 1356 if (bmp->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { 1357 vk->memory_info.gpu_heap_index = i; 1358 break; 1359 } 1360 } 1361 1362 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1363 if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { 1364 assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index); 1365 vk->memory_info.memory_type_indices[VulkanMemoryKind_Device] = i; 1366 break; 1367 } 1368 } 1369 1370 // TODO(rnp): it is possible that this isn't available. for devices like that we would need 1371 // to copy into a staging buffer then DMA. For now that is unsupported. 1372 u32 bar_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; 1373 i32 bar_index = -1; 1374 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1375 if ((bmp->memoryTypes[i].propertyFlags & bar_flags) == bar_flags) { 1376 assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index); 1377 bar_index = (i32)i; 1378 break; 1379 } 1380 } 1381 1382 // TODO(rnp): this shouldn't be fatal 1383 if (bar_index == -1) { 1384 stream_append_s8(err, vulkan_info("fatal error: GPU does not support host bar memory\n")); 1385 fatal(stream_to_s8(err)); 1386 } 1387 1388 vk->memory_info.memory_type_indices[VulkanMemoryKind_BAR] = bar_index; 1389 1390 vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = -1; 1391 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1392 if ((bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == 0) { 1393 if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { 1394 vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = (i8)i; 1395 break; 1396 } 1397 } 1398 } 1399 1400 if (vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] == -1) { 1401 stream_append_s8(err, vulkan_info("fatal error: vulkan driver does not provide host visible memory\n")); 1402 fatal(stream_to_s8(err)); 1403 } 1404 1405 for EachElement(vk->memory_info.memory_type_indices, it) { 1406 u32 ti = vk->memory_info.memory_type_indices[it]; 1407 u32 flags = bmp->memoryTypes[ti].propertyFlags; 1408 vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; 1409 } 1410 1411 vulkan_config.driver_api_version = dp.properties.apiVersion; 1412 vk->memory_info.max_allocation_size = v11p.maxMemoryAllocationSize; 1413 vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize; 1414 vk->gpu_info.vendor = dp.properties.vendorID; 1415 vk->gpu_info.gpu_heap_size = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size; 1416 vk->gpu_info.timestamp_period_ns = dp.properties.limits.timestampPeriod; 1417 vk->gpu_info.max_image_dimension_2D = dp.properties.limits.maxImageDimension2D; 1418 vk->gpu_info.max_image_dimension_3D = dp.properties.limits.maxImageDimension3D; 1419 vk->gpu_info.max_msaa_samples = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts); 1420 vk->gpu_info.subgroup_size = v11p.subgroupSize; 1421 vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize; 1422 1423 // IMPORTANT(rnp): memory must only be pushed at the end of the function 1424 vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName)); 1425 1426 #if BEAMFORMER_DEBUG 1427 { 1428 b32 mismatch = 0; 1429 for EachElement(vk_validation_layers, it) { 1430 u32 lv = vulkan_config.layers.version.E[it]; 1431 u32 dv = vulkan_config.driver_api_version; 1432 if (lv < dv) { 1433 mismatch = 1; 1434 stream_append_s8s(err, vulkan_info("warning: validaton layer \""), 1435 s8_from_str8(vk_validation_layers[it]), s8("\" version: ")); 1436 stream_appendf(err, "%u.%u.%u", VK_API_VERSION_MAJOR(lv), VK_API_VERSION_MINOR(lv), VK_API_VERSION_PATCH(lv)); 1437 stream_append_s8(err, s8(" lower than driver API version: ")); 1438 stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv)); 1439 } 1440 } 1441 1442 if (mismatch) 1443 stream_append_s8(err, vulkan_info("DO NOT report any bugs without updating your validation layers!\n")); 1444 } 1445 #endif 1446 } 1447 1448 function void 1449 vk_load_queues(Arena *memory, Stream *err) 1450 { 1451 /////////////////////////////////////////////////////// 1452 // NOTE(rnp): try to allocate an appropriate queue for 1453 // each of the following tasks: 1454 // * UI Rendering (Graphics) 1455 // * Beamforming (Compute) 1456 // * Upload (Transfer) 1457 // Then create a logical device ready for use 1458 1459 VulkanContext *vk = vulkan_context; 1460 1461 u32 queue_family_count; 1462 vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, 0); 1463 1464 TempArena arena_save = begin_temp_arena(memory); 1465 VkQueueFamilyProperties *queues = push_array(memory, typeof(*queues), queue_family_count); 1466 vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, queues); 1467 1468 i32 queue_indices[VulkanQueueKind_Count]; 1469 for EachElement(queue_indices, it) queue_indices[it] = -1; 1470 1471 /////////////////////////////////////////////////////////////// 1472 // NOTE(rnp): start by assigning queue families for each queue 1473 1474 /* NOTE(rnp): try for exclusive transfer queue */ 1475 #if !ForceSingleQueue 1476 { 1477 u32 mask = VK_QUEUE_GRAPHICS_BIT|VK_QUEUE_COMPUTE_BIT|VK_QUEUE_TRANSFER_BIT; 1478 u32 max_timestamp_bits = 0; 1479 for (u32 index = 0; index < queue_family_count; index++) { 1480 if ((queues[index].queueFlags & mask) == VK_QUEUE_TRANSFER_BIT) { 1481 if (queues[index].timestampValidBits > max_timestamp_bits) { 1482 max_timestamp_bits = queues[index].timestampValidBits; 1483 queue_indices[VulkanQueueKind_Transfer] = (i32)index; 1484 } 1485 } 1486 } 1487 } 1488 1489 /* NOTE(rnp): try for compute separate from graphics */ 1490 for (u32 index = 0; index < queue_family_count; index++) { 1491 if ((queues[index].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0 && 1492 (queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) 1493 { 1494 queue_indices[VulkanQueueKind_Compute] = (i32)index; 1495 break; 1496 } 1497 } 1498 #endif /* !ForceSingleQueue */ 1499 1500 /* NOTE(rnp): find graphics family and verify it is exclusive */ 1501 b32 multi_graphics = 0; 1502 for (u32 index = 0; index < queue_family_count; index++) { 1503 if ((queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) { 1504 // TODO(rnp): check for presentation support 1505 multi_graphics = queue_indices[VulkanQueueKind_Graphics] != -1; 1506 queue_indices[VulkanQueueKind_Graphics] = (i32)index; 1507 } 1508 } 1509 1510 if (multi_graphics) 1511 stream_append_s8(err, vulkan_info("warning: multiple queue families reported graphics support\n")); 1512 1513 if (queue_indices[VulkanQueueKind_Graphics] == -1) { 1514 stream_append_s8(err, vulkan_info("fatal error: GPU does not support graphics presentation\n")); 1515 fatal(stream_to_s8(err)); 1516 } 1517 1518 if (queue_indices[VulkanQueueKind_Compute] == -1) 1519 if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0) 1520 queue_indices[VulkanQueueKind_Compute] = queue_indices[VulkanQueueKind_Graphics]; 1521 1522 if (queue_indices[VulkanQueueKind_Compute] == -1) { 1523 stream_append_s8(err, vulkan_info("fatal error: GPU does not support compute\n")); 1524 fatal(stream_to_s8(err)); 1525 } 1526 1527 if (queue_indices[VulkanQueueKind_Transfer] == -1) { 1528 if ((queues[queue_indices[VulkanQueueKind_Compute]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0) 1529 queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Compute]; 1530 else if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0) 1531 queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Graphics]; 1532 } 1533 1534 if (queue_indices[VulkanQueueKind_Transfer] == -1) { 1535 stream_append_s8(err, vulkan_info("fatal error: GPU does not support data transfer\n")); 1536 fatal(stream_to_s8(err)); 1537 } 1538 1539 ///////////////////////////////////////////////////////////////// 1540 // NOTE(rnp): if queues share families try to allocate subqueues 1541 1542 u32 assigned_subindices[VulkanQueueKind_Count] = {0}; 1543 i32 queue_subindices[VulkanQueueKind_Count] = {0}; 1544 1545 assigned_subindices[VulkanQueueKind_Graphics] += 1; 1546 1547 if (queue_indices[VulkanQueueKind_Compute] == queue_indices[VulkanQueueKind_Graphics]) { 1548 if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount) 1549 queue_subindices[VulkanQueueKind_Compute] = assigned_subindices[VulkanQueueKind_Graphics]++; 1550 } else { 1551 assigned_subindices[VulkanQueueKind_Compute] += 1; 1552 } 1553 1554 if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Graphics]) { 1555 if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount) 1556 queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Graphics]++; 1557 } else if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Compute]) { 1558 if (assigned_subindices[VulkanQueueKind_Compute] < queues[queue_indices[VulkanQueueKind_Compute]].queueCount) 1559 queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Compute]++; 1560 } else { 1561 assigned_subindices[VulkanQueueKind_Transfer] += 1; 1562 } 1563 1564 for EachElement(assigned_subindices, it) 1565 vk->unique_queues += assigned_subindices[it]; 1566 1567 end_temp_arena(arena_save); 1568 1569 ///////////////////////////////////////////// 1570 // NOTE(rnp): fill in info and create device 1571 for EachElement(vk->queues, it) { 1572 u32 index = queue_subindices[it]; 1573 for (i32 i = 0; i < queue_indices[it]; i++) 1574 index += assigned_subindices[i]; 1575 vk->queue_indices[it] = index; 1576 } 1577 1578 for EachElement(vk->queues, it) { 1579 if (vk->queues[vk->queue_indices[it]] == 0) { 1580 vk->queues[vk->queue_indices[it]] = push_struct(memory, VulkanQueue); 1581 vk->queues[vk->queue_indices[it]]->queue_family = queue_indices[it]; 1582 vk->queues[vk->queue_indices[it]]->queue_index = queue_subindices[it]; 1583 } 1584 vk->queues[it] = vk->queues[vk->queue_indices[it]]; 1585 } 1586 1587 for EachElement(vk->command_pools, it) 1588 vk->command_pools[it] = push_struct(memory, VulkanCommandPool); 1589 1590 VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count]; 1591 1592 f32 queue_priorities[VulkanQueueKind_Count][VulkanQueueKind_Count]; 1593 for (u32 i = 0; i < VulkanQueueKind_Count; i++) 1594 for (u32 j = 0; j < VulkanQueueKind_Count; j++) 1595 queue_priorities[i][j] = 1.0f; 1596 queue_priorities[queue_indices[VulkanQueueKind_Compute]][queue_subindices[VulkanQueueKind_Compute]] = 0.5f; 1597 1598 u32 queue_create_index = 0; 1599 b32 queue_info_filled[VulkanQueueKind_Count] = {0}; 1600 for (u32 q = 0; q < vk->unique_queues; q++) { 1601 u32 base_q = queue_indices[q]; 1602 if (!queue_info_filled[base_q]) { 1603 queue_create_infos[queue_create_index++] = (VkDeviceQueueCreateInfo){ 1604 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, 1605 .queueFamilyIndex = base_q, 1606 .queueCount = assigned_subindices[q], 1607 .pQueuePriorities = queue_priorities[q], 1608 }; 1609 } 1610 queue_info_filled[base_q] = 1; 1611 } 1612 1613 u32 enabled_count = 0; 1614 const char *enabled_extensions[MAX_ENABLED_EXTENSIONS]; 1615 1616 for EachElement(vk_required_device_extensions, it) 1617 enabled_extensions[enabled_count++] = (char *)vk_required_device_extensions[it].data; 1618 1619 for EachElement(vk_optional_device_extensions, it) 1620 if (vulkan_config.optional.E[it]) 1621 enabled_extensions[enabled_count++] = (char *)vk_optional_device_extensions[it].data; 1622 1623 for EachElement(vk_debug_extensions, it) 1624 if (vulkan_config.debug.E[it]) 1625 enabled_extensions[enabled_count++] = (char *)vk_debug_extensions[it].data; 1626 1627 VkDeviceCreateInfo device_create_info = { 1628 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, 1629 .pQueueCreateInfos = queue_create_infos, 1630 .queueCreateInfoCount = queue_create_index, 1631 .ppEnabledExtensionNames = enabled_extensions, 1632 .enabledExtensionCount = enabled_count, 1633 }; 1634 1635 VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR pdsre = { 1636 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR, 1637 .shaderRelaxedExtendedInstruction = 1, 1638 }; 1639 if (vulkan_config.debug.shader_relaxed_extended_instruction) { 1640 pdsre.pNext = (void *)device_create_info.pNext; 1641 device_create_info.pNext = &pdsre; 1642 } 1643 1644 VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_mat_features = { 1645 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR, 1646 .cooperativeMatrix = 1, 1647 .cooperativeMatrixRobustBufferAccess = 0, 1648 }; 1649 if (vk->gpu_info.cooperative_matrix) { 1650 coop_mat_features.pNext = (void *)device_create_info.pNext; 1651 device_create_info.pNext = &coop_mat_features; 1652 } 1653 1654 VkPhysicalDeviceRobustness2FeaturesKHR robust2 = { 1655 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_KHR, 1656 .pNext = (void *)device_create_info.pNext, 1657 .nullDescriptor = 1, 1658 }; 1659 device_create_info.pNext = &robust2; 1660 1661 VkPhysicalDeviceVulkan13Features v13f = { 1662 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, 1663 .pNext = (void *)device_create_info.pNext, 1664 #define X(name, ...) .name = 1, 1665 VK_REQUIRED_PHYSICAL_13_FEATURES 1666 #undef X 1667 }; 1668 device_create_info.pNext = &v13f; 1669 1670 VkPhysicalDeviceVulkan12Features v12f = { 1671 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, 1672 .pNext = (void *)device_create_info.pNext, 1673 #define X(name, ...) .name = 1, 1674 VK_REQUIRED_PHYSICAL_12_FEATURES 1675 #undef X 1676 }; 1677 device_create_info.pNext = &v12f; 1678 1679 VkPhysicalDeviceVulkan11Features v11f = { 1680 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, 1681 .pNext = (void *)device_create_info.pNext, 1682 #define X(name, ...) .name = 1, 1683 VK_REQUIRED_PHYSICAL_11_FEATURES 1684 #undef X 1685 }; 1686 device_create_info.pNext = &v11f; 1687 1688 VkPhysicalDeviceFeatures2 device_features = { 1689 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, 1690 .pNext = (void *)device_create_info.pNext, 1691 .features = { 1692 #define X(name, ...) .name = 1, 1693 VK_REQUIRED_PHYSICAL_FEATURES 1694 #undef X 1695 }, 1696 }; 1697 device_create_info.pNext = &device_features; 1698 1699 vkCreateDevice(vk->physical_device, &device_create_info, 0, &vk->device); 1700 1701 #define X(name, ...) name = (name##_fn *)vkGetDeviceProcAddr(vk->device, #name); 1702 VkDeviceProcedureList 1703 #undef X 1704 1705 for (u32 q = 0; q < vk->unique_queues; q++) { 1706 VulkanQueue *qp = vk->queues[q]; 1707 vkGetDeviceQueue(vk->device, qp->queue_family, qp->queue_index, &qp->queue); 1708 1709 qp->timeline_semaphore = vk_make_semaphore(0); 1710 } 1711 1712 vk->queues[VulkanQueueKind_Graphics]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; 1713 vk->queues[VulkanQueueKind_Compute]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; 1714 1715 for EachElement(vk->command_pools, it) { 1716 VulkanCommandPool *vcp = vk->command_pools[it]; 1717 1718 VkCommandPoolCreateInfo command_pool_create_info = { 1719 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, 1720 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 1721 .queueFamilyIndex = vk->queues[it]->queue_family, 1722 }; 1723 1724 vkCreateCommandPool(vk->device, &command_pool_create_info, 0, &vcp->handle); 1725 1726 VkCommandBufferAllocateInfo command_buffer_allocate_info = { 1727 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 1728 .commandPool = vcp->handle, 1729 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1730 .commandBufferCount = countof(vcp->buffers), 1731 }; 1732 vkAllocateCommandBuffers(vk->device, &command_buffer_allocate_info, vcp->buffers); 1733 1734 VkQueryPoolCreateInfo query_pool_create_info = { 1735 .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 1736 .queryType = VK_QUERY_TYPE_TIMESTAMP, 1737 .queryCount = MaxCommandBuffersInFlight * MaxCommandBufferTimestamps, 1738 }; 1739 vkCreateQueryPool(vk->device, &query_pool_create_info, 0, &vcp->query_pool); 1740 } 1741 } 1742 1743 function void 1744 vk_load_graphics(void) 1745 { 1746 VulkanContext *vk = vulkan_context; 1747 1748 // NOTE: swap chain image format 1749 { 1750 } 1751 1752 // NOTE: depth/stencil format 1753 { 1754 VkFormat depth_formats[] = { 1755 VK_FORMAT_D32_SFLOAT_S8_UINT, 1756 VK_FORMAT_D24_UNORM_S8_UINT, 1757 VK_FORMAT_D16_UNORM_S8_UINT, 1758 }; 1759 1760 vk->depth_stencil_format = VK_FORMAT_UNDEFINED; 1761 for EachElement(depth_formats, it) { 1762 VkFormatProperties3 format_properties3 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3}; 1763 VkFormatProperties2 format_properties2 = { 1764 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, 1765 .pNext = &format_properties3, 1766 }; 1767 vkGetPhysicalDeviceFormatProperties2(vk->physical_device, depth_formats[it], &format_properties2); 1768 if (format_properties3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT) { 1769 vk->depth_stencil_format = depth_formats[it]; 1770 break; 1771 } 1772 } 1773 } 1774 } 1775 1776 function void 1777 vk_load_descriptor_block(void) 1778 { 1779 // NOTE(rnp): 1780 // * One Descriptor Pool 1781 // * One Descriptor Set Per Resource Kind 1782 // * Shaders know the ResourceKind enumeration 1783 // * Shaders know the per set binding points 1784 1785 VulkanContext *vk = vulkan_context; 1786 1787 // NOTE(rnp): Pool 1788 VkDescriptorPoolSize pool_sizes[] = { 1789 { 1790 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1791 .descriptorCount = BeamformerShaderBufferSlot_Count, 1792 }, 1793 }; 1794 static_assert(countof(pool_sizes) == BeamformerShaderResourceKind_Count, ""); 1795 1796 VkDescriptorPoolCreateInfo pool_create_info = { 1797 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 1798 .maxSets = BeamformerShaderResourceKind_Count, 1799 .poolSizeCount = countof(pool_sizes), 1800 .pPoolSizes = pool_sizes, 1801 }; 1802 1803 vkCreateDescriptorPool(vk->device, &pool_create_info, 0, &vk->descriptor_pool); 1804 1805 // NOTE(rnp): Set Layouts 1806 VkDescriptorSetLayoutCreateInfo layout_create_info = { 1807 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 1808 }; 1809 1810 { 1811 VkDescriptorSetLayoutBinding layout_bindings[BeamformerShaderBufferSlot_Count]; 1812 for EachEnumValue(BeamformerShaderBufferSlot, it) { 1813 layout_bindings[it] = (VkDescriptorSetLayoutBinding){ 1814 .binding = it, 1815 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1816 .descriptorCount = 1, 1817 .stageFlags = VK_SHADER_STAGE_ALL, 1818 }; 1819 } 1820 layout_create_info.bindingCount = countof(layout_bindings), 1821 layout_create_info.pBindings = layout_bindings, 1822 vkCreateDescriptorSetLayout(vk->device, &layout_create_info, 0, 1823 vk->descriptor_set_layouts + BeamformerShaderResourceKind_Buffer); 1824 } 1825 1826 // NOTE(rnp): Sets 1827 VkDescriptorSetAllocateInfo set_allocate_info = { 1828 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 1829 .descriptorPool = vk->descriptor_pool, 1830 .descriptorSetCount = countof(vk->descriptor_sets), 1831 .pSetLayouts = vk->descriptor_set_layouts, 1832 }; 1833 static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), ""); 1834 vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets); 1835 1836 vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, str8("Beamformer Resources"), str8("Pool")); 1837 1838 DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) { 1839 Arena scratch = vk->arena; 1840 for EachElement(vk->descriptor_sets, it) { 1841 Stream sb = arena_stream(scratch); 1842 stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s")); 1843 vk_label_object(DESCRIPTOR_SET, vk->descriptor_sets[it], stream_to_str8(&sb), str8("Set")); 1844 vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_str8(&sb), str8("Set Layout")); 1845 } 1846 } 1847 1848 // NOTE(rnp): junk API requirement that doesn't allow 0 initialization 1849 for EachElement(vk->descriptor_buffer_infos, it) 1850 vk->descriptor_buffer_infos[it].range = VK_WHOLE_SIZE; 1851 } 1852 1853 /////////////////////// 1854 // NOTE(rnp): User API 1855 1856 DEBUG_IMPORT void 1857 vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err) 1858 { 1859 #define X(name, ...) name = (name##_fn *)os_lookup_symbol(vulkan_library_handle, #name); 1860 VkLoaderProcedureList 1861 #undef X 1862 1863 if (!vkGetInstanceProcAddr) { 1864 stream_append_s8(err, vulkan_info("fatal error: failed to find \"vkGetInstanceProcAddr\"\n")); 1865 fatal(stream_to_s8(err)); 1866 } 1867 1868 VulkanContext *vk = vulkan_context; 1869 vk->entity_arena = sub_arena_end(memory, KB(64), KB(4)); 1870 vk->arena = sub_arena_end(memory, KB(96), KB(4)); 1871 1872 vk_load_instance(vk->arena, err); 1873 vk_load_physical_device(vk->arena, err); 1874 vk_load_queues(&vk->arena, err); 1875 vk_load_graphics(); 1876 vk_load_descriptor_block(); 1877 1878 read_only local_persist s8 default_compute_shader = s8("" 1879 "#version 430 core\n" 1880 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1881 "void main() {}\n" 1882 "\n"); 1883 vk->default_compute_pipeline = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader, 1884 s8("error_compute_shader"), 256); 1885 1886 read_only local_persist s8 default_vertex_shader = s8("" 1887 "#version 430 core\n" 1888 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1889 "void main() {gl_Position = vec4(0);}\n" 1890 "\n"); 1891 read_only local_persist s8 default_fragment_shader = s8("" 1892 "#version 430 core\n" 1893 "layout(location = 0) out vec4 out_colour;" 1894 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1895 "void main() {out_colour = vec4(0.5f, 0.0f, 0.5f, 1.0f);}\n" 1896 "\n"); 1897 1898 VulkanPipelineCreateInfo pipeline_create_infos[2] = { 1899 { 1900 .kind = VulkanShaderKind_Vertex, 1901 .text = default_vertex_shader, 1902 .name = s8("error_vertex_shader"), 1903 }, 1904 { 1905 .kind = VulkanShaderKind_Fragment, 1906 .text = default_fragment_shader, 1907 .name = s8("error_fragment_shader"), 1908 }, 1909 }; 1910 vk->default_graphics_pipeline = vk_graphics_pipeline_from_infos(vk->arena, pipeline_create_infos, 2, 256); 1911 1912 // TODO: setup ui render pipeline 1913 1914 if (err->widx > 0) { 1915 os_console_log(err->data, err->widx); 1916 stream_reset(err, 0); 1917 } 1918 } 1919 1920 DEBUG_IMPORT GPUInfo * 1921 vk_gpu_info(void) 1922 { 1923 return &vulkan_context->gpu_info; 1924 } 1925 1926 function void 1927 vk_vulkan_buffer_release(VulkanBuffer *vb) 1928 { 1929 VulkanContext *vk = vulkan_context; 1930 VulkanEntity *e = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as)); 1931 // TODO(rnp): this happens implicitly, probably just delete this if block 1932 if (vb->host_pointer) 1933 vkUnmapMemory(vk->device, vb->memory); 1934 1935 if (vb->buffer) 1936 vkDestroyBuffer(vk->device, vb->buffer, 0); 1937 1938 vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? vb->memory_size : 0); 1939 vk_entity_release(e); 1940 } 1941 1942 DEBUG_IMPORT void 1943 vk_buffer_release(GPUBuffer *b) 1944 { 1945 if ValidVulkanHandle(b->handle) 1946 vk_vulkan_buffer_release(vk_entity_data(b->handle, VulkanEntityKind_Buffer)); 1947 zero_struct(b); 1948 } 1949 1950 DEBUG_IMPORT void 1951 vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info) 1952 { 1953 VulkanContext *vk = vulkan_context; 1954 1955 vk_buffer_release(b); 1956 1957 assert(info->size > 0); 1958 1959 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer); 1960 VulkanBufferAllocateInfo vulkan_buffer_allocate_info = { 1961 .gpu_buffer = b, 1962 .size = (u64)info->size, 1963 .flags = info->flags, 1964 .index_type = VK_INDEX_TYPE_NONE_KHR, 1965 .label = info->label, 1966 .export = info->export, 1967 }; 1968 1969 u32 queue_index_hit_count[VulkanQueueKind_Count] = {0}; 1970 for (u32 it = 0; it < info->timeline_count; it++) 1971 queue_index_hit_count[vk->queue_indices[info->timelines_used[it]]]++; 1972 1973 for EachElement(queue_index_hit_count, it) { 1974 if (queue_index_hit_count[it] > 0) { 1975 u32 index = vulkan_buffer_allocate_info.queue_family_count++; 1976 vulkan_buffer_allocate_info.queue_family_indices[index] = vk->queues[vk->queue_indices[it]]->queue_family; 1977 } 1978 } 1979 1980 if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { 1981 b->handle.value[0] = (u64)e; 1982 } else { 1983 vk_entity_release(e); 1984 } 1985 } 1986 1987 DEBUG_IMPORT b32 1988 vk_buffer_needs_sync(GPUBuffer *b) 1989 { 1990 b32 result = 0; 1991 if ValidVulkanHandle(b->handle) { 1992 VulkanBuffer *vb = vk_entity_data(b->handle, VulkanEntityKind_Buffer); 1993 1994 // TODO(rnp): not correct check. need to check if we used transfer queue 1995 result = vb->memory_kind != VulkanMemoryKind_BAR; 1996 } 1997 1998 return result; 1999 } 2000 2001 DEBUG_IMPORT u64 2002 vk_round_up_to_sync_size(u64 size, u64 min) 2003 { 2004 iz round = (iz)Max(min, vulkan_context->memory_info.non_coherent_atom_size); 2005 u64 result = (u64)round_up_to((iz)size, round); 2006 return result; 2007 } 2008 2009 function force_inline void 2010 vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 destination_offset, u64 source_offset, u64 size, b32 non_temporal) 2011 { 2012 VulkanContext *vk = vulkan_context; 2013 2014 switch (source->memory_kind) { 2015 case VulkanMemoryKind_BAR: 2016 { 2017 switch (destination->memory_kind) { 2018 case VulkanMemoryKind_Host:{ 2019 if (destination->memory) { 2020 // TODO(rnp): there is likely a more efficient way of doing this in this case 2021 InvalidCodePath; 2022 } else { 2023 assert(source->host_pointer); 2024 b32 coherent = vk->memory_info.memory_host_coherent[source->memory_kind]; 2025 if (!coherent) { 2026 u64 nca_size = vk->memory_info.non_coherent_atom_size; 2027 VkMappedMemoryRange mrs[1] = {{ 2028 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 2029 .memory = source->memory, 2030 .offset = source_offset - (source_offset % nca_size), 2031 .size = vk_round_up_to_sync_size(size, nca_size), 2032 }}; 2033 vkInvalidateMappedMemoryRanges(vk->device, countof(mrs), mrs); 2034 } 2035 2036 void *dest = (u8 *)destination->host_pointer + destination_offset; 2037 void *src = (u8 *)source->host_pointer + source_offset; 2038 2039 // NOTE(rnp): don't trash the CPU cache for large data stores 2040 if (non_temporal) memory_copy_non_temporal(dest, src, size); 2041 else mem_copy(dest, src, size); 2042 } 2043 }break; 2044 InvalidDefaultCase; 2045 } 2046 }break; 2047 2048 case VulkanMemoryKind_Host:{ 2049 switch (destination->memory_kind) { 2050 case VulkanMemoryKind_BAR:{ 2051 assert(destination->host_pointer); 2052 2053 void *dest = (u8 *)destination->host_pointer + destination_offset; 2054 void *src = (u8 *)source->host_pointer + source_offset; 2055 2056 // NOTE(rnp): don't trash the CPU cache for large data stores 2057 if (non_temporal) memory_copy_non_temporal(dest, src, size); 2058 else mem_copy(dest, src, size); 2059 2060 b32 coherent = vk->memory_info.memory_host_coherent[destination->memory_kind]; 2061 if (!coherent) { 2062 u64 nca_size = vk->memory_info.non_coherent_atom_size; 2063 VkMappedMemoryRange mrs[1] = {{ 2064 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 2065 .memory = destination->memory, 2066 .offset = destination_offset - (destination_offset % nca_size), 2067 .size = vk_round_up_to_sync_size(size, nca_size), 2068 }}; 2069 vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs); 2070 } 2071 }break; 2072 InvalidDefaultCase; 2073 2074 } 2075 }break; 2076 2077 // TODO(rnp): use transfer queue when not mapped 2078 InvalidDefaultCase; 2079 } 2080 } 2081 2082 DEBUG_IMPORT void 2083 vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal) 2084 { 2085 VulkanBuffer *db = vk_entity_data(b->handle, VulkanEntityKind_Buffer); 2086 VulkanBuffer sb = { 2087 .host_pointer = data, 2088 .memory_kind = VulkanMemoryKind_Host, 2089 }; 2090 vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal); 2091 } 2092 2093 DEBUG_IMPORT void 2094 vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal) 2095 { 2096 VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); 2097 VulkanBuffer db = { 2098 .host_pointer = destination, 2099 .memory_kind = VulkanMemoryKind_Host, 2100 }; 2101 vk_buffer_buffer_copy(&db, sb, 0, offset, size, non_temporal); 2102 } 2103 2104 DEBUG_IMPORT void 2105 vk_render_model_release(GPUBuffer *model) 2106 { 2107 if ValidVulkanHandle(model->handle) 2108 vk_vulkan_buffer_release(vk_entity_data(model->handle, VulkanEntityKind_RenderModel)); 2109 zero_struct(model); 2110 } 2111 2112 DEBUG_IMPORT void 2113 vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 model_size, s8 label) 2114 { 2115 vk_render_model_release(model); 2116 2117 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_RenderModel); 2118 2119 assert(index_count <= U32_MAX); 2120 VkIndexType index_type; 2121 if (index_count <= U16_MAX) index_type = VK_INDEX_TYPE_UINT16; 2122 else index_type = VK_INDEX_TYPE_UINT32; 2123 2124 i64 indices_size = round_up_to(vk_index_size(index_type) * index_count, 64); 2125 2126 i64 size = round_up_to(model_size + indices_size, 64); 2127 assert(size > 0); 2128 2129 VulkanBufferAllocateInfo vulkan_buffer_allocate_info = { 2130 .gpu_buffer = model, 2131 .size = (u64)size, 2132 .flags = VulkanUsageFlag_HostReadWrite, 2133 .index_type = index_type, 2134 .label = str8_from_s8(label), 2135 .queue_family_count = 1, 2136 .queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family, 2137 }; 2138 if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { 2139 model->handle.value[0] = (u64)e; 2140 model->index_count = index_count; 2141 model->gpu_pointer += indices_size; 2142 2143 VulkanBuffer sb = { 2144 .host_pointer = indices, 2145 .memory_kind = VulkanMemoryKind_Host, 2146 }; 2147 2148 vk_buffer_buffer_copy(&e->as.buffer, &sb, 0, 0, vk_index_size(index_type) * index_count, 0); 2149 } else { 2150 vk_entity_release(e); 2151 } 2152 } 2153 2154 DEBUG_IMPORT void 2155 vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal) 2156 { 2157 VulkanBuffer *db = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); 2158 VulkanBuffer sb = { 2159 .host_pointer = data, 2160 .memory_kind = VulkanMemoryKind_Host, 2161 }; 2162 2163 offset += round_up_to(vk_index_size(db->index_type) * model->index_count, 64); 2164 2165 vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal); 2166 } 2167 2168 DEBUG_IMPORT void 2169 vk_image_release(GPUImage *image) 2170 { 2171 if ValidVulkanHandle(image->image) { 2172 VulkanContext *vk = vulkan_context; 2173 VulkanImage *vi = vk_entity_data(image->image, VulkanEntityKind_Image); 2174 2175 vkDestroyImageView(vk->device, vi->view, 0); 2176 vkDestroyImage(vk->device, vi->image, 0); 2177 vk_release_memory(vi->memory, image->memory_size); 2178 2179 vk_entity_release((VulkanEntity *)image->image.value[0]); 2180 } 2181 zero_struct(image); 2182 } 2183 2184 DEBUG_IMPORT void 2185 vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples, 2186 VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label) 2187 { 2188 assert(IsPowerOfTwo(samples)); 2189 2190 vk_image_release(image); 2191 2192 VulkanContext *vk = vulkan_context; 2193 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Image); 2194 VulkanImage *vi = &e->as.image; 2195 2196 image->image.value[0] = (u64)e; 2197 image->width = Min(width, vk->gpu_info.max_image_dimension_2D); 2198 image->height = Min(height, vk->gpu_info.max_image_dimension_2D); 2199 image->mip_map_levels = Max(mips, 1); 2200 image->samples = Min(samples, vk->gpu_info.max_msaa_samples); 2201 2202 VkFormat usage_format_map[VulkanImageUsage_Count + 1] = { 2203 [VulkanImageUsage_None] = VK_FORMAT_UNDEFINED, 2204 //[VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_SRGB, 2205 [VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_UNORM, 2206 [VulkanImageUsage_DepthStencil] = vk->depth_stencil_format, 2207 [VulkanImageUsage_Count] = VK_FORMAT_UNDEFINED, 2208 }; 2209 2210 read_only local_persist VkImageUsageFlagBits usage_extra_bit_map[VulkanImageUsage_Count + 1] = { 2211 [VulkanImageUsage_None] = 0, 2212 [VulkanImageUsage_Colour] = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 2213 [VulkanImageUsage_DepthStencil] = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 2214 [VulkanImageUsage_Count] = 0, 2215 }; 2216 2217 read_only local_persist VkImageAspectFlags usage_image_aspect_map[VulkanImageUsage_Count + 1] = { 2218 [VulkanImageUsage_None] = 0, 2219 [VulkanImageUsage_Colour] = VK_IMAGE_ASPECT_COLOR_BIT, 2220 [VulkanImageUsage_DepthStencil] = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT, 2221 [VulkanImageUsage_Count] = 0, 2222 }; 2223 2224 usage = Clamp((u32)usage, 0, VulkanImageUsage_Count); 2225 VkImageUsageFlagBits usage_flags = usage_extra_bit_map[usage]; 2226 2227 if (flags & VulkanUsageFlag_ImageSampling) usage_flags |= VK_IMAGE_USAGE_SAMPLED_BIT; 2228 if (flags & VulkanUsageFlag_TransferSource) usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; 2229 if (flags & VulkanUsageFlag_TransferDestination) usage_flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; 2230 2231 u32 queue_family = vk->queues[VulkanQueueKind_Graphics]->queue_family; 2232 VkImageCreateInfo image_create_info = { 2233 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 2234 .flags = export ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0, 2235 .imageType = VK_IMAGE_TYPE_2D, 2236 .format = usage_format_map[usage], 2237 .extent = {image->width, image->height, 1}, 2238 .mipLevels = image->mip_map_levels, 2239 .arrayLayers = 1, 2240 .samples = image->samples, 2241 .tiling = VK_IMAGE_TILING_OPTIMAL, 2242 .usage = usage_flags, 2243 // NOTE(rnp): needed if multiple queue families are accessed 2244 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 2245 .queueFamilyIndexCount = 1, 2246 .pQueueFamilyIndices = &queue_family, 2247 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2248 }; 2249 2250 VkExternalMemoryImageCreateInfo external_memory_image_create_info = { 2251 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, 2252 .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 2253 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 2254 }; 2255 2256 if (export) image_create_info.pNext = &external_memory_image_create_info; 2257 2258 vkCreateImage(vk->device, &image_create_info, 0, &vi->image); 2259 2260 VkMemoryRequirements memory_requirements; 2261 vkGetImageMemoryRequirements(vk->device, vi->image, &memory_requirements); 2262 2263 VkMemoryDedicatedAllocateInfo dedicated_allocate_info = { 2264 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 2265 .image = vi->image, 2266 }; 2267 2268 if (vk_allocate_memory(&vi->memory, memory_requirements.size, VulkanMemoryKind_Device, 0, &dedicated_allocate_info, export)) { 2269 image->memory_size = memory_requirements.size; 2270 vkBindImageMemory(vk->device, vi->image, vi->memory, 0); 2271 2272 VkImageViewCreateInfo image_view_info = { 2273 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 2274 .image = vi->image, 2275 .viewType = VK_IMAGE_VIEW_TYPE_2D, 2276 .format = usage_format_map[usage], 2277 .subresourceRange = { 2278 .aspectMask = usage_image_aspect_map[usage], 2279 .baseMipLevel = 0, 2280 .levelCount = 1, 2281 .baseArrayLayer = 0, 2282 .layerCount = 1, 2283 }, 2284 }; 2285 vkCreateImageView(vk->device, &image_view_info, 0, &vi->view); 2286 2287 vk_label_object(IMAGE, vi->image, str8_from_s8(label), str8("Image")); 2288 vk_label_object(IMAGE_VIEW, vi->view, str8_from_s8(label), str8("Image View")); 2289 vk_label_object(DEVICE_MEMORY, vi->memory, str8_from_s8(label), str8("Memory")); 2290 } else { 2291 vkDestroyImage(vk->device, vi->image, 0); 2292 vk_entity_release(e); 2293 zero_struct(image); 2294 } 2295 } 2296 2297 DEBUG_IMPORT VulkanHandle 2298 vk_create_semaphore(OSHandle *export) 2299 { 2300 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Semaphore); 2301 e->as.semaphore = vk_make_semaphore(export); 2302 VulkanHandle result = {(u64)e}; 2303 return result; 2304 } 2305 2306 DEBUG_IMPORT b32 2307 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns) 2308 { 2309 b32 result = 0; 2310 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2311 VulkanContext *vk = vulkan_context; 2312 VulkanQueue *vq = vk->queues[timeline]; 2313 VkSemaphoreWaitInfo semaphore_wait_info = { 2314 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, 2315 .pSemaphores = &vq->timeline_semaphore.semaphore, 2316 .semaphoreCount = 1, 2317 .pValues = &value, 2318 }; 2319 result = vkWaitSemaphores(vk->device, &semaphore_wait_info, timeout_ns) == VK_SUCCESS; 2320 } 2321 return result; 2322 } 2323 2324 DEBUG_IMPORT u64 2325 vk_host_signal_timeline(VulkanTimeline timeline) 2326 { 2327 u64 result = -1; 2328 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2329 VulkanContext *vk = vulkan_context; 2330 VulkanQueue *vq = vk->queues[timeline]; 2331 VulkanSemaphore *vs = &vq->timeline_semaphore; 2332 result = ++vs->value; 2333 VkSemaphoreSignalInfo ssi = { 2334 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, 2335 .semaphore = vs->semaphore, 2336 .value = result, 2337 }; 2338 vkSignalSemaphore(vk->device, &ssi); 2339 } 2340 return result; 2341 } 2342 2343 DEBUG_IMPORT VulkanHandle 2344 vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size) 2345 { 2346 assert(Between(count, 1, 2)); 2347 assert(count == 2 || infos[0].kind == VulkanShaderKind_Compute); 2348 2349 VulkanHandle result = {0}; 2350 DeferLoop(take_lock(&vulkan_context->arena_lock, -1), release_lock(&vulkan_context->arena_lock)) 2351 { 2352 Arena arena = vulkan_context->arena; 2353 2354 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Pipeline); 2355 result = (VulkanHandle){(u64)e}; 2356 2357 if (count == 2) e->as.pipeline = vk_graphics_pipeline_from_infos(arena, infos, count, push_constants_size); 2358 else e->as.pipeline = vk_compute_pipeline_from_shader_text(arena, infos[0].text, infos[0].name, push_constants_size); 2359 } 2360 return result; 2361 } 2362 2363 DEBUG_IMPORT b32 2364 vk_pipeline_valid(VulkanHandle h) 2365 { 2366 b32 result = 0; 2367 if ValidVulkanHandle(h) { 2368 VulkanPipeline *vp = vk_entity_data(h, VulkanEntityKind_Pipeline); 2369 if (vp->stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) 2370 result = vp->pipeline != vulkan_context->default_compute_pipeline.pipeline; 2371 else 2372 result = vp->pipeline != vulkan_context->default_graphics_pipeline.pipeline; 2373 } 2374 return result; 2375 } 2376 2377 DEBUG_IMPORT void 2378 vk_pipeline_release(VulkanHandle h) 2379 { 2380 if (vk_pipeline_valid(h)) { 2381 VulkanEntity *e = (VulkanEntity *)h.value[0]; 2382 VulkanTimeline timeline; 2383 if (e->as.pipeline.stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) timeline = VulkanTimeline_Compute; 2384 else timeline = VulkanTimeline_Graphics; 2385 2386 // NOTE(rnp): block more command buffers from being recorded 2387 VulkanCommandPool *vcp = vulkan_context->command_pools[timeline]; 2388 DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) { 2389 u32 index = (vcp->next_index - 1) % countof(vcp->buffers); 2390 vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2391 vkDestroyPipeline(vulkan_context->device, e->as.pipeline.pipeline, 0); 2392 vkDestroyPipelineLayout(vulkan_context->device, e->as.pipeline.layout, 0); 2393 2394 if (&e->as.pipeline == vcp->bound_pipeline) 2395 vcp->bound_pipeline = 0; 2396 } 2397 vk_entity_release(e); 2398 } 2399 } 2400 2401 DEBUG_IMPORT void 2402 vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count) 2403 { 2404 VulkanContext *vk = vulkan_context; 2405 2406 VkWriteDescriptorSet write_sets[BeamformerShaderResourceKind_Count] = {0}; 2407 2408 for EachIndex(info_count, it) { 2409 switch (infos[it].kind) { 2410 case BeamformerShaderResourceKind_Buffer:{ 2411 VulkanBuffer *vb = vk_entity_data(infos[it].handle, VulkanEntityKind_Buffer); 2412 vk->descriptor_buffer_infos[infos[it].slot].buffer = vb->buffer; 2413 vk->descriptor_buffer_infos[infos[it].slot].offset = 0; 2414 vk->descriptor_buffer_infos[infos[it].slot].range = vb->memory_size; 2415 }break; 2416 2417 InvalidDefaultCase; 2418 } 2419 } 2420 2421 write_sets[BeamformerShaderResourceKind_Buffer].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; 2422 write_sets[BeamformerShaderResourceKind_Buffer].dstSet = vk->descriptor_sets[BeamformerShaderResourceKind_Buffer]; 2423 write_sets[BeamformerShaderResourceKind_Buffer].dstBinding = 0; 2424 write_sets[BeamformerShaderResourceKind_Buffer].descriptorCount = countof(vk->descriptor_buffer_infos); 2425 write_sets[BeamformerShaderResourceKind_Buffer].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 2426 write_sets[BeamformerShaderResourceKind_Buffer].pBufferInfo = vk->descriptor_buffer_infos; 2427 2428 vkUpdateDescriptorSets(vk->device, countof(write_sets), write_sets, 0, 0); 2429 } 2430 2431 DEBUG_IMPORT VulkanHandle 2432 vk_command_begin(VulkanTimeline timeline) 2433 { 2434 VulkanHandle result = {0}; 2435 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2436 VulkanContext *vk = vulkan_context; 2437 VulkanCommandPool *vcp = vk->command_pools[timeline]; 2438 2439 take_lock(&vcp->lock, -1); 2440 2441 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_CommandBuffer); 2442 VulkanCommandBuffer *vcb = &e->as.command_buffer; 2443 vcb->timeline = timeline; 2444 vcb->buffer_index = vcp->next_index++ % countof(vcp->buffers); 2445 2446 u32 index = vcb->buffer_index; 2447 // TODO(rnp): probably not the best to have this here but it will likely not be hit 2448 b32 wait_result = vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2449 assert(wait_result); 2450 2451 vcp->queries_occupied[index] = 0; 2452 2453 VkCommandBufferBeginInfo buffer_begin_info = { 2454 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 2455 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 2456 }; 2457 2458 vkBeginCommandBuffer(vcp->buffers[index], &buffer_begin_info); 2459 vkCmdResetQueryPool(vcp->buffers[index], vcp->query_pool, index * MaxCommandBufferTimestamps, 2460 MaxCommandBufferTimestamps); 2461 2462 result = (VulkanHandle){(u64)e}; 2463 } 2464 return result; 2465 } 2466 2467 DEBUG_IMPORT void 2468 vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline) 2469 { 2470 if ValidVulkanHandle(command) { 2471 VulkanContext *vk = vulkan_context; 2472 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2473 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2474 2475 VulkanPipeline *vp = 0; 2476 if ValidVulkanHandle(pipeline) { 2477 vp = vk_entity_data(pipeline, VulkanEntityKind_Pipeline); 2478 } else if (vcb->timeline == VulkanTimeline_Compute) { 2479 vp = &vk->default_compute_pipeline; 2480 } else if (vcb->timeline == VulkanTimeline_Graphics) { 2481 vp = &vk->default_graphics_pipeline; 2482 } else { 2483 InvalidCodePath; 2484 } 2485 2486 read_only local_persist VkPipelineBindPoint bind_point_lut[VulkanTimeline_Count] = { 2487 [VulkanTimeline_Graphics] = VK_PIPELINE_BIND_POINT_GRAPHICS, 2488 [VulkanTimeline_Compute] = VK_PIPELINE_BIND_POINT_COMPUTE, 2489 [VulkanTimeline_Transfer] = -1, 2490 }; 2491 2492 VkPipelineBindPoint bind_point = bind_point_lut[vcb->timeline]; 2493 assert(bind_point != (VkPipelineBindPoint)-1); 2494 2495 vkCmdBindPipeline(vcp->buffers[vcb->buffer_index], bind_point, vp->pipeline); 2496 vkCmdBindDescriptorSets(vcp->buffers[vcb->buffer_index], bind_point, vp->layout, 2497 0, countof(vk->descriptor_sets), vk->descriptor_sets, 0, 0); 2498 vcp->bound_pipeline = vp; 2499 } 2500 } 2501 2502 DEBUG_IMPORT void 2503 vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count) 2504 { 2505 if ValidVulkanHandle(command) { 2506 VulkanContext *vk = vulkan_context; 2507 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2508 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2509 VulkanQueue *vq = vk->queues[vcb->timeline]; 2510 2511 DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) 2512 { 2513 Arena arena = vk->arena; 2514 u32 valid_count = 0; 2515 VkBufferMemoryBarrier2 *memory_barriers = push_array(&arena, VkBufferMemoryBarrier2, count); 2516 for (u64 it = 0; it < count; it++) { 2517 if ValidVulkanHandle(barriers[it].gpu_buffer->handle) { 2518 u32 index = valid_count++; 2519 VulkanBuffer *vb = vk_entity_data(barriers[it].gpu_buffer->handle, VulkanEntityKind_Buffer); 2520 memory_barriers[index].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2; 2521 memory_barriers[index].srcStageMask = vq->pipeline_stage_flags; 2522 memory_barriers[index].srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT; 2523 memory_barriers[index].dstStageMask = vq->pipeline_stage_flags; 2524 memory_barriers[index].dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT; 2525 memory_barriers[index].srcQueueFamilyIndex = vq->queue_family; 2526 memory_barriers[index].dstQueueFamilyIndex = vq->queue_family; 2527 memory_barriers[index].buffer = vb->buffer; 2528 memory_barriers[index].offset = barriers[it].offset; 2529 memory_barriers[index].size = barriers[it].size; 2530 } 2531 } 2532 2533 VkDependencyInfo dependancy_info = { 2534 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, 2535 .bufferMemoryBarrierCount = valid_count, 2536 .pBufferMemoryBarriers = memory_barriers, 2537 }; 2538 2539 vkCmdPipelineBarrier2(vcp->buffers[vcb->buffer_index], &dependancy_info); 2540 } 2541 } 2542 } 2543 2544 DEBUG_IMPORT void 2545 vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch) 2546 { 2547 assert(dispatch.x <= U16_MAX); 2548 assert(dispatch.y <= U16_MAX); 2549 assert(dispatch.z <= U16_MAX); 2550 if ValidVulkanHandle(command) { 2551 VkCommandBuffer cmd = vk_command_buffer(command); 2552 vkCmdDispatch(cmd, dispatch.x, dispatch.y, dispatch.z); 2553 } 2554 } 2555 2556 DEBUG_IMPORT void 2557 vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values) 2558 { 2559 if ValidVulkanHandle(command) { 2560 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2561 VulkanCommandPool *vcp = vulkan_context->command_pools[vcb->timeline]; 2562 VulkanPipeline *vp = vcp->bound_pipeline; 2563 2564 assert(vp); 2565 2566 vkCmdPushConstants(vcp->buffers[vcb->buffer_index], vp->layout, vp->stage_flags, offset, size, values); 2567 } 2568 } 2569 2570 DEBUG_IMPORT void 2571 vk_command_timestamp(VulkanHandle command) 2572 { 2573 if ValidVulkanHandle(command) { 2574 VulkanContext *vk = vulkan_context; 2575 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2576 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2577 2578 read_only local_persist VkPipelineStageFlags2 stage_lut[VulkanTimeline_Count] = { 2579 [VulkanTimeline_Graphics] = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT, 2580 [VulkanTimeline_Compute] = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, 2581 [VulkanTimeline_Transfer] = -1, 2582 }; 2583 2584 VkPipelineStageFlags2 stage = stage_lut[vcb->timeline]; 2585 assert(stage != (VkPipelineStageFlags2)-1); 2586 2587 if (vcp->queries_occupied[vcb->buffer_index] < MaxCommandBufferTimestamps) { 2588 u32 query_index = vcp->queries_occupied[vcb->buffer_index]++; 2589 vkCmdWriteTimestamp2(vcp->buffers[vcb->buffer_index], stage, vcp->query_pool, 2590 vcb->buffer_index * MaxCommandBufferTimestamps + query_index); 2591 } 2592 } 2593 } 2594 2595 DEBUG_IMPORT void 2596 vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value) 2597 { 2598 if (ValidVulkanHandle(command) && Between(timeline, 0, VulkanTimeline_Count - 1)) { 2599 VulkanContext *vk = vulkan_context; 2600 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2601 2602 u32 wait_index = vk->queue_indices[timeline]; 2603 vcb->in_flight_wait_values[wait_index] = Max(value, vcb->in_flight_wait_values[wait_index]); 2604 } 2605 } 2606 2607 DEBUG_IMPORT u64 2608 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore) 2609 { 2610 u64 result = -1; 2611 if ValidVulkanHandle(command) { 2612 VulkanContext *vk = vulkan_context; 2613 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2614 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2615 VulkanQueue *vq = vk->queues[vcb->timeline]; 2616 VulkanSemaphore *vs = &vq->timeline_semaphore; 2617 2618 vkEndCommandBuffer(vcp->buffers[vcb->buffer_index]); 2619 2620 DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock)) { 2621 VkCommandBufferSubmitInfo command_buffer_submit_info = { 2622 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, 2623 .commandBuffer = vcp->buffers[vcb->buffer_index], 2624 }; 2625 2626 result = ++vs->value; 2627 2628 u32 signal_submit_info_count = 1; 2629 VkSemaphoreSubmitInfo signal_submit_infos[2] = {{ 2630 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2631 .semaphore = vs->semaphore, 2632 .value = result, 2633 .stageMask = vq->pipeline_stage_flags, 2634 }}; 2635 2636 if ValidVulkanHandle(finished_semaphore) { 2637 VulkanSemaphore *fs = vk_entity_data(finished_semaphore, VulkanEntityKind_Semaphore); 2638 signal_submit_infos[signal_submit_info_count++] = (VkSemaphoreSubmitInfo){ 2639 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2640 .semaphore = fs->semaphore, 2641 .stageMask = vq->pipeline_stage_flags, 2642 }; 2643 } 2644 2645 u32 wait_submit_info_count = 0; 2646 VkSemaphoreSubmitInfo wait_submit_infos[VulkanQueueKind_Count + 1]; 2647 for (u32 i = 0; i < vk->unique_queues; i++) { 2648 u32 queue_index = vk->queue_indices[i]; 2649 if (vcb->in_flight_wait_values[queue_index] > 0) { 2650 VulkanQueue *q = vk->queues[queue_index]; 2651 VkSemaphoreSubmitInfo wait_ssi = { 2652 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2653 .semaphore = q->timeline_semaphore.semaphore, 2654 .value = vcb->in_flight_wait_values[queue_index], 2655 .stageMask = q->pipeline_stage_flags, 2656 }; 2657 wait_submit_infos[wait_submit_info_count++] = wait_ssi; 2658 } 2659 } 2660 2661 if ValidVulkanHandle(wait_semaphore) { 2662 VulkanSemaphore *ws = vk_entity_data(wait_semaphore, VulkanEntityKind_Semaphore); 2663 wait_submit_infos[wait_submit_info_count++] = (VkSemaphoreSubmitInfo){ 2664 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2665 .semaphore = ws->semaphore, 2666 .stageMask = vq->pipeline_stage_flags, 2667 }; 2668 } 2669 2670 VkSubmitInfo2 submit_info = { 2671 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, 2672 .commandBufferInfoCount = 1, 2673 .pCommandBufferInfos = &command_buffer_submit_info, 2674 .waitSemaphoreInfoCount = wait_submit_info_count, 2675 .pWaitSemaphoreInfos = wait_submit_infos, 2676 .signalSemaphoreInfoCount = signal_submit_info_count, 2677 .pSignalSemaphoreInfos = signal_submit_infos, 2678 }; 2679 2680 vkQueueSubmit2(vq->queue, 1, &submit_info, 0); 2681 2682 vcp->bound_pipeline = 0; 2683 2684 atomic_store_u64(vcp->submission_values + vcb->buffer_index, result); 2685 } 2686 2687 release_lock(&vcp->lock); 2688 2689 vk_entity_release((VulkanEntity *)command.value[0]); 2690 } 2691 return result; 2692 } 2693 2694 DEBUG_IMPORT void 2695 vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *depth, GPUImage *resolve) 2696 { 2697 if ValidVulkanHandle(command) { 2698 VkCommandBuffer cmd = vk_command_buffer(command); 2699 2700 assert((colour->width == depth->width) && (colour->height == depth->height)); 2701 2702 VulkanImage *ci = vk_entity_data(colour->image, VulkanEntityKind_Image); 2703 VulkanImage *di = vk_entity_data(depth->image, VulkanEntityKind_Image); 2704 VulkanImage *ri = 0; 2705 if (resolve) ri = vk_entity_data(resolve->image, VulkanEntityKind_Image); 2706 2707 // NOTE: Layout Transitions 2708 { 2709 u32 image_memory_barrier_count = 2; 2710 VkImageMemoryBarrier2 image_memory_barriers[3] = { 2711 { 2712 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2713 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, 2714 .srcAccessMask = 0, 2715 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, 2716 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 2717 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2718 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2719 .image = ci->image, 2720 .subresourceRange = { 2721 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2722 .baseMipLevel = 0, 2723 .levelCount = 1, 2724 .baseArrayLayer = 0, 2725 .layerCount = 1, 2726 }, 2727 }, 2728 { 2729 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2730 .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, 2731 .srcAccessMask = 0, 2732 .dstStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, 2733 .dstAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 2734 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2735 .newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 2736 .image = di->image, 2737 .subresourceRange = { 2738 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT, 2739 .baseMipLevel = 0, 2740 .levelCount = 1, 2741 .baseArrayLayer = 0, 2742 .layerCount = 1, 2743 }, 2744 }, 2745 }; 2746 2747 if (resolve) image_memory_barriers[image_memory_barrier_count++] = (VkImageMemoryBarrier2){ 2748 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2749 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, 2750 .srcAccessMask = 0, 2751 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT|VK_PIPELINE_STAGE_2_RESOLVE_BIT, 2752 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 2753 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2754 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2755 .image = ri->image, 2756 .subresourceRange = { 2757 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2758 .baseMipLevel = 0, 2759 .levelCount = 1, 2760 .baseArrayLayer = 0, 2761 .layerCount = 1, 2762 }, 2763 }; 2764 2765 VkDependencyInfo dependency_info = { 2766 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, 2767 .imageMemoryBarrierCount = image_memory_barrier_count, 2768 .pImageMemoryBarriers = image_memory_barriers, 2769 }; 2770 2771 vkCmdPipelineBarrier2(cmd, &dependency_info); 2772 } 2773 2774 VkRenderingAttachmentInfo colour_attachment = { 2775 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, 2776 .imageView = ci->view, 2777 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2778 .resolveMode = ri ? VK_RESOLVE_MODE_AVERAGE_BIT : 0, 2779 .resolveImageView = ri ? ri->view : 0, 2780 .resolveImageLayout = ri ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : 0, 2781 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, 2782 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 2783 .clearValue = {.color = {{0.0f, 0.0f, 0.0f, 0.0f}}}, 2784 }; 2785 2786 VkRenderingAttachmentInfo depth_stencil_attachment = { 2787 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, 2788 .imageView = di->view, 2789 .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 2790 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, 2791 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 2792 .clearValue = {.depthStencil = {1.0f, 0}}, 2793 }; 2794 2795 VkRenderingInfo rendering_info = { 2796 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, 2797 .renderArea = {.offset = {0}, .extent = {colour->width, colour->height}}, 2798 .layerCount = 1, 2799 .colorAttachmentCount = 1, 2800 .pColorAttachments = &colour_attachment, 2801 .pDepthAttachment = &depth_stencil_attachment, 2802 .pStencilAttachment = &depth_stencil_attachment, 2803 }; 2804 2805 vkCmdBeginRendering(cmd, &rendering_info); 2806 } 2807 } 2808 2809 DEBUG_IMPORT void 2810 vk_command_draw(VulkanHandle command, GPUBuffer *model) 2811 { 2812 if (ValidVulkanHandle(command) && ValidVulkanHandle(model->handle)) { 2813 VkCommandBuffer cmd = vk_command_buffer(command); 2814 VulkanBuffer *vb = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); 2815 vkCmdBindIndexBuffer2(cmd, vb->buffer, 0, vk_index_size(vb->index_type) * model->index_count, vb->index_type); 2816 vkCmdDrawIndexed(cmd, model->index_count, 1, 0, 0, 0); 2817 } 2818 } 2819 2820 DEBUG_IMPORT void 2821 vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset) 2822 { 2823 if ValidVulkanHandle(command) { 2824 VkCommandBuffer cmd = vk_command_buffer(command); 2825 VkRect2D scissor = {.offset = {x_offset, y_offset}, .extent = {width, height}}; 2826 vkCmdSetScissor(cmd, 0, 1, &scissor); 2827 } 2828 } 2829 2830 DEBUG_IMPORT void 2831 vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth) 2832 { 2833 if ValidVulkanHandle(command) { 2834 VkCommandBuffer cmd = vk_command_buffer(command); 2835 VkViewport viewport = {x_offset, y_offset, width, height, min_depth, max_depth}; 2836 vkCmdSetViewport(cmd, 0, 1, &viewport); 2837 } 2838 } 2839 2840 DEBUG_IMPORT void 2841 vk_command_end_rendering(VulkanHandle command) 2842 { 2843 if ValidVulkanHandle(command) vkCmdEndRendering(vk_command_buffer(command)); 2844 } 2845 2846 DEBUG_IMPORT void 2847 vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, 2848 GPUBuffer *restrict source, u64 source_offset, i64 size) 2849 { 2850 if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->handle) && ValidVulkanHandle(source->handle)) { 2851 VkCommandBuffer cmd = vk_command_buffer(command); 2852 VulkanBuffer *db = vk_entity_data(destination->handle, VulkanEntityKind_Buffer); 2853 VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); 2854 2855 VkBufferCopy2 buffer_copy = { 2856 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2, 2857 .srcOffset = source_offset, 2858 .dstOffset = 0, 2859 .size = size, 2860 }; 2861 2862 VkCopyBufferInfo2 copy_buffer_info = { 2863 .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2, 2864 .srcBuffer = sb->buffer, 2865 .dstBuffer = db->buffer, 2866 .regionCount = 1, 2867 .pRegions = &buffer_copy, 2868 }; 2869 2870 vkCmdCopyBuffer2(cmd, ©_buffer_info); 2871 } 2872 } 2873 2874 DEBUG_IMPORT u64 * 2875 vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena) 2876 { 2877 u64 *result = 0; 2878 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2879 VulkanContext *vk = vulkan_context; 2880 VulkanCommandPool *vcp = vk->command_pools[timeline]; 2881 DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) { 2882 u32 index = (vcp->next_index - 1) % countof(vcp->buffers); 2883 u32 count = vcp->queries_occupied[index]; 2884 if (count > 0) { 2885 result = push_array(arena, u64, count + 1); 2886 result[0] = count; 2887 2888 vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2889 2890 vkGetQueryPoolResults(vk->device, vcp->query_pool, index * MaxCommandBufferTimestamps, count, 2891 count * sizeof(u64), result + 1, 8, VK_QUERY_RESULT_WAIT_BIT); 2892 } 2893 } 2894 } else { 2895 result = push_array(arena, u64, 1); 2896 } 2897 return result; 2898 }