ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

vulkan.c (102619B)


      1 /* See LICENSE for license details. */
      2 // TODO(rnp)
      3 // [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays
      4 // [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float
      5 // [ ]: VK_KHR_robustness2 probably shouldn't be required but it also might not matter
      6 
      7 #include "beamformer_internal.h"
      8 #include "vulkan.h"
      9 #include "external/glslang/glslang/Include/glslang_c_interface.h"
     10 
     11 #define ForceSingleQueue (0)
     12 
     13 #define glslang_info(s) s8("[glslang] " s)
     14 #define vulkan_info(s)  s8("[vulkan]  " s)
     15 
     16 #define ValidVulkanHandle(h) ((h).value[0] != 0)
     17 
     18 #define MaxCommandBuffersInFlight  BeamformerMaxRawDataFramesInFlight
     19 #define MaxCommandBufferTimestamps (1024)
     20 
     21 typedef enum { // Roles a device queue can serve; values index the per-kind arrays sized by VulkanQueueKind_Count.
     22 	VulkanQueueKind_Graphics,
     23 	VulkanQueueKind_Compute,
     24 	VulkanQueueKind_Transfer,
     25 	VulkanQueueKind_Count, // number of kinds above; used as an array dimension
     26 } VulkanQueueKind;
     27 
     28 typedef enum { // Memory categories; values index memory_type_indices[] and memory_host_coherent[] in VulkanContext.
     29 	VulkanMemoryKind_Device,
     30 	VulkanMemoryKind_BAR, // NOTE(review): presumably host visible device local memory - confirm where the memory types are selected
     31 	VulkanMemoryKind_Host,
     32 	VulkanMemoryKind_Count, // number of kinds above; used as an array dimension
     33 } VulkanMemoryKind;
     34 
     35 typedef struct { // A VkBuffer grouped with a device memory handle and bookkeeping about that memory.
     36 	VkDeviceMemory    memory;
     37 	VkBuffer          buffer;
     38 	u64               memory_size; // NOTE(review): presumably the size in bytes of `memory` - confirm at the allocation site
     39 
     40 	void *            host_pointer; // NOTE(review): presumably the CPU mapping of `memory` when it is host visible - confirm
     41 
     42 	VulkanMemoryKind  memory_kind;
     43 
     44 	// NOTE: only used when the buffer is backing a VulkanRenderModel.
     45 	VkIndexType       index_type;
     46 } VulkanBuffer;
     47 
     48 typedef struct { // A VkImage grouped with a device memory handle and an image view handle.
     49 	VkDeviceMemory    memory;
     50 	VkImage           image;
     51 	VkImageView       view;
     52 } VulkanImage;
     53 
     54 typedef struct { // A pipeline, the layout it was created with, and the shader stages it contains.
     55 	VkPipeline         pipeline;
     56 	VkPipelineLayout   layout;
     57 	VkShaderStageFlags stage_flags; // compute bit for compute pipelines; OR of both stage bits for graphics pipelines
     58 } VulkanPipeline;
     59 
     60 typedef struct { // A semaphore handle paired with a 64 bit counter.
     61 	VkSemaphore semaphore;
     62 	u64         value; // NOTE(review): presumably the most recent timeline value associated with `semaphore` - confirm at the signal/wait sites
     63 } VulkanSemaphore;
     64 
     65 typedef struct { // Reference to one VkCommandBuffer inside a VulkanCommandPool; resolved by vk_command_buffer().
     66 	VulkanTimeline timeline; // selects the pool: index into vulkan_context->command_pools
     67 	u32            buffer_index; // index into the selected pool's buffers[]
     68 
     69 	// NOTE(rnp): since there may not be QueueKind_Count queues, when putting values into this
     70 	// array you must be careful to map through the queue_indices array in the vulkan_context.
     71 	u64 in_flight_wait_values[VulkanQueueKind_Count];
     72 } VulkanCommandBuffer;
     73 
     74 typedef enum { // Tag stored in VulkanEntity.kind; vk_entity_data() asserts it before handing out the payload.
     75 	VulkanEntityKind_Buffer,
     76 	VulkanEntityKind_CommandBuffer,
     77 	VulkanEntityKind_Image,
     78 	VulkanEntityKind_Pipeline,
     79 	VulkanEntityKind_RenderModel, // has no dedicated member in VulkanEntity.as; NOTE(review): presumably stored as `buffer` - confirm
     80 	VulkanEntityKind_Semaphore,
     81 } VulkanEntityKind;
     82 
     83 typedef struct VulkanEntity VulkanEntity; // Tagged union behind a VulkanHandle (vk_entity_data() casts h.value[0] to a VulkanEntity *).
     84 struct VulkanEntity {
     85 	VulkanEntity *   next; // link used while the entity sits on vulkan_context->entity_freelist
     86 	VulkanEntityKind kind; // selects which member of `as` is meaningful
     87 	union {
     88 		VulkanBuffer        buffer;
     89 		VulkanCommandBuffer command_buffer;
     90 		VulkanImage         image;
     91 		VulkanPipeline      pipeline;
     92 		VulkanSemaphore     semaphore;
     93 	} as;
     94 };
     95 
     96 typedef alignas(64) struct { // Per queue state; 64 byte aligned so that each queue sits on its own cacheline (asserted below).
     97 	i32 lock;
     98 
     99 	u16     queue_family;
    100 	u16     queue_index;
    101 	VkQueue queue;
    102 
    103 	VulkanSemaphore timeline_semaphore;
    104 
    105 	VkPipelineStageFlags2 pipeline_stage_flags;
    106 } VulkanQueue;
    107 static_assert(alignof(VulkanQueue) == 64, "VulkanQueue must be placed on its own cacheline");
    108 
    109 typedef alignas(64) struct { // A command pool plus a fixed set of MaxCommandBuffersInFlight command buffers and per buffer bookkeeping.
    110 	i32             lock;
    111 	u32             next_index;
    112 
    113 	VulkanPipeline *bound_pipeline;
    114 
    115 	VkCommandPool   handle;
    116 	VkQueryPool     query_pool;
    117 	VkCommandBuffer buffers[MaxCommandBuffersInFlight]; // indexed by VulkanCommandBuffer.buffer_index
    118 
    119 	u64             submission_values[MaxCommandBuffersInFlight]; // parallel to buffers[]
    120 	u32             queries_occupied[MaxCommandBuffersInFlight]; // parallel to buffers[]
    121 } VulkanCommandPool;
    122 
    123 typedef struct {
    124 	Arena             arena;
    125 	i32               arena_lock;
    126 
    127 	VkInstance        handle;
    128 	VkDevice          device;
    129 	VkPhysicalDevice  physical_device;
    130 
    131 	VkDescriptorPool       descriptor_pool;
    132 	VkDescriptorSetLayout  descriptor_set_layouts[BeamformerShaderResourceKind_Count];
    133 	VkDescriptorSet        descriptor_sets[BeamformerShaderResourceKind_Count];
    134 	// NOTE(rnp): must store these if we want to allow partial updates easily
    135 	VkDescriptorBufferInfo descriptor_buffer_infos[BeamformerShaderBufferSlot_Count];
    136 
    137 	// NOTE(rnp): fallback for when a shader fails to compile
    138 	VulkanPipeline    default_compute_pipeline;
    139 	VulkanPipeline    default_graphics_pipeline;
    140 
    141 	GPUInfo           gpu_info;
    142 
    143 	struct {
    144 		u64             max_allocation_size;
    145 		u64             non_coherent_atom_size;
    146 		u8              gpu_heap_index;
    147 		i8              memory_type_indices[VulkanMemoryKind_Count];
    148 		b8              memory_host_coherent[VulkanMemoryKind_Count];
    149 		static_assert(VK_MAX_MEMORY_HEAPS < I8_MAX, "");
    150 		static_assert(VK_MAX_MEMORY_TYPES < U8_MAX, "");
    151 	} memory_info;
    152 
    153 	VulkanCommandPool * command_pools[VulkanTimeline_Count];
    154 	VulkanQueue *       queues[VulkanQueueKind_Count];
    155 	// NOTE(rnp): there are a few places in the code where simply going through the queues map
    156 	// is not sufficient. those places need to know of the unique queues which unique queue
    157 	// is being referred to. that code uses this map instead.
    158 	u16               queue_indices[VulkanQueueKind_Count];
    159 	u16               unique_queues;
    160 
    161 	VkFormat          swap_chain_image_format;
    162 	VkFormat          depth_stencil_format;
    163 
    164 	VulkanEntity *    entity_freelist;
    165 	Arena             entity_arena;
    166 	i32               entity_lock;
    167 } VulkanContext;
    168 
    169 read_only global const char *vk_required_instance_extensions[] = { // currently lists no extensions
    170 };
    171 
    172 #if OS_WINDOWS // the external memory/semaphore handle extensions differ per OS
    173 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \
    174 	X("VK_KHR_external_memory_win32") \
    175 	X("VK_KHR_external_semaphore_win32") \
    176 
    177 #else
    178 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \
    179 	X("VK_KHR_external_memory_fd") \
    180 	X("VK_KHR_external_semaphore_fd") \
    181 
    182 #endif
    183 
    184 #define VK_REQUIRED_DEVICE_EXTENSIONS_LIST \
    185 	X("VK_KHR_16bit_storage") \
    186 	X("VK_KHR_external_memory") \
    187 	X("VK_KHR_external_semaphore") \
    188 	X("VK_KHR_robustness2") \
    189 	X("VK_KHR_storage_buffer_storage_class") \
    190 	X("VK_KHR_timeline_semaphore") \
    191 	VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST
    192 
    193 #define X(str) s8_comp(str), // every list entry becomes one s8 element of the table below
    194 read_only global s8 vk_required_device_extensions[] = {VK_REQUIRED_DEVICE_EXTENSIONS_LIST};
    195 #undef X
    196 
    197 #define VK_OPTIONAL_DEVICE_EXTENSIONS_LIST \
    198 	X(VK_KHR, cooperative_matrix) \
    199 
    200 #define X(p, s, ...) s8_comp(#p "_" #s), // X(prefix, name) becomes the string "prefix_name"
    201 read_only global s8 vk_optional_device_extensions[] = {VK_OPTIONAL_DEVICE_EXTENSIONS_LIST}; // same order as vulkan_config.optional.E[]
    202 #undef X
    203 
    204 #define VK_REQUIRED_PHYSICAL_FEATURES \
    205 	X(shaderInt16) \
    206 	X(shaderInt64) \
    207 
    208 #define VK_REQUIRED_PHYSICAL_11_FEATURES \
    209 	X(storageBuffer16BitAccess) \
    210 
    211 #define VK_REQUIRED_PHYSICAL_12_FEATURES \
    212 	X(bufferDeviceAddress) \
    213 	X(shaderFloat16) \
    214 	X(timelineSemaphore) \
    215 	X(vulkanMemoryModel) \
    216 
    217 #define VK_REQUIRED_PHYSICAL_13_FEATURES \
    218 	X(dynamicRendering) \
    219 	X(synchronization2) \
    220 
    221 #define VK_DEBUG_EXTENSIONS \
    222 	X(VK_KHR, shader_non_semantic_info) \
    223 	X(VK_KHR, shader_relaxed_extended_instruction) \
    224 
    225 #define X(p, s, ...) s8_comp(#p "_" #s), // X(prefix, name) becomes the string "prefix_name"
    226 read_only global s8 vk_debug_extensions[] = {VK_DEBUG_EXTENSIONS}; // same order as vulkan_config.debug.E[]
    227 #undef X
    228 
    229 #define VK_INSTANCE_DEBUG_EXTENSIONS_LIST \
    230 	X(VK_EXT, debug_utils) \
    231 
    232 #define X(p, s, ...) s8_comp(#p "_" #s), // X(prefix, name) becomes the string "prefix_name"
    233 read_only global s8 vk_instance_debug_extensions[] = {VK_INSTANCE_DEBUG_EXTENSIONS_LIST}; // same order as vulkan_config.instance.E[]
    234 #undef X
    235 
    236 #if BEAMFORMER_DEBUG // validation layers are only listed in debug builds
    237 #define VK_VALIDATION_LAYERS_LIST \
    238 	X(KHRONOS, validation) \
    239 
    240 #else
    241 #define VK_VALIDATION_LAYERS_LIST
    242 #endif
    243 
    244 read_only global str8 vk_validation_layers[] = { // the table has no entries when BEAMFORMER_DEBUG is off
    245 	#define X(vendor, name, ...) str8_comp("VK_LAYER_" #vendor "_" #name), // X(vendor, name) becomes "VK_LAYER_vendor_name"
    246 	VK_VALIDATION_LAYERS_LIST
    247 	#undef X
    248 };
    249 
    250 global struct { // Per extension/layer switches. NOTE(review): presumably filled in during instance/device creation - the writer is not in view
    251 	u32 driver_api_version;
    252 	union { // one b8 per VK_OPTIONAL_DEVICE_EXTENSIONS_LIST entry: reachable by name or by list index through E[]
    253 		struct {
    254 			#define X(_, name, ...) b8 name;
    255 			VK_OPTIONAL_DEVICE_EXTENSIONS_LIST
    256 			#undef X
    257 		};
    258 		b8 E[countof(vk_optional_device_extensions)];
    259 	} optional;
    260 
    261 	union { // one b8 per VK_DEBUG_EXTENSIONS entry; glsl_to_spirv() reads these to decide on shader debug info
    262 		struct {
    263 			#define X(_, name, ...) b8 name;
    264 			VK_DEBUG_EXTENSIONS
    265 			#undef X
    266 		};
    267 		b8 E[countof(vk_debug_extensions)];
    268 	} debug;
    269 
    270 	union { // one b8 per VK_INSTANCE_DEBUG_EXTENSIONS_LIST entry; vk_label_object_() reads instance.debug_utils
    271 		struct {
    272 			#define X(_, name, ...) b8 name;
    273 			VK_INSTANCE_DEBUG_EXTENSIONS_LIST
    274 			#undef X
    275 		};
    276 		b8 E[countof(vk_instance_debug_extensions)];
    277 	} instance;
    278 
    279 	#if BEAMFORMER_DEBUG
    280 	struct { // per validation layer state, laid out in VK_VALIDATION_LAYERS_LIST order
    281 		union {
    282 			struct {
    283 				#define X(_, name, ...) b8 name;
    284 				VK_VALIDATION_LAYERS_LIST
    285 				#undef X
    286 			};
    287 			b8 E[countof(vk_validation_layers)];
    288 		} enabled;
    289 
    290 		union {
    291 			struct {
    292 				#define X(_, name, ...) u32 name;
    293 				VK_VALIDATION_LAYERS_LIST
    294 				#undef X
    295 			};
    296 			u32 E[countof(vk_validation_layers)];
    297 		} version;
    298 	} layers;
    299 	#endif
    300 } vulkan_config;
    301 
    302 #define MAX_ENABLED_EXTENSIONS (  countof(vk_required_device_extensions) \
    303                                 + countof(vk_optional_device_extensions) \
    304                                 + countof(vk_debug_extensions) \
    305                                ) /* sum of the entry counts of the three device extension tables */
    306 
    307 global VulkanContext vulkan_context[1]; // single instance; the [1] lets callers write vulkan_context->field
    308 
    309 /* NOTE(rnp): the idea here is to set reasonable development constraints.
    310  * They should probably not match one to one with the maximums of the dev
    311  * machine's hardware. Instead these are here to cause compile time failure
    312  * for features which are not expected to work everywhere. */
    313 global glslang_resource_t glslc_resource_constraints[1] = {{ // passed as glslang_input_t.resource by glsl_to_spirv()
    314 	.max_compute_work_group_count_x = 65535,
    315 	.max_compute_work_group_count_y = 65535,
    316 	.max_compute_work_group_count_z = 65535,
    317 	.max_compute_work_group_size_x  = 1024,
    318 	.max_compute_work_group_size_y  = 1024,
    319 	.max_compute_work_group_size_z  = 1024,
    320 
    321 	// NOTE: taken from glslang defaults
    322 	.max_lights = 32,
    323 	.max_clip_planes = 6,
    324 	.max_texture_units = 32,
    325 	.max_texture_coords = 32,
    326 	.max_vertex_attribs = 64,
    327 	.max_vertex_uniform_components = 4096,
    328 	.max_varying_floats = 64,
    329 	.max_vertex_texture_image_units = 32,
    330 	.max_combined_texture_image_units = 80,
    331 	.max_texture_image_units = 32,
    332 	.max_fragment_uniform_components = 4096,
    333 	.max_draw_buffers = 32,
    334 	.max_vertex_uniform_vectors = 128,
    335 	.max_varying_vectors = 8,
    336 	.max_fragment_uniform_vectors = 16,
    337 	.max_vertex_output_vectors = 16,
    338 	.max_fragment_input_vectors = 15,
    339 	.min_program_texel_offset = -8,
    340 	.max_program_texel_offset = 7,
    341 	.max_clip_distances = 8,
    342 	.max_compute_uniform_components = 1024,
    343 	.max_compute_texture_image_units = 16,
    344 	.max_compute_image_uniforms = 8,
    345 	.max_compute_atomic_counters = 8,
    346 	.max_compute_atomic_counter_buffers = 1,
    347 	.max_varying_components = 60,
    348 	.max_vertex_output_components = 64,
    349 	.max_fragment_input_components = 128,
    350 	.max_image_units = 8,
    351 	.max_combined_image_units_and_fragment_outputs = 8,
    352 	.max_combined_shader_output_resources = 8,
    353 	.max_image_samples = 0,
    354 	.max_vertex_image_uniforms = 0,
    355 	.max_fragment_image_uniforms = 8,
    356 	.max_combined_image_uniforms = 8,
    357 	.max_viewports = 16,
    358 	.max_vertex_atomic_counters = 0,
    359 	.max_fragment_atomic_counters = 8,
    360 	.max_combined_atomic_counters = 8,
    361 	.max_atomic_counter_bindings = 1,
    362 	.max_vertex_atomic_counter_buffers = 0,
    363 	.max_fragment_atomic_counter_buffers = 1,
    364 	.max_combined_atomic_counter_buffers = 1,
    365 	.max_atomic_counter_buffer_size = 16384,
    366 	.max_transform_feedback_buffers = 4,
    367 	.max_transform_feedback_interleaved_components = 64,
    368 	.max_cull_distances = 8,
    369 	.max_combined_clip_and_cull_distances = 8,
    370 	.max_samples = 4,
    371 	.max_mesh_output_vertices_ext = 256,
    372 	.max_mesh_output_primitives_ext = 256,
    373 	.max_mesh_work_group_size_x_ext = 128,
    374 	.max_mesh_work_group_size_y_ext = 128,
    375 	.max_mesh_work_group_size_z_ext = 128,
    376 	.max_task_work_group_size_x_ext = 128,
    377 	.max_task_work_group_size_y_ext = 128,
    378 	.max_task_work_group_size_z_ext = 128,
    379 	.max_mesh_view_count_ext = 4,
    380 	.max_dual_source_draw_buffers_ext = 1,
    381 
    382 	.limits = { // every listed limit flag is switched on
    383 		.non_inductive_for_loops                  = 1,
    384 		.while_loops                              = 1,
    385 		.do_while_loops                           = 1,
    386 		.general_uniform_indexing                 = 1,
    387 		.general_attribute_matrix_vector_indexing = 1,
    388 		.general_varying_indexing                 = 1,
    389 		.general_sampler_indexing                 = 1,
    390 		.general_variable_indexing                = 1,
    391 		.general_constant_matrix_vector_indexing  = 1,
    392 	},
    393 }};
    394 
    395 #if BEAMFORMER_RENDERDOC_HOOKS
    396 DEBUG_IMPORT void *
    397 vk_renderdoc_instance_handle(void)
    398 {
    399 	return *((void **)vulkan_context->handle);
    400 }
    401 #endif
    402 
    403 #if BEAMFORMER_DEBUG
    404 #define vk_label_object(k, h, label, extra) vk_label_object_(VK_OBJECT_TYPE_##k, (u64)h, label, extra)
    405 function void
    406 vk_label_object_(VkObjectType kind, u64 handle, str8 label, str8 extra)
    407 {
    408 	local_persist u8 buffer[1024];
    409 	Stream sb = arena_stream(arena_from_memory(buffer, sizeof(buffer)));
    410 	if (vulkan_config.instance.debug_utils && label.length > 0) {
    411 		stream_append_s8s(&sb, s8_from_str8(label), s8(" ("), s8_from_str8(extra), s8(")"));
    412 		stream_append_byte(&sb, 0);
    413 		if (!sb.errors) {
    414 			VkDebugUtilsObjectNameInfoEXT object_name_info = {
    415 				.sType        = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
    416 				.objectType   = kind,
    417 				.objectHandle = handle,
    418 				.pObjectName  = (char *)sb.data,
    419 			};
    420 			vkSetDebugUtilsObjectNameEXT(vulkan_context->device, &object_name_info);
    421 		}
    422 	}
    423 }
    424 #else
    425 #define vk_label_object(...)
    426 #define vk_label_object_(...)
    427 #endif
    428 
    429 function VulkanEntity *
    430 vk_entity_allocate(VulkanEntityKind kind)
    431 {
    432 	VulkanEntity *result = 0;
    433 	DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock))
    434 	{
    435 		result = SLLPopFreelist(vulkan_context->entity_freelist);
    436 		if (!result) result = push_array_no_zero(&vulkan_context->entity_arena, VulkanEntity, 1);
    437 	}
    438 
    439 	zero_struct(result);
    440 	result->kind = kind;
    441 	return result;
    442 }
    443 
    444 function void
    445 vk_entity_release(VulkanEntity *entity)
    446 {
    447 	DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock))
    448 	{
    449 		SLLStackPush(vulkan_context->entity_freelist, entity, next);
    450 	}
    451 }
    452 
    453 function void *
    454 vk_entity_data(VulkanHandle h, VulkanEntityKind kind)
    455 {
    456 	VulkanEntity *e = (VulkanEntity *)h.value[0];
    457 	assert(ValidVulkanHandle(h) && e->kind == kind);
    458 	return &e->as;
    459 }
    460 
    461 function VkCommandBuffer
    462 vk_command_buffer(VulkanHandle h)
    463 {
    464 	VulkanCommandBuffer *vcb = vk_entity_data(h, VulkanEntityKind_CommandBuffer);
    465 	VulkanCommandPool   *vcp = vulkan_context->command_pools[vcb->timeline];
    466 	VkCommandBuffer result = vcp->buffers[vcb->buffer_index];
    467 	return result;
    468 }
    469 
    470 #define glslang_log(a, ...) glslang_log_(a, arg_list(s8, __VA_ARGS__))
    471 function void
    472 glslang_log_(Arena arena, s8 *items, uz count)
    473 {
    474 	Stream sb = arena_stream(arena);
    475 	stream_append_s8(&sb, glslang_info(""));
    476 	stream_append_s8s_(&sb, items, count);
    477 	if (sb.data[sb.widx - 1] != '\n') stream_append_byte(&sb, '\n');
    478 	os_console_log(sb.data, sb.widx);
    479 }
    480 
    481 function s8
    482 glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name)
    483 {
    484 	/* NOTE(rnp): glslang's garbage c interface doesn't expose internal usage of strings with length */
    485 	assert(shader_text.data[shader_text.len] == 0);
    486 
    487 	glslang_input_t input = {
    488 		.language                          = GLSLANG_SOURCE_GLSL,
    489 		.stage                             = kind,
    490 		.client                            = GLSLANG_CLIENT_VULKAN,
    491 		.client_version                    = GLSLANG_TARGET_VULKAN_1_4,
    492 		.target_language                   = GLSLANG_TARGET_SPV,
    493 		.target_language_version           = GLSLANG_TARGET_SPV_1_6,
    494 		.code                              = (c8 *)shader_text.data,
    495 		.default_version                   = 460,
    496 		.default_profile                   = GLSLANG_NO_PROFILE,
    497 		.force_default_version_and_profile = 0,
    498 		.forward_compatible                = 0,
    499 		.messages                          = GLSLANG_MSG_DEFAULT_BIT,
    500 		.resource                          = glslc_resource_constraints,
    501 	};
    502 	glslang_shader_t *shader = glslang_shader_create(&input);
    503 
    504 	s8 error = {0};
    505 	if (glslang_shader_preprocess(shader, &input)) {
    506 		if (!glslang_shader_parse(shader, &input))
    507 			error = s8("parsing failed");
    508 	} else {
    509 		error = s8("preprocessing failed");
    510 	}
    511 
    512 	if (error.len) {
    513 		glslang_log(*arena, name, s8(": "), error, s8("\n"),
    514 		            c_str_to_s8((c8 *)glslang_shader_get_info_log(shader)),
    515 		            c_str_to_s8((c8 *)glslang_shader_get_info_debug_log(shader)));
    516 		glslang_shader_delete(shader);
    517 		shader = 0;
    518 	}
    519 
    520 	s8 result = {0};
    521 	if (shader) {
    522 		glslang_program_t *program = glslang_program_create();
    523 		glslang_program_add_shader(program, shader);
    524 		i32 messages = GLSLANG_MSG_DEBUG_INFO_BIT|GLSLANG_MSG_SPV_RULES_BIT|GLSLANG_MSG_VULKAN_RULES_BIT;
    525 		if (glslang_program_link(program, messages)) {
    526 			glslang_spv_options_t options = {.validate = 1,};
    527 
    528 			if (vulkan_config.debug.shader_non_semantic_info &&
    529 			    vulkan_config.debug.shader_relaxed_extended_instruction)
    530 			{
    531 				options.generate_debug_info                  = 1;
    532 				options.emit_nonsemantic_shader_debug_info   = 1;
    533 				options.emit_nonsemantic_shader_debug_source = 1;
    534 			}
    535 
    536 			glslang_program_add_source_text(program, kind, (c8 *)shader_text.data, shader_text.len);
    537 			glslang_program_SPIRV_generate_with_options(program, kind, &options);
    538 
    539 			u32 words   = glslang_program_SPIRV_get_size(program);
    540 			result.data = (u8 *)push_array(arena, u32, words);
    541 			result.len  = words * sizeof(u32);
    542 			glslang_program_SPIRV_get(program, (u32 *)result.data);
    543 
    544 			s8 spirv_msg = c_str_to_s8((c8 *)glslang_program_SPIRV_get_messages(program));
    545 			if (spirv_msg.len) glslang_log(*arena, name, s8(": spirv info: "), spirv_msg);
    546 		} else {
    547 			glslang_log(*arena, name, s8(": shader linking failed\n"),
    548 			            c_str_to_s8((c8 *)glslang_program_get_info_log(program)),
    549 			            c_str_to_s8((c8 *)glslang_program_get_info_debug_log(program)));
    550 		}
    551 		glslang_shader_delete(shader);
    552 		glslang_program_delete(program);
    553 	}
    554 
    555 	return result;
    556 }
    557 
    558 function u32
    559 vk_shader_kind_to_glslang_shader_kind(u32 kind)
    560 {
    561 	u32 result = ctz_u64(kind);
    562 	return result;
    563 }
    564 
    565 function VkShaderModule
    566 vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name)
    567 {
    568 	VkShaderModule result = {0};
    569 	s8 spirv = glsl_to_spirv(&arena, vk_shader_kind_to_glslang_shader_kind(kind), text, name);
    570 	VkShaderModuleCreateInfo create_info = {
    571 		.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    572 		.codeSize = (uz)spirv.len,
    573 		.pCode    = (u32 *)spirv.data,
    574 	};
    575 	if (spirv.len > 0) vkCreateShaderModule(vulkan_context->device, &create_info, 0, &result);
    576 
    577 	return result;
    578 }
    579 
    580 function VkShaderStageFlags
    581 vk_stage_flags_from_shader_kind(VulkanShaderKind kind)
    582 {
    583 	read_only local_persist VkShaderStageFlags map[VulkanShaderKind_Count + 1] = {
    584 		[VulkanShaderKind_Vertex]   = VK_SHADER_STAGE_VERTEX_BIT,
    585 		[VulkanShaderKind_Mesh]     = VK_SHADER_STAGE_MESH_BIT_EXT,
    586 		[VulkanShaderKind_Fragment] = VK_SHADER_STAGE_FRAGMENT_BIT,
    587 		[VulkanShaderKind_Compute]  = VK_SHADER_STAGE_COMPUTE_BIT,
    588 		[VulkanShaderKind_Count]    = 0,
    589 	};
    590 	VkShaderStageFlags result = map[Clamp((u32)kind, 0, VulkanShaderKind_Count)];
    591 	return result;
    592 }
    593 
    594 function VulkanPipeline
    595 vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_constants_size)
    596 {
    597 	VulkanPipeline result = {.stage_flags = VK_SHADER_STAGE_COMPUTE_BIT};
    598 	VkShaderModule module = vk_compile_shader_module(arena, VK_SHADER_STAGE_COMPUTE_BIT, text, name);
    599 	if (module) {
    600 		VkPushConstantRange push_constant_range = {
    601 			.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
    602 			.offset     = 0,
    603 			.size       = push_constants_size,
    604 		};
    605 
    606 		VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
    607 			.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    608 			.setLayoutCount         = countof(vulkan_context->descriptor_set_layouts),
    609 			.pSetLayouts            = vulkan_context->descriptor_set_layouts,
    610 			.pushConstantRangeCount = push_constants_size ? 1 : 0,
    611 			.pPushConstantRanges    = push_constants_size ? &push_constant_range : 0,
    612 		};
    613 
    614 		vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_create_info, 0, &result.layout);
    615 
    616 		VkComputePipelineCreateInfo pipeline_create_info = {
    617 			.sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
    618 			.layout = result.layout,
    619 			.stage  = {
    620 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    621 				.stage  = VK_SHADER_STAGE_COMPUTE_BIT,
    622 				.module = module,
    623 				.pName  = "main",
    624 			},
    625 		};
    626 
    627 		vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline);
    628 
    629 		vk_label_object(PIPELINE,        result.pipeline, str8_from_s8(name), str8("Pipeline"));
    630 		vk_label_object(PIPELINE_LAYOUT, result.layout,   str8_from_s8(name), str8("Pipeline Layout"));
    631 		vk_label_object(SHADER_MODULE,   module,          str8_from_s8(name), str8("Module"));
    632 
    633 		vkDestroyShaderModule(vulkan_context->device, module, 0);
    634 	}
    635 	if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline;
    636 
    637 	return result;
    638 }
    639 
    640 function VulkanPipeline
    641 vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size)
    642 {
    643 	assume(count == 2);
    644 
    645 	VulkanPipeline result = {0};
    646 	VkShaderModule modules[2];
    647 
    648 	modules[0] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[0].kind),
    649 	                                      infos[0].text, infos[0].name);
    650 	modules[1] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[1].kind),
    651 	                                      infos[1].text, infos[1].name);
    652 	if (modules[0] && modules[1]) {
    653 		result.stage_flags = vk_stage_flags_from_shader_kind(infos[0].kind)
    654 		                     | vk_stage_flags_from_shader_kind(infos[1].kind);
    655 
    656 		VkPushConstantRange pcr = {
    657 			.stageFlags = result.stage_flags,
    658 			.offset     = 0,
    659 			.size       = push_constants_size,
    660 		};
    661 
    662 		VkPipelineLayoutCreateInfo pipeline_layout_info = {
    663 			.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    664 			.setLayoutCount         = countof(vulkan_context->descriptor_set_layouts),
    665 			.pSetLayouts            = vulkan_context->descriptor_set_layouts,
    666 			.pushConstantRangeCount = push_constants_size ? 1    : 0,
    667 			.pPushConstantRanges    = push_constants_size ? &pcr : 0,
    668 		};
    669 
    670 		vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_info, 0, &result.layout);
    671 
    672 		VkPipelineShaderStageCreateInfo shader_stage_create_infos[2] = {
    673 			{
    674 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    675 				.stage  = vk_stage_flags_from_shader_kind(infos[0].kind),
    676 				.module = modules[0],
    677 				.pName  = "main",
    678 			},
    679 			{
    680 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    681 				.stage  = vk_stage_flags_from_shader_kind(infos[1].kind),
    682 				.module = modules[1],
    683 				.pName  = "main",
    684 			},
    685 		};
    686 
    687 		VkPipelineVertexInputStateCreateInfo vertex_input_info = {
    688 			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
    689 		};
    690 
    691 		VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {
    692 			.sType    = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
    693 			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    694 		};
    695 
    696 		VkPipelineViewportStateCreateInfo viewport_info = {
    697 			.sType         = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
    698 			.viewportCount = 1,
    699 			.scissorCount  = 1,
    700 		};
    701 
    702 		VkPipelineRasterizationStateCreateInfo rasterization_info = {
    703 			.sType       = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
    704 			.polygonMode = VK_POLYGON_MODE_FILL,
    705 			.lineWidth   = 1.0f,
    706 			.cullMode    = VK_CULL_MODE_BACK_BIT,
    707 			.frontFace   = VK_FRONT_FACE_CLOCKWISE,
    708 		};
    709 
    710 		VkPipelineMultisampleStateCreateInfo multisampling_info = {
    711 			.sType                = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
    712 			.rasterizationSamples = vulkan_context->gpu_info.max_msaa_samples,
    713 		};
    714 
    715 		VkPipelineDepthStencilStateCreateInfo depth_test_create_info = {
    716 			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
    717 			.depthTestEnable       = 1,
    718 			.depthWriteEnable      = 1,
    719 			.depthCompareOp        = VK_COMPARE_OP_LESS,
    720 			.depthBoundsTestEnable = 1,
    721 			.stencilTestEnable     = 0,
    722 			.front                 = {0},
    723 			.back                  = {0},
    724 			.minDepthBounds        = 0.0f,
    725 			.maxDepthBounds        = 1.0f,
    726 		};
    727 
    728 		u32 colour_mask = VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT;
    729 		VkPipelineColorBlendAttachmentState blend_state = {
    730 			.colorWriteMask      = colour_mask,
    731 			.blendEnable         = 1,
    732 			.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
    733 			.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
    734 			.colorBlendOp        = VK_BLEND_OP_ADD,
    735 			.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
    736 			.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
    737 			.alphaBlendOp        = VK_BLEND_OP_ADD,
    738 		};
    739 
    740 		VkPipelineColorBlendStateCreateInfo colour_blend_state_create = {
    741 			.sType           = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
    742 			.logicOpEnable   = 0,
    743 			.logicOp         = VK_LOGIC_OP_COPY,
    744 			.attachmentCount = 1,
    745 			.pAttachments    = &blend_state,
    746 		};
    747 
    748 		VkDynamicState dynamic_states[] = {
    749 			VK_DYNAMIC_STATE_VIEWPORT,
    750 			VK_DYNAMIC_STATE_SCISSOR,
    751 		};
    752 
    753 		VkPipelineDynamicStateCreateInfo dynamic_state_info = {
    754 			.sType             = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
    755 			.dynamicStateCount = countof(dynamic_states),
    756 			.pDynamicStates    = dynamic_states,
    757 		};
    758 
    759 		//VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_SRGB;
    760 		VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_UNORM;
    761 		VkPipelineRenderingCreateInfo rendering_create_info = {
    762 			.sType                   = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
    763 			.colorAttachmentCount    = 1,
    764 			.pColorAttachmentFormats = &colour_attachment_format,
    765 			.depthAttachmentFormat   = vulkan_context->depth_stencil_format,
    766 			.stencilAttachmentFormat = vulkan_context->depth_stencil_format,
    767 		};
    768 
    769 		VkGraphicsPipelineCreateInfo pci = {
    770 			.sType               = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
    771 			.pNext               = &rendering_create_info,
    772 			.stageCount          = countof(shader_stage_create_infos),
    773 			.pStages             = shader_stage_create_infos,
    774 			.pVertexInputState   = &vertex_input_info,
    775 			.pInputAssemblyState = &input_assembly_info,
    776 			.pViewportState      = &viewport_info,
    777 			.pRasterizationState = &rasterization_info,
    778 			.pMultisampleState   = &multisampling_info,
    779 			.pDepthStencilState  = &depth_test_create_info,
    780 			.pColorBlendState    = &colour_blend_state_create,
    781 			.pDynamicState       = &dynamic_state_info,
    782 			.layout              = result.layout,
    783 		};
    784 
    785 		vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline);
    786 
    787 		s8 extras[] = {
    788 			[VulkanShaderKind_Vertex]   = s8_comp("Vertex Module"),
    789 			[VulkanShaderKind_Mesh]     = s8_comp("Mesh Module"),
    790 			[VulkanShaderKind_Fragment] = s8_comp("Fragment Module"),
    791 		};
    792 		assert(infos[0].kind < countof(extras));
    793 		assert(infos[1].kind < countof(extras));
    794 
    795 		vk_label_object(PIPELINE,        result.pipeline, str8_from_s8(infos[0].name), str8("Pipeline"));
    796 		vk_label_object(PIPELINE_LAYOUT, result.layout,   str8_from_s8(infos[0].name), str8("Pipeline Layout"));
    797 		//vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]);
    798 		//vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]);
    799 	}
    800 
    801 	if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0);
    802 	if (modules[1]) vkDestroyShaderModule(vulkan_context->device, modules[1], 0);
    803 
    804 	if (result.pipeline == 0) result = vulkan_context->default_graphics_pipeline;
    805 
    806 	return result;
    807 }
    808 
    809 function VulkanSemaphore
    810 vk_make_semaphore(OSHandle *export)
    811 {
    812 	VulkanContext *vk = vulkan_context;
    813 
    814 	VkSemaphoreCreateInfo       sci  = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
    815 	VkExportSemaphoreCreateInfo esci = {
    816 		.sType       = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
    817 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
    818 		                          : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    819 	};
    820 	VkSemaphoreTypeCreateInfo stc = {
    821 		.sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
    822 		.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
    823 	};
    824 
    825 	if (export) sci.pNext = &esci;
    826 	else        sci.pNext = &stc;
    827 
    828 	VulkanSemaphore result = {0};
    829 
    830 	vkCreateSemaphore(vk->device, &sci, 0, &result.semaphore);
    831 
    832 	if (export) {
    833 		if (OS_WINDOWS) {
    834 			VkSemaphoreGetWin32HandleInfoKHR ghi = {
    835 				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
    836 				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
    837 				.semaphore  = result.semaphore,
    838 			};
    839 			void *handle;
    840 			vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle);
    841 			export->value[0] = (u64)handle;
    842 		} else {
    843 			VkSemaphoreGetFdInfoKHR ghi = {
    844 				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
    845 				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    846 				.semaphore  = result.semaphore,
    847 			};
    848 			i32 handle;
    849 			vkGetSemaphoreFdKHR(vk->device, &ghi, &handle);
    850 			export->value[0] = (u64)handle;
    851 		}
    852 	}
    853 
    854 	return result;
    855 }
    856 
    857 function void
    858 vk_release_memory(VkDeviceMemory memory, u64 size)
    859 {
    860 	VulkanContext *vk = vulkan_context;
    861 	vkFreeMemory(vk->device, memory, 0);
    862 	atomic_add_u64(&vk->gpu_info.gpu_heap_used, -size);
    863 }
    864 
    865 function b32
    866 vk_allocate_memory(VkDeviceMemory *memory, u64 size, VulkanMemoryKind kind, VkMemoryAllocateFlags flags,
    867                    VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, OSHandle *export)
    868 {
    869 	VulkanContext *vk = vulkan_context;
    870 
    871 	VkExportMemoryAllocateInfo export_info = {
    872 		.sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
    873 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
    874 		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    875 	};
    876 
    877 	VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
    878 		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
    879 		.flags = flags,
    880 		.pNext = dedicated_allocate_info,
    881 	};
    882 
    883 	if (export) {
    884 		export_info.pNext = dedicated_allocate_info;
    885 		memory_allocate_flags_info.pNext = &export_info;
    886 	}
    887 
    888 	VkMemoryAllocateInfo memory_allocate_info = {
    889 		.sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    890 		.allocationSize  = size,
    891 		.memoryTypeIndex = vk->memory_info.memory_type_indices[kind],
    892 		.pNext           = &memory_allocate_flags_info,
    893 	};
    894 
    895 	b32 result = vkAllocateMemory(vk->device, &memory_allocate_info, 0, memory) == VK_SUCCESS;
    896 	if (result) {
    897 		atomic_add_u64(&vk->gpu_info.gpu_heap_used, memory_allocate_info.allocationSize);
    898 
    899 		if (export) {
    900 			if (OS_WINDOWS) {
    901 				VkMemoryGetWin32HandleInfoKHR handle_info = {
    902 					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
    903 					.memory     = *memory,
    904 					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
    905 				};
    906 				void *handle;
    907 				vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle);
    908 				export->value[0] = (u64)handle;
    909 			} else {
    910 				VkMemoryGetFdInfoKHR fd_info = {
    911 					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
    912 					.memory     = *memory,
    913 					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    914 				};
    915 				i32 fd;
    916 				vkGetMemoryFdKHR(vk->device, &fd_info, &fd);
    917 				export->value[0] = (u64)fd;
    918 			}
    919 		}
    920 	}
    921 	return result;
    922 }
    923 
    924 function u32
    925 vk_index_size(VkIndexType type)
    926 {
    927 	u32 result = 0;
    928 	switch (type) {
    929 	case VK_INDEX_TYPE_UINT16:{ result = 2; }break;
    930 	case VK_INDEX_TYPE_UINT32:{ result = 4; }break;
    931 	InvalidDefaultCase;
    932 	}
    933 	return result;
    934 }
    935 
    936 typedef struct {
    937 	GPUBuffer        *gpu_buffer;
    938 	u64               size;
    939 	VulkanUsageFlags  flags;
    940 	u32               queue_family_count;
    941 	u32               queue_family_indices[VulkanTimeline_Count];
    942 	VkIndexType       index_type;
    943 	OSHandle         *export;
    944 	str8              label;
    945 } VulkanBufferAllocateInfo;
    946 
    947 function b32
    948 vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai)
    949 {
    950 	VulkanContext *vk = vulkan_context;
    951 
    952 	// TODO(rnp): this probably should be handled, its usually 4GB. likely
    953 	// need to chain multiple allocations and handle it in shader code
    954 	u64 clamp_size = vk->memory_info.max_allocation_size & ~(vk->memory_info.non_coherent_atom_size - 1);
    955 
    956 	// NOTE(rnp): renderdoc can't handle buffers that are too close to the allocation size limit
    957 	if (renderdoc_attached())
    958 		clamp_size -= MB(8);
    959 
    960 	u64 size = Min(ai->size, clamp_size);
    961 
    962 	VkBufferCreateInfo buffer_create_info = {
    963 		.sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    964 		.usage       = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT|VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
    965 		.size        = size,
    966 		.sharingMode = ai->queue_family_count > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE,
    967 		.queueFamilyIndexCount = ai->queue_family_count,
    968 		.pQueueFamilyIndices   = ai->queue_family_indices,
    969 	};
    970 
    971 	if (ai->flags & VulkanUsageFlag_TransferSource)
    972 		buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
    973 
    974 	if (ai->flags & VulkanUsageFlag_TransferDestination)
    975 		buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    976 
    977 	if (ai->index_type != VK_INDEX_TYPE_NONE_KHR)
    978 		buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
    979 
    980 	VkExternalMemoryBufferCreateInfo external_memory_buffer_create_info = {
    981 		.sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
    982 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
    983 		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    984 	};
    985 
    986 	if (ai->export) buffer_create_info.pNext = &external_memory_buffer_create_info;
    987 
    988 	vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer);
    989 	vk_label_object(BUFFER, vb->buffer, ai->label, str8("Buffer"));
    990 
    991 	VkMemoryRequirements memory_requirements;
    992 	vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements);
    993 
    994 	assert((u64)size <= memory_requirements.size);
    995 	size = memory_requirements.size;
    996 
    997 	VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
    998 		.sType  = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
    999 		.buffer = vb->buffer,
   1000 	};
   1001 
   1002 	/* NOTE(rnp): to create a CPU writable buffer:
   1003 	 * 1. try to allocate and map the entire buffer
   1004 	 *    - this may fail if the buffer is bigger than the BAR size
   1005 	 *      (unknowable from vulkan), or the memory space has become
   1006 	 *      too fragmented (unlikely)
   1007 	 * 2. if allocation or mapping fails we must chain a host buffer
   1008 	 *    for staging. If this happens in practice we should add
   1009 	 *    the ability to import an existing external allocation
   1010 	 */
   1011 	b32 host_read_write = (ai->flags & VulkanUsageFlag_HostReadWrite) != 0;
   1012 	vb->memory_kind = host_read_write ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device;
   1013 
   1014 	b32 result = 0;
   1015 	// TODO(rnp): this may fail if the allocation is too big for the BAR size
   1016 	// it needs to handled properly
   1017 	if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, ai->export)) {
   1018 		result  = 1;
   1019 		ai->gpu_buffer->size = size;
   1020 		vb->memory_size = size;
   1021 
   1022 		vb->index_type = ai->index_type;
   1023 
   1024 		vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, str8("Memory"));
   1025 
   1026 		if (host_read_write)
   1027 			vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer);
   1028 
   1029 		vkBindBufferMemory(vk->device, vb->buffer, vb->memory, 0);
   1030 		VkBufferDeviceAddressInfo buffer_device_address_info = {
   1031 			.sType  = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
   1032 			.buffer = vb->buffer,
   1033 		};
   1034 		ai->gpu_buffer->gpu_pointer = vkGetBufferDeviceAddress(vk->device, &buffer_device_address_info);
   1035 	}
   1036 	return result;
   1037 }
   1038 
   1039 function void
   1040 vk_load_instance(Arena arena, Stream *err)
   1041 {
   1042 	#define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(0, #name);
   1043 	VkBaseProcedureList
   1044 	#undef X
   1045 
   1046 	u32 enabled_validation_layers_count = 0;
   1047 	const char *enabled_validation_layers[countof(vk_validation_layers)];
   1048 
   1049 	u32 enabled_instance_extensions_count = 0;
   1050 	const char *enabled_instance_extensions[countof(vk_required_instance_extensions) + countof(vk_instance_debug_extensions)];
   1051 
   1052 	static_assert(countof(vk_required_instance_extensions) == 0, "");
   1053 	//for EachElement(vk_required_instance_extensions, it)
   1054 	//	enabled_instance_extensions[enabled_instance_extensions_count++] = vk_required_instance_extensions[it];
   1055 
   1056 	#if BEAMFORMER_DEBUG
   1057 	{
   1058 		u32 layer_count = 0;
   1059 		vkEnumerateInstanceLayerProperties(&layer_count, 0);
   1060 
   1061 		VkLayerProperties *layers      = push_array(&arena, VkLayerProperties, layer_count);
   1062 		str8              *layer_str8s = push_array(&arena, str8,              layer_count);
   1063 		vkEnumerateInstanceLayerProperties(&layer_count, layers);
   1064 
   1065 		for (u32 i = 0; i < layer_count; i++)
   1066 			layer_str8s[i] = str8_from_c_str(layers[i].layerName);
   1067 
   1068 		for EachElement(vk_validation_layers, it) {
   1069 			for(u32 i = 0; i < layer_count; i++) {
   1070 				if (str8_equal(vk_validation_layers[it], layer_str8s[i])) {
   1071 					u32 index = enabled_validation_layers_count++;
   1072 					enabled_validation_layers[index]   = (char *)vk_validation_layers[it].data;
   1073 					vulkan_config.layers.enabled.E[it] = 1;
   1074 					vulkan_config.layers.version.E[it] = layers[i].specVersion;
   1075 					break;
   1076 				}
   1077 			}
   1078 		}
   1079 
   1080 		if (countof(vk_validation_layers) != enabled_validation_layers_count) {
   1081 			i32 missing_count = countof(vk_validation_layers) - enabled_validation_layers_count;
   1082 			stream_append_s8s(err, vulkan_info("missing validation layer"),
   1083 			                  missing_count > 1 ? s8("s:") : s8(":"), s8("\n"));
   1084 
   1085 			for EachElement(vk_validation_layers, it)
   1086 				if (vulkan_config.layers.enabled.E[it] == 0)
   1087 					stream_append_s8s(err, s8("    "), s8_from_str8(vk_validation_layers[it]), s8("\n"));
   1088 		}
   1089 
   1090 		u32 instance_extension_count = 0;
   1091 		vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, 0);
   1092 
   1093 		VkExtensionProperties *instance_extensions = push_array(&arena, VkExtensionProperties, instance_extension_count);
   1094 		s8                    *instance_ext_s8s    = push_array(&arena, s8,                    instance_extension_count);
   1095 		vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, instance_extensions);
   1096 		for EachIndex(instance_extension_count, it)
   1097 			instance_ext_s8s[it] = c_str_to_s8(instance_extensions[it].extensionName);
   1098 
   1099 		for EachElement(vk_instance_debug_extensions, it) {
   1100 			for EachIndex(instance_extension_count, i) {
   1101 				if (s8_equal(vk_instance_debug_extensions[it], instance_ext_s8s[i])) {
   1102 					u32 index = enabled_instance_extensions_count++;
   1103 					enabled_instance_extensions[index] = (char *)vk_instance_debug_extensions[it].data;
   1104 					vulkan_config.instance.E[it] = 1;
   1105 					break;
   1106 				}
   1107 			}
   1108 		}
   1109 	}
   1110 	#endif
   1111 
   1112 	VkApplicationInfo app_info = {
   1113 		.sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
   1114 		.pApplicationName   = BEAMFORMER_NAME_STRING,
   1115 		.applicationVersion = 0,
   1116 		.pEngineName        = "No Engine",
   1117 		.engineVersion      = 0,
   1118 		.apiVersion         = VK_MAKE_API_VERSION(1, 3, 0, 0),
   1119 	};
   1120 
   1121 	VkInstanceCreateInfo instance_create_info = {
   1122 		.sType                   = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
   1123 		.pApplicationInfo        = &app_info,
   1124 		.ppEnabledExtensionNames = enabled_instance_extensions,
   1125 		.enabledExtensionCount   = enabled_instance_extensions_count,
   1126 		.ppEnabledLayerNames     = enabled_validation_layers,
   1127 		.enabledLayerCount       = enabled_validation_layers_count,
   1128 	};
   1129 
   1130 	#if 0 && BEAMFORMER_DEBUG
   1131 	VkValidationFeatureEnableEXT validation_feature_enables[] = {
   1132 		VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
   1133 		VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
   1134 		VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
   1135 		VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
   1136 	};
   1137 
   1138 	VkValidationFeaturesEXT validation_features = {
   1139 		.sType                         = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
   1140 		.enabledValidationFeatureCount = countof(validation_feature_enables),
   1141 		.pEnabledValidationFeatures    = validation_feature_enables,
   1142 	};
   1143 
   1144 	instance_create_info.pNext = &validation_features;
   1145 	#endif
   1146 
   1147 	vkCreateInstance(&instance_create_info, 0, &vulkan_context->handle);
   1148 
   1149 	#define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(vulkan_context->handle, #name);
   1150 	VkInstanceProcedureList
   1151 	#undef X
   1152 }
   1153 
   1154 function void
   1155 vk_load_physical_device(Arena arena, Stream *err)
   1156 {
   1157 	VulkanContext *vk = vulkan_context;
   1158 
   1159 	u32 device_count;
   1160 	vkEnumeratePhysicalDevices(vk->handle, &device_count, 0);
   1161 
   1162 	VkPhysicalDevice *devices = push_array(&arena, typeof(*devices), device_count);
   1163 	vkEnumeratePhysicalDevices(vk->handle, &device_count, devices);
   1164 
   1165 	i32 best_index = -1, best_score = -1;
   1166 	for (u32 i = 0; i < device_count; i++) {
   1167 		Arena scratch = arena;
   1168 		VkPhysicalDeviceProperties2 *dp = push_struct(&scratch, typeof(*dp));
   1169 		dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
   1170 		vkGetPhysicalDeviceProperties2(devices[i], dp);
   1171 
   1172 		i32 score = 0;
   1173 		if (dp->properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)
   1174 			score++;
   1175 
   1176 		if (score > best_score) {
   1177 			best_score = score;
   1178 			best_index = (i32)i;
   1179 		}
   1180 	}
   1181 
   1182 	vk->physical_device = best_index >= 0 ? devices[best_index] : 0;
   1183 	if (!vk->physical_device)
   1184 		fatal(vulkan_info("failed to find a suitable GPU\n"));
   1185 
   1186 	VkPhysicalDeviceProperties2        dp   = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
   1187 	VkPhysicalDeviceVulkan11Properties v11p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES};
   1188 	dp.pNext = &v11p;
   1189 
   1190 	vkGetPhysicalDeviceProperties2(vk->physical_device, &dp);
   1191 
   1192 	stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n"));
   1193 	stream_append_s8(err, vulkan_info("Vulkan Version: "));
   1194 	{
   1195 		u32 dv = dp.properties.apiVersion;
   1196 		stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv));
   1197 	}
   1198 
   1199 	{
   1200 		Arena scratch = arena;
   1201 		u32 extension_count = 0;
   1202 		vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, 0);
   1203 		VkExtensionProperties *extensions = push_array(&scratch, VkExtensionProperties, extension_count);
   1204 		vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, extensions);
   1205 
   1206 		s8 *ext_str8s = push_array(&scratch, s8, extension_count);
   1207 		for (u32 index = 0; index < extension_count; index++)
   1208 			ext_str8s[index] = c_str_to_s8(extensions[index].extensionName);
   1209 
   1210 		b8 *supported = push_array(&scratch, b8, countof(vk_required_device_extensions));
   1211 		for EachIndex(extension_count, index)
   1212 			for EachElement(vk_required_device_extensions, it)
   1213 				supported[it] |= s8_equal(vk_required_device_extensions[it], ext_str8s[index]);
   1214 
   1215 		u32 supported_count = 0;
   1216 		for EachElement(vk_required_device_extensions, it)
   1217 			supported_count += supported[it];
   1218 
   1219 		u32 missing_count = countof(vk_required_device_extensions) - supported_count;
   1220 		if (missing_count) {
   1221 			stream_append_s8s(err, vulkan_info("fatal error: missing required device extension"),
   1222 			                  missing_count > 1 ? s8("s") : s8(""), s8(":\n"));
   1223 			for EachElement(vk_required_device_extensions, it) {
   1224 				if (!supported[it]) {
   1225 					s8 name = vk_required_device_extensions[it];
   1226 					stream_append_s8s(err, vulkan_info("    "), name, s8("\n"));
   1227 				}
   1228 			}
   1229 			fatal(stream_to_s8(err));
   1230 		}
   1231 
   1232 		for EachIndex(extension_count, index)
   1233 			for EachElement(vk_optional_device_extensions, it)
   1234 				vulkan_config.optional.E[it] |= s8_equal(vk_optional_device_extensions[it], ext_str8s[index]);
   1235 
   1236 		#if BEAMFORMER_DEBUG
   1237 		for EachIndex(extension_count, index)
   1238 			for EachElement(vk_debug_extensions, it)
   1239 				vulkan_config.debug.E[it] |= s8_equal(vk_debug_extensions[it], ext_str8s[index]);
   1240 		#endif
   1241 	}
   1242 
   1243 	{
   1244 		VkPhysicalDeviceFeatures2        df   = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
   1245 		VkPhysicalDeviceVulkan11Features v11f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES};
   1246 		VkPhysicalDeviceVulkan12Features v12f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
   1247 		VkPhysicalDeviceVulkan13Features v13f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES};
   1248 		df.pNext   = &v11f;
   1249 		v11f.pNext = &v12f;
   1250 		v12f.pNext = &v13f;
   1251 		vkGetPhysicalDeviceFeatures2(vk->physical_device, &df);
   1252 
   1253 		{
   1254 			b32 all_supported = 1;
   1255 			#define X(name, ...) all_supported &= df.features.name;
   1256 			VK_REQUIRED_PHYSICAL_FEATURES
   1257 			#undef X
   1258 
   1259 			if (!all_supported) {
   1260 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1261 				#define X(name, ...) if (!df.features.name) stream_append_s8(err, s8("    " #name "\n"));
   1262 				VK_REQUIRED_PHYSICAL_FEATURES
   1263 				#undef X
   1264 				fatal(stream_to_s8(err));
   1265 			}
   1266 		}
   1267 
   1268 		{
   1269 			b32 all_supported = 1;
   1270 			#define X(name, ...) all_supported &= v11f.name;
   1271 			VK_REQUIRED_PHYSICAL_11_FEATURES
   1272 			#undef X
   1273 
   1274 			if (!all_supported) {
   1275 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1276 				#define X(name, ...) if (!v11f.name) stream_append_s8(err, s8("    " #name "\n"));
   1277 				VK_REQUIRED_PHYSICAL_11_FEATURES
   1278 				#undef X
   1279 				fatal(stream_to_s8(err));
   1280 			}
   1281 		}
   1282 
   1283 		{
   1284 			b32 all_supported = 1;
   1285 			#define X(name, ...) all_supported &= v12f.name;
   1286 			VK_REQUIRED_PHYSICAL_12_FEATURES
   1287 			#undef X
   1288 
   1289 			if (!all_supported) {
   1290 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1291 				#define X(name, ...) if (!v12f.name) stream_append_s8(err, s8("    " #name "\n"));
   1292 				VK_REQUIRED_PHYSICAL_12_FEATURES
   1293 				#undef X
   1294 				fatal(stream_to_s8(err));
   1295 			}
   1296 		}
   1297 
   1298 		{
   1299 			b32 all_supported = 1;
   1300 			#define X(name, ...) all_supported &= v13f.name;
   1301 			VK_REQUIRED_PHYSICAL_13_FEATURES
   1302 			#undef X
   1303 
   1304 			if (!all_supported) {
   1305 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1306 				#define X(name, ...) if (!v13f.name) stream_append_s8(err, s8("    " #name "\n"));
   1307 				VK_REQUIRED_PHYSICAL_13_FEATURES
   1308 				#undef X
   1309 				fatal(stream_to_s8(err));
   1310 			}
   1311 		}
   1312 
   1313 		if (vulkan_config.optional.cooperative_matrix) {
   1314 			Arena scratch = arena;
   1315 			u32 property_count = 0;
   1316 			vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, 0);
   1317 
   1318 			VkCooperativeMatrixPropertiesKHR *mat = push_array(&scratch, VkCooperativeMatrixPropertiesKHR, property_count);
   1319 
   1320 			// NOTE(rnp): validation layer stupidity
   1321 			for EachIndex(property_count, it)
   1322 				mat[it].sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
   1323 
   1324 			vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, mat);
   1325 			b32 supported = 0;
   1326 			// TODO(rnp): for now the requirements are hardcoded, it is possible to support a couple
   1327 			// variations if needed.
   1328 			for EachIndex(property_count, it) {
   1329 				b32 match = 1;
   1330 				supported &= mat[it].scope == VK_SCOPE_SUBGROUP_KHR;
   1331 
   1332 				supported &= mat[it].MSize == 16;
   1333 				supported &= mat[it].NSize == 16;
   1334 				supported &= mat[it].KSize == 16;
   1335 
   1336 				supported &= mat[it].AType == VK_COMPONENT_TYPE_FLOAT16_KHR;
   1337 				supported &= mat[it].BType == VK_COMPONENT_TYPE_FLOAT16_KHR;
   1338 				supported &= mat[it].CType == VK_COMPONENT_TYPE_FLOAT32_KHR;
   1339 				supported &= mat[it].ResultType == VK_COMPONENT_TYPE_FLOAT32_KHR;
   1340 
   1341 				supported |= match;
   1342 			}
   1343 			vk->gpu_info.cooperative_matrix = supported;
   1344 		}
   1345 	}
   1346 
   1347 	VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2};
   1348 	vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp);
   1349 
   1350 	VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties;
   1351 
   1352 	// NOTE(rnp): vulkan spec says that highest performance memory types must
   1353 	// come first. just take the first one found.
   1354 
   1355 	for (u32 i = 0; i < bmp->memoryHeapCount; i++) {
   1356 		if (bmp->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
   1357 			vk->memory_info.gpu_heap_index = i;
   1358 			break;
   1359 		}
   1360 	}
   1361 
   1362 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1363 		if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
   1364 			assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index);
   1365 			vk->memory_info.memory_type_indices[VulkanMemoryKind_Device] = i;
   1366 			break;
   1367 		}
   1368 	}
   1369 
   1370 	// TODO(rnp): it is possible that this isn't available. for devices like that we would need
   1371 	// to copy into a staging buffer then DMA. For now that is unsupported.
   1372 	u32 bar_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
   1373 	i32 bar_index = -1;
   1374 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1375 		if ((bmp->memoryTypes[i].propertyFlags & bar_flags) == bar_flags) {
   1376 			assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index);
   1377 			bar_index = (i32)i;
   1378 			break;
   1379 		}
   1380 	}
   1381 
   1382 	// TODO(rnp): this shouldn't be fatal
   1383 	if (bar_index == -1) {
   1384 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support host bar memory\n"));
   1385 		fatal(stream_to_s8(err));
   1386 	}
   1387 
   1388 	vk->memory_info.memory_type_indices[VulkanMemoryKind_BAR] = bar_index;
   1389 
   1390 	vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = -1;
   1391 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1392 		if ((bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == 0) {
   1393 			if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
   1394 				vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = (i8)i;
   1395 				break;
   1396 			}
   1397 		}
   1398 	}
   1399 
   1400 	if (vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] == -1) {
   1401 		stream_append_s8(err, vulkan_info("fatal error: vulkan driver does not provide host visible memory\n"));
   1402 		fatal(stream_to_s8(err));
   1403 	}
   1404 
   1405 	for EachElement(vk->memory_info.memory_type_indices, it) {
   1406 		u32 ti    = vk->memory_info.memory_type_indices[it];
   1407 		u32 flags = bmp->memoryTypes[ti].propertyFlags;
   1408 		vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
   1409 	}
   1410 
   1411 	vulkan_config.driver_api_version       = dp.properties.apiVersion;
   1412 	vk->memory_info.max_allocation_size    = v11p.maxMemoryAllocationSize;
   1413 	vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize;
   1414 	vk->gpu_info.vendor                    = dp.properties.vendorID;
   1415 	vk->gpu_info.gpu_heap_size             = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size;
   1416 	vk->gpu_info.timestamp_period_ns       = dp.properties.limits.timestampPeriod;
   1417 	vk->gpu_info.max_image_dimension_2D    = dp.properties.limits.maxImageDimension2D;
   1418 	vk->gpu_info.max_image_dimension_3D    = dp.properties.limits.maxImageDimension3D;
   1419 	vk->gpu_info.max_msaa_samples          = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts);
   1420 	vk->gpu_info.subgroup_size             = v11p.subgroupSize;
   1421 	vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize;
   1422 
   1423 	// IMPORTANT(rnp): memory must only be pushed at the end of the function
   1424 	vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName));
   1425 
   1426 	#if BEAMFORMER_DEBUG
   1427 	{
   1428 		b32 mismatch = 0;
   1429 		for EachElement(vk_validation_layers, it) {
   1430 			u32 lv = vulkan_config.layers.version.E[it];
   1431 			u32 dv = vulkan_config.driver_api_version;
   1432 			if (lv < dv) {
   1433 				mismatch = 1;
   1434 				stream_append_s8s(err, vulkan_info("warning: validaton layer \""),
   1435 				                  s8_from_str8(vk_validation_layers[it]), s8("\" version: "));
   1436 				stream_appendf(err, "%u.%u.%u", VK_API_VERSION_MAJOR(lv), VK_API_VERSION_MINOR(lv), VK_API_VERSION_PATCH(lv));
   1437 				stream_append_s8(err, s8(" lower than driver API version: "));
   1438 				stream_appendf(err, "%u.%u.%u\n", VK_API_VERSION_MAJOR(dv), VK_API_VERSION_MINOR(dv), VK_API_VERSION_PATCH(dv));
   1439 			}
   1440 		}
   1441 
   1442 		if (mismatch)
   1443 			stream_append_s8(err, vulkan_info("DO NOT report any bugs without updating your validation layers!\n"));
   1444 	}
   1445 	#endif
   1446 }
   1447 
   1448 function void
   1449 vk_load_queues(Arena *memory, Stream *err)
   1450 {
   1451 	///////////////////////////////////////////////////////
   1452 	// NOTE(rnp): try to allocate an appropriate queue for
   1453 	// each of the following tasks:
   1454 	//   * UI Rendering (Graphics)
   1455 	//   * Beamforming  (Compute)
   1456 	//   * Upload       (Transfer)
   1457 	// Then create a logical device ready for use
   1458 
   1459 	VulkanContext *vk = vulkan_context;
   1460 
   1461 	u32 queue_family_count;
   1462 	vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, 0);
   1463 
   1464 	TempArena arena_save = begin_temp_arena(memory);
   1465 	VkQueueFamilyProperties *queues = push_array(memory, typeof(*queues), queue_family_count);
   1466 	vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, queues);
   1467 
   1468 	i32 queue_indices[VulkanQueueKind_Count];
   1469 	for EachElement(queue_indices, it) queue_indices[it] = -1;
   1470 
   1471 	///////////////////////////////////////////////////////////////
   1472 	// NOTE(rnp): start by assigning queue families for each queue
   1473 
   1474 	/* NOTE(rnp): try for exclusive transfer queue */
   1475 	#if !ForceSingleQueue
   1476 	{
   1477 		u32 mask = VK_QUEUE_GRAPHICS_BIT|VK_QUEUE_COMPUTE_BIT|VK_QUEUE_TRANSFER_BIT;
   1478 		u32 max_timestamp_bits = 0;
   1479 		for (u32 index = 0; index < queue_family_count; index++) {
   1480 			if ((queues[index].queueFlags & mask) == VK_QUEUE_TRANSFER_BIT) {
   1481 				if (queues[index].timestampValidBits > max_timestamp_bits) {
   1482 					max_timestamp_bits = queues[index].timestampValidBits;
   1483 					queue_indices[VulkanQueueKind_Transfer] = (i32)index;
   1484 				}
   1485 			}
   1486 		}
   1487 	}
   1488 
   1489 	/* NOTE(rnp): try for compute separate from graphics */
   1490 	for (u32 index = 0; index < queue_family_count; index++) {
   1491 		if ((queues[index].queueFlags & VK_QUEUE_COMPUTE_BIT)  != 0 &&
   1492 		    (queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
   1493 		{
   1494 			queue_indices[VulkanQueueKind_Compute] = (i32)index;
   1495 			break;
   1496 		}
   1497 	}
   1498 	#endif /* !ForceSingleQueue */
   1499 
   1500 	/* NOTE(rnp): find graphics family and verify it is exclusive */
   1501 	b32 multi_graphics = 0;
   1502 	for (u32 index = 0; index < queue_family_count; index++) {
   1503 		if ((queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
   1504 			// TODO(rnp): check for presentation support
   1505 			multi_graphics = queue_indices[VulkanQueueKind_Graphics] != -1;
   1506 			queue_indices[VulkanQueueKind_Graphics] = (i32)index;
   1507 		}
   1508 	}
   1509 
   1510 	if (multi_graphics)
   1511 		stream_append_s8(err, vulkan_info("warning: multiple queue families reported graphics support\n"));
   1512 
   1513 	if (queue_indices[VulkanQueueKind_Graphics] == -1) {
   1514 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support graphics presentation\n"));
   1515 		fatal(stream_to_s8(err));
   1516 	}
   1517 
   1518 	if (queue_indices[VulkanQueueKind_Compute] == -1)
   1519 		if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
   1520 			queue_indices[VulkanQueueKind_Compute] = queue_indices[VulkanQueueKind_Graphics];
   1521 
   1522 	if (queue_indices[VulkanQueueKind_Compute] == -1) {
   1523 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support compute\n"));
   1524 		fatal(stream_to_s8(err));
   1525 	}
   1526 
   1527 	if (queue_indices[VulkanQueueKind_Transfer] == -1) {
   1528 		if ((queues[queue_indices[VulkanQueueKind_Compute]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
   1529 			queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Compute];
   1530 		else if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
   1531 			queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Graphics];
   1532 	}
   1533 
   1534 	if (queue_indices[VulkanQueueKind_Transfer] == -1) {
   1535 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support data transfer\n"));
   1536 		fatal(stream_to_s8(err));
   1537 	}
   1538 
   1539 	/////////////////////////////////////////////////////////////////
   1540 	// NOTE(rnp): if queues share families try to allocate subqueues
   1541 
   1542 	u32 assigned_subindices[VulkanQueueKind_Count] = {0};
   1543 	i32 queue_subindices[VulkanQueueKind_Count]    = {0};
   1544 
   1545 	assigned_subindices[VulkanQueueKind_Graphics] += 1;
   1546 
   1547 	if (queue_indices[VulkanQueueKind_Compute] == queue_indices[VulkanQueueKind_Graphics]) {
   1548 		if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount)
   1549 			queue_subindices[VulkanQueueKind_Compute] = assigned_subindices[VulkanQueueKind_Graphics]++;
   1550 	} else {
   1551 		assigned_subindices[VulkanQueueKind_Compute] += 1;
   1552 	}
   1553 
   1554 	if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Graphics]) {
   1555 		if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount)
   1556 			queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Graphics]++;
   1557 	} else if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Compute]) {
   1558 		if (assigned_subindices[VulkanQueueKind_Compute] < queues[queue_indices[VulkanQueueKind_Compute]].queueCount)
   1559 			queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Compute]++;
   1560 	} else {
   1561 		assigned_subindices[VulkanQueueKind_Transfer] += 1;
   1562 	}
   1563 
   1564 	for EachElement(assigned_subindices, it)
   1565 		vk->unique_queues += assigned_subindices[it];
   1566 
   1567 	end_temp_arena(arena_save);
   1568 
   1569 	/////////////////////////////////////////////
   1570 	// NOTE(rnp): fill in info and create device
   1571 	for EachElement(vk->queues, it) {
   1572 		u32 index = queue_subindices[it];
   1573 		for (i32 i = 0; i < queue_indices[it]; i++)
   1574 			index += assigned_subindices[i];
   1575 		vk->queue_indices[it] = index;
   1576 	}
   1577 
   1578 	for EachElement(vk->queues, it) {
   1579 		if (vk->queues[vk->queue_indices[it]] == 0) {
   1580 			vk->queues[vk->queue_indices[it]] = push_struct(memory, VulkanQueue);
   1581 			vk->queues[vk->queue_indices[it]]->queue_family = queue_indices[it];
   1582 			vk->queues[vk->queue_indices[it]]->queue_index  = queue_subindices[it];
   1583 		}
   1584 		vk->queues[it] = vk->queues[vk->queue_indices[it]];
   1585 	}
   1586 
   1587 	for EachElement(vk->command_pools, it)
   1588 		vk->command_pools[it] = push_struct(memory, VulkanCommandPool);
   1589 
   1590 	VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count];
   1591 
   1592 	f32 queue_priorities[VulkanQueueKind_Count][VulkanQueueKind_Count];
   1593 	for (u32 i = 0; i < VulkanQueueKind_Count; i++)
   1594 		for (u32 j = 0; j < VulkanQueueKind_Count; j++)
   1595 			queue_priorities[i][j] = 1.0f;
   1596 	queue_priorities[queue_indices[VulkanQueueKind_Compute]][queue_subindices[VulkanQueueKind_Compute]] = 0.5f;
   1597 
   1598 	u32 queue_create_index = 0;
   1599 	b32 queue_info_filled[VulkanQueueKind_Count] = {0};
   1600 	for (u32 q = 0; q < vk->unique_queues; q++) {
   1601 		u32 base_q = queue_indices[q];
   1602 		if (!queue_info_filled[base_q]) {
   1603 			queue_create_infos[queue_create_index++] = (VkDeviceQueueCreateInfo){
   1604 				.sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
   1605 				.queueFamilyIndex = base_q,
   1606 				.queueCount       = assigned_subindices[q],
   1607 				.pQueuePriorities = queue_priorities[q],
   1608 			};
   1609 		}
   1610 		queue_info_filled[base_q] = 1;
   1611 	}
   1612 
   1613 	u32 enabled_count = 0;
   1614 	const char *enabled_extensions[MAX_ENABLED_EXTENSIONS];
   1615 
   1616 	for EachElement(vk_required_device_extensions, it)
   1617 		enabled_extensions[enabled_count++] = (char *)vk_required_device_extensions[it].data;
   1618 
   1619 	for EachElement(vk_optional_device_extensions, it)
   1620 		if (vulkan_config.optional.E[it])
   1621 			enabled_extensions[enabled_count++] = (char *)vk_optional_device_extensions[it].data;
   1622 
   1623 	for EachElement(vk_debug_extensions, it)
   1624 		if (vulkan_config.debug.E[it])
   1625 			enabled_extensions[enabled_count++] = (char *)vk_debug_extensions[it].data;
   1626 
   1627 	VkDeviceCreateInfo device_create_info = {
   1628 		.sType                   = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
   1629 		.pQueueCreateInfos       = queue_create_infos,
   1630 		.queueCreateInfoCount    = queue_create_index,
   1631 		.ppEnabledExtensionNames = enabled_extensions,
   1632 		.enabledExtensionCount   = enabled_count,
   1633 	};
   1634 
   1635 	VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR pdsre = {
   1636 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR,
   1637 		.shaderRelaxedExtendedInstruction = 1,
   1638 	};
   1639 	if (vulkan_config.debug.shader_relaxed_extended_instruction) {
   1640 		pdsre.pNext = (void *)device_create_info.pNext;
   1641 		device_create_info.pNext = &pdsre;
   1642 	}
   1643 
   1644 	VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_mat_features = {
   1645 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR,
   1646 		.cooperativeMatrix = 1,
   1647 		.cooperativeMatrixRobustBufferAccess = 0,
   1648 	};
   1649 	if (vk->gpu_info.cooperative_matrix) {
   1650 		coop_mat_features.pNext = (void *)device_create_info.pNext;
   1651 		device_create_info.pNext = &coop_mat_features;
   1652 	}
   1653 
   1654 	VkPhysicalDeviceRobustness2FeaturesKHR robust2 = {
   1655 		.sType          = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_KHR,
   1656 		.pNext          = (void *)device_create_info.pNext,
   1657 		.nullDescriptor = 1,
   1658 	};
   1659 	device_create_info.pNext = &robust2;
   1660 
   1661 	VkPhysicalDeviceVulkan13Features v13f = {
   1662 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   1663 		.pNext = (void *)device_create_info.pNext,
   1664 		#define X(name, ...) .name = 1,
   1665 		VK_REQUIRED_PHYSICAL_13_FEATURES
   1666 		#undef X
   1667 	};
   1668 	device_create_info.pNext = &v13f;
   1669 
   1670 	VkPhysicalDeviceVulkan12Features v12f = {
   1671 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   1672 		.pNext = (void *)device_create_info.pNext,
   1673 		#define X(name, ...) .name = 1,
   1674 		VK_REQUIRED_PHYSICAL_12_FEATURES
   1675 		#undef X
   1676 	};
   1677 	device_create_info.pNext = &v12f;
   1678 
   1679 	VkPhysicalDeviceVulkan11Features v11f = {
   1680 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   1681 		.pNext = (void *)device_create_info.pNext,
   1682 		#define X(name, ...) .name = 1,
   1683 		VK_REQUIRED_PHYSICAL_11_FEATURES
   1684 		#undef X
   1685 	};
   1686 	device_create_info.pNext = &v11f;
   1687 
   1688 	VkPhysicalDeviceFeatures2 device_features = {
   1689 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
   1690 		.pNext = (void *)device_create_info.pNext,
   1691 		.features = {
   1692 			#define X(name, ...) .name = 1,
   1693 			VK_REQUIRED_PHYSICAL_FEATURES
   1694 			#undef X
   1695 		},
   1696 	};
   1697 	device_create_info.pNext = &device_features;
   1698 
   1699 	vkCreateDevice(vk->physical_device, &device_create_info, 0, &vk->device);
   1700 
   1701 	#define X(name, ...) name = (name##_fn *)vkGetDeviceProcAddr(vk->device, #name);
   1702 	VkDeviceProcedureList
   1703 	#undef X
   1704 
   1705 	for (u32 q = 0; q < vk->unique_queues; q++) {
   1706 		VulkanQueue *qp = vk->queues[q];
   1707 		vkGetDeviceQueue(vk->device, qp->queue_family, qp->queue_index, &qp->queue);
   1708 
   1709 		qp->timeline_semaphore = vk_make_semaphore(0);
   1710 	}
   1711 
   1712 	vk->queues[VulkanQueueKind_Graphics]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
   1713 	vk->queues[VulkanQueueKind_Compute]->pipeline_stage_flags  |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
   1714 
   1715 	for EachElement(vk->command_pools, it) {
   1716 		VulkanCommandPool *vcp = vk->command_pools[it];
   1717 
   1718 		VkCommandPoolCreateInfo command_pool_create_info = {
   1719 			.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
   1720 			.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   1721 			.queueFamilyIndex = vk->queues[it]->queue_family,
   1722 		};
   1723 
   1724 		vkCreateCommandPool(vk->device, &command_pool_create_info, 0, &vcp->handle);
   1725 
   1726 		VkCommandBufferAllocateInfo command_buffer_allocate_info = {
   1727 			.sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
   1728 			.commandPool        = vcp->handle,
   1729 			.level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
   1730 			.commandBufferCount = countof(vcp->buffers),
   1731 		};
   1732 		vkAllocateCommandBuffers(vk->device, &command_buffer_allocate_info, vcp->buffers);
   1733 
   1734 		VkQueryPoolCreateInfo query_pool_create_info = {
   1735 			.sType      = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
   1736 			.queryType  = VK_QUERY_TYPE_TIMESTAMP,
   1737 			.queryCount = MaxCommandBuffersInFlight * MaxCommandBufferTimestamps,
   1738 		};
   1739 		vkCreateQueryPool(vk->device, &query_pool_create_info, 0, &vcp->query_pool);
   1740 	}
   1741 }
   1742 
   1743 function void
   1744 vk_load_graphics(void)
   1745 {
   1746 	VulkanContext *vk = vulkan_context;
   1747 
   1748 	// NOTE: swap chain image format
   1749 	{
   1750 	}
   1751 
   1752 	// NOTE: depth/stencil format
   1753 	{
   1754 		VkFormat depth_formats[] = {
   1755 			VK_FORMAT_D32_SFLOAT_S8_UINT,
   1756 			VK_FORMAT_D24_UNORM_S8_UINT,
   1757 			VK_FORMAT_D16_UNORM_S8_UINT,
   1758 		};
   1759 
   1760 		vk->depth_stencil_format = VK_FORMAT_UNDEFINED;
   1761 		for EachElement(depth_formats, it) {
   1762 			VkFormatProperties3 format_properties3 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3};
   1763 			VkFormatProperties2 format_properties2 = {
   1764 				.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
   1765 				.pNext = &format_properties3,
   1766 			};
   1767 			vkGetPhysicalDeviceFormatProperties2(vk->physical_device, depth_formats[it], &format_properties2);
   1768 			if (format_properties3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT) {
   1769 				vk->depth_stencil_format = depth_formats[it];
   1770 				break;
   1771 			}
   1772 		}
   1773 	}
   1774 }
   1775 
   1776 function void
   1777 vk_load_descriptor_block(void)
   1778 {
   1779 	// NOTE(rnp):
   1780 	// * One Descriptor Pool
   1781 	// * One Descriptor Set Per Resource Kind
   1782 	// * Shaders know the ResourceKind enumeration
   1783 	// * Shaders know the per set binding points
   1784 
   1785 	VulkanContext *vk = vulkan_context;
   1786 
   1787 	// NOTE(rnp): Pool
   1788 	VkDescriptorPoolSize pool_sizes[] = {
   1789 		{
   1790 			.type            = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
   1791 			.descriptorCount = BeamformerShaderBufferSlot_Count,
   1792 		},
   1793 	};
   1794 	static_assert(countof(pool_sizes) == BeamformerShaderResourceKind_Count, "");
   1795 
   1796 	VkDescriptorPoolCreateInfo pool_create_info = {
   1797 		.sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
   1798 		.maxSets       = BeamformerShaderResourceKind_Count,
   1799 		.poolSizeCount = countof(pool_sizes),
   1800 		.pPoolSizes    = pool_sizes,
   1801 	};
   1802 
   1803 	vkCreateDescriptorPool(vk->device, &pool_create_info, 0, &vk->descriptor_pool);
   1804 
   1805 	// NOTE(rnp): Set Layouts
   1806 	VkDescriptorSetLayoutCreateInfo layout_create_info = {
   1807 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
   1808 	};
   1809 
   1810 	{
   1811 		VkDescriptorSetLayoutBinding layout_bindings[BeamformerShaderBufferSlot_Count];
   1812 		for EachEnumValue(BeamformerShaderBufferSlot, it) {
   1813 			layout_bindings[it] = (VkDescriptorSetLayoutBinding){
   1814 				.binding         = it,
   1815 				.descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
   1816 				.descriptorCount = 1,
   1817 				.stageFlags      = VK_SHADER_STAGE_ALL,
   1818 			};
   1819 		}
   1820 		layout_create_info.bindingCount = countof(layout_bindings),
   1821 		layout_create_info.pBindings    = layout_bindings,
   1822 		vkCreateDescriptorSetLayout(vk->device, &layout_create_info, 0,
   1823 		                            vk->descriptor_set_layouts + BeamformerShaderResourceKind_Buffer);
   1824 	}
   1825 
   1826 	// NOTE(rnp): Sets
   1827 	VkDescriptorSetAllocateInfo set_allocate_info = {
   1828 		.sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
   1829 		.descriptorPool     = vk->descriptor_pool,
   1830 		.descriptorSetCount = countof(vk->descriptor_sets),
   1831 		.pSetLayouts        = vk->descriptor_set_layouts,
   1832 	};
   1833 	static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), "");
   1834 	vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets);
   1835 
   1836 	vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, str8("Beamformer Resources"), str8("Pool"));
   1837 
   1838 	DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) {
   1839 		Arena scratch = vk->arena;
   1840 		for EachElement(vk->descriptor_sets, it) {
   1841 			Stream sb = arena_stream(scratch);
   1842 			stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s"));
   1843 			vk_label_object(DESCRIPTOR_SET,        vk->descriptor_sets[it],        stream_to_str8(&sb), str8("Set"));
   1844 			vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_str8(&sb), str8("Set Layout"));
   1845 		}
   1846 	}
   1847 
   1848 	// NOTE(rnp): junk API requirement that doesn't allow 0 initialization
   1849 	for EachElement(vk->descriptor_buffer_infos, it)
   1850 		vk->descriptor_buffer_infos[it].range = VK_WHOLE_SIZE;
   1851 }
   1852 
   1853 ///////////////////////
   1854 // NOTE(rnp): User API
   1855 
   1856 DEBUG_IMPORT void
   1857 vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err)
   1858 {
   1859 	#define X(name, ...) name = (name##_fn *)os_lookup_symbol(vulkan_library_handle, #name);
   1860 	VkLoaderProcedureList
   1861 	#undef X
   1862 
   1863 	if (!vkGetInstanceProcAddr) {
   1864 		stream_append_s8(err, vulkan_info("fatal error: failed to find \"vkGetInstanceProcAddr\"\n"));
   1865 		fatal(stream_to_s8(err));
   1866 	}
   1867 
   1868 	VulkanContext *vk = vulkan_context;
   1869 	vk->entity_arena = sub_arena_end(memory, KB(64), KB(4));
   1870 	vk->arena        = sub_arena_end(memory, KB(96), KB(4));
   1871 
   1872 	vk_load_instance(vk->arena, err);
   1873 	vk_load_physical_device(vk->arena, err);
   1874 	vk_load_queues(&vk->arena, err);
   1875 	vk_load_graphics();
   1876 	vk_load_descriptor_block();
   1877 
   1878 	read_only local_persist s8 default_compute_shader = s8(""
   1879 		"#version 430 core\n"
   1880 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1881 		"void main() {}\n"
   1882 		"\n");
   1883 	vk->default_compute_pipeline = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader,
   1884 	                                                                    s8("error_compute_shader"), 256);
   1885 
   1886 	read_only local_persist s8 default_vertex_shader = s8(""
   1887 		"#version 430 core\n"
   1888 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1889 		"void main() {gl_Position = vec4(0);}\n"
   1890 		"\n");
   1891 	read_only local_persist s8 default_fragment_shader = s8(""
   1892 		"#version 430 core\n"
   1893 		"layout(location = 0) out vec4 out_colour;"
   1894 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1895 		"void main() {out_colour = vec4(0.5f, 0.0f, 0.5f, 1.0f);}\n"
   1896 		"\n");
   1897 
   1898 	VulkanPipelineCreateInfo pipeline_create_infos[2] = {
   1899 		{
   1900 			.kind = VulkanShaderKind_Vertex,
   1901 			.text = default_vertex_shader,
   1902 			.name = s8("error_vertex_shader"),
   1903 		},
   1904 		{
   1905 			.kind = VulkanShaderKind_Fragment,
   1906 			.text = default_fragment_shader,
   1907 			.name = s8("error_fragment_shader"),
   1908 		},
   1909 	};
   1910 	vk->default_graphics_pipeline = vk_graphics_pipeline_from_infos(vk->arena, pipeline_create_infos, 2, 256);
   1911 
   1912 	// TODO: setup ui render pipeline
   1913 
   1914 	if (err->widx > 0) {
   1915 		os_console_log(err->data, err->widx);
   1916 		stream_reset(err, 0);
   1917 	}
   1918 }
   1919 
   1920 DEBUG_IMPORT GPUInfo *
   1921 vk_gpu_info(void)
   1922 {
   1923 	return &vulkan_context->gpu_info;
   1924 }
   1925 
   1926 function void
   1927 vk_vulkan_buffer_release(VulkanBuffer *vb)
   1928 {
   1929 	VulkanContext *vk = vulkan_context;
   1930 	VulkanEntity  *e  = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as));
   1931 	// TODO(rnp): this happens implicitly, probably just delete this if block
   1932 	if (vb->host_pointer)
   1933 		vkUnmapMemory(vk->device, vb->memory);
   1934 
   1935 	if (vb->buffer)
   1936 		vkDestroyBuffer(vk->device, vb->buffer, 0);
   1937 
   1938 	vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? vb->memory_size : 0);
   1939 	vk_entity_release(e);
   1940 }
   1941 
   1942 DEBUG_IMPORT void
   1943 vk_buffer_release(GPUBuffer *b)
   1944 {
   1945 	if ValidVulkanHandle(b->handle)
   1946 		vk_vulkan_buffer_release(vk_entity_data(b->handle, VulkanEntityKind_Buffer));
   1947 	zero_struct(b);
   1948 }
   1949 
   1950 DEBUG_IMPORT void
   1951 vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info)
   1952 {
   1953 	VulkanContext *vk = vulkan_context;
   1954 
   1955 	vk_buffer_release(b);
   1956 
   1957 	assert(info->size > 0);
   1958 
   1959 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer);
   1960 	VulkanBufferAllocateInfo vulkan_buffer_allocate_info = {
   1961 		.gpu_buffer = b,
   1962 		.size       = (u64)info->size,
   1963 		.flags      = info->flags,
   1964 		.index_type = VK_INDEX_TYPE_NONE_KHR,
   1965 		.label      = info->label,
   1966 		.export     = info->export,
   1967 	};
   1968 
   1969 	u32 queue_index_hit_count[VulkanQueueKind_Count] = {0};
   1970 	for (u32 it = 0; it < info->timeline_count; it++)
   1971 		queue_index_hit_count[vk->queue_indices[info->timelines_used[it]]]++;
   1972 
   1973 	for EachElement(queue_index_hit_count, it) {
   1974 		if (queue_index_hit_count[it] > 0) {
   1975 			u32 index = vulkan_buffer_allocate_info.queue_family_count++;
   1976 			vulkan_buffer_allocate_info.queue_family_indices[index] = vk->queues[vk->queue_indices[it]]->queue_family;
   1977 		}
   1978 	}
   1979 
   1980 	if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) {
   1981 		b->handle.value[0] = (u64)e;
   1982 	} else {
   1983 		vk_entity_release(e);
   1984 	}
   1985 }
   1986 
   1987 DEBUG_IMPORT b32
   1988 vk_buffer_needs_sync(GPUBuffer *b)
   1989 {
   1990 	b32 result = 0;
   1991 	if ValidVulkanHandle(b->handle) {
   1992 		VulkanBuffer *vb = vk_entity_data(b->handle, VulkanEntityKind_Buffer);
   1993 
   1994 		// TODO(rnp): not correct check. need to check if we used transfer queue
   1995 		result = vb->memory_kind != VulkanMemoryKind_BAR;
   1996 	}
   1997 
   1998 	return result;
   1999 }
   2000 
   2001 DEBUG_IMPORT u64
   2002 vk_round_up_to_sync_size(u64 size, u64 min)
   2003 {
   2004 	iz  round  = (iz)Max(min, vulkan_context->memory_info.non_coherent_atom_size);
   2005 	u64 result = (u64)round_up_to((iz)size, round);
   2006 	return result;
   2007 }
   2008 
   2009 function force_inline void
   2010 vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 destination_offset, u64 source_offset, u64 size, b32 non_temporal)
   2011 {
   2012 	VulkanContext *vk = vulkan_context;
   2013 
   2014 	switch (source->memory_kind) {
   2015 	case VulkanMemoryKind_BAR:
   2016 	{
   2017 		switch (destination->memory_kind) {
   2018 		case VulkanMemoryKind_Host:{
   2019 			if (destination->memory) {
   2020 				// TODO(rnp): there is likely a more efficient way of doing this in this case
   2021 				InvalidCodePath;
   2022 			} else {
   2023 				assert(source->host_pointer);
   2024 				b32 coherent = vk->memory_info.memory_host_coherent[source->memory_kind];
   2025 				if (!coherent) {
   2026 					u64 nca_size = vk->memory_info.non_coherent_atom_size;
   2027 					VkMappedMemoryRange mrs[1] = {{
   2028 						.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   2029 						.memory = source->memory,
   2030 						.offset = source_offset - (source_offset % nca_size),
   2031 						.size   = vk_round_up_to_sync_size(size, nca_size),
   2032 					}};
   2033 					vkInvalidateMappedMemoryRanges(vk->device, countof(mrs), mrs);
   2034 				}
   2035 
   2036 				void *dest = (u8 *)destination->host_pointer + destination_offset;
   2037 				void *src  = (u8 *)source->host_pointer + source_offset;
   2038 
   2039 				// NOTE(rnp): don't trash the CPU cache for large data stores
   2040 				if (non_temporal) memory_copy_non_temporal(dest, src, size);
   2041 				else              mem_copy(dest, src, size);
   2042 			}
   2043 		}break;
   2044 		InvalidDefaultCase;
   2045 		}
   2046 	}break;
   2047 
   2048 	case VulkanMemoryKind_Host:{
   2049 		switch (destination->memory_kind) {
   2050 		case VulkanMemoryKind_BAR:{
   2051 			assert(destination->host_pointer);
   2052 
   2053 			void *dest = (u8 *)destination->host_pointer + destination_offset;
   2054 			void *src  = (u8 *)source->host_pointer + source_offset;
   2055 
   2056 			// NOTE(rnp): don't trash the CPU cache for large data stores
   2057 			if (non_temporal) memory_copy_non_temporal(dest, src, size);
   2058 			else              mem_copy(dest, src, size);
   2059 
   2060 			b32 coherent = vk->memory_info.memory_host_coherent[destination->memory_kind];
   2061 			if (!coherent) {
   2062 				u64 nca_size = vk->memory_info.non_coherent_atom_size;
   2063 				VkMappedMemoryRange mrs[1] = {{
   2064 					.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   2065 					.memory = destination->memory,
   2066 					.offset = destination_offset - (destination_offset % nca_size),
   2067 					.size   = vk_round_up_to_sync_size(size, nca_size),
   2068 				}};
   2069 				vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs);
   2070 			}
   2071 		}break;
   2072 		InvalidDefaultCase;
   2073 
   2074 		}
   2075 	}break;
   2076 
   2077 	// TODO(rnp): use transfer queue when not mapped
   2078 	InvalidDefaultCase;
   2079 	}
   2080 }
   2081 
   2082 DEBUG_IMPORT void
   2083 vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal)
   2084 {
   2085 	VulkanBuffer *db = vk_entity_data(b->handle, VulkanEntityKind_Buffer);
   2086 	VulkanBuffer  sb = {
   2087 		.host_pointer = data,
   2088 		.memory_kind  = VulkanMemoryKind_Host,
   2089 	};
   2090 	vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal);
   2091 }
   2092 
   2093 DEBUG_IMPORT void
   2094 vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal)
   2095 {
   2096 	VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer);
   2097 	VulkanBuffer  db = {
   2098 		.host_pointer = destination,
   2099 		.memory_kind  = VulkanMemoryKind_Host,
   2100 	};
   2101 	vk_buffer_buffer_copy(&db, sb, 0, offset, size, non_temporal);
   2102 }
   2103 
   2104 DEBUG_IMPORT void
   2105 vk_render_model_release(GPUBuffer *model)
   2106 {
   2107 	if ValidVulkanHandle(model->handle)
   2108 		vk_vulkan_buffer_release(vk_entity_data(model->handle, VulkanEntityKind_RenderModel));
   2109 	zero_struct(model);
   2110 }
   2111 
   2112 DEBUG_IMPORT void
   2113 vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 model_size, s8 label)
   2114 {
   2115 	vk_render_model_release(model);
   2116 
   2117 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_RenderModel);
   2118 
   2119 	assert(index_count <= U32_MAX);
   2120 	VkIndexType index_type;
   2121 	if (index_count <= U16_MAX) index_type = VK_INDEX_TYPE_UINT16;
   2122 	else                        index_type = VK_INDEX_TYPE_UINT32;
   2123 
   2124 	i64 indices_size = round_up_to(vk_index_size(index_type) * index_count, 64);
   2125 
   2126 	i64 size = round_up_to(model_size + indices_size, 64);
   2127 	assert(size > 0);
   2128 
   2129 	VulkanBufferAllocateInfo vulkan_buffer_allocate_info = {
   2130 		.gpu_buffer              = model,
   2131 		.size                    = (u64)size,
   2132 		.flags                   = VulkanUsageFlag_HostReadWrite,
   2133 		.index_type              = index_type,
   2134 		.label                   = str8_from_s8(label),
   2135 		.queue_family_count      = 1,
   2136 		.queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family,
   2137 	};
   2138 	if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) {
   2139 		model->handle.value[0] = (u64)e;
   2140 		model->index_count  = index_count;
   2141 		model->gpu_pointer += indices_size;
   2142 
   2143 		VulkanBuffer  sb = {
   2144 			.host_pointer = indices,
   2145 			.memory_kind  = VulkanMemoryKind_Host,
   2146 		};
   2147 
   2148 		vk_buffer_buffer_copy(&e->as.buffer, &sb, 0, 0, vk_index_size(index_type) * index_count, 0);
   2149 	} else {
   2150 		vk_entity_release(e);
   2151 	}
   2152 }
   2153 
   2154 DEBUG_IMPORT void
   2155 vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal)
   2156 {
   2157 	VulkanBuffer *db = vk_entity_data(model->handle, VulkanEntityKind_RenderModel);
   2158 	VulkanBuffer  sb = {
   2159 		.host_pointer = data,
   2160 		.memory_kind  = VulkanMemoryKind_Host,
   2161 	};
   2162 
   2163 	offset += round_up_to(vk_index_size(db->index_type) * model->index_count, 64);
   2164 
   2165 	vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal);
   2166 }
   2167 
   2168 DEBUG_IMPORT void
   2169 vk_image_release(GPUImage *image)
   2170 {
   2171 	if ValidVulkanHandle(image->image) {
   2172 		VulkanContext *vk = vulkan_context;
   2173 		VulkanImage   *vi = vk_entity_data(image->image, VulkanEntityKind_Image);
   2174 
   2175 		vkDestroyImageView(vk->device, vi->view, 0);
   2176 		vkDestroyImage(vk->device, vi->image, 0);
   2177 		vk_release_memory(vi->memory, image->memory_size);
   2178 
   2179 		vk_entity_release((VulkanEntity *)image->image.value[0]);
   2180 	}
   2181 	zero_struct(image);
   2182 }
   2183 
   2184 DEBUG_IMPORT void
   2185 vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples,
   2186                   VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label)
   2187 {
   2188 	assert(IsPowerOfTwo(samples));
   2189 
   2190 	vk_image_release(image);
   2191 
   2192 	VulkanContext *vk = vulkan_context;
   2193 	VulkanEntity  *e  = vk_entity_allocate(VulkanEntityKind_Image);
   2194 	VulkanImage   *vi = &e->as.image;
   2195 
   2196 	image->image.value[0] = (u64)e;
   2197 	image->width          = Min(width,   vk->gpu_info.max_image_dimension_2D);
   2198 	image->height         = Min(height,  vk->gpu_info.max_image_dimension_2D);
   2199 	image->mip_map_levels = Max(mips,    1);
   2200 	image->samples        = Min(samples, vk->gpu_info.max_msaa_samples);
   2201 
   2202 	VkFormat usage_format_map[VulkanImageUsage_Count + 1] = {
   2203 		[VulkanImageUsage_None]         = VK_FORMAT_UNDEFINED,
   2204 		//[VulkanImageUsage_Colour]       = VK_FORMAT_R8G8B8A8_SRGB,
   2205 		[VulkanImageUsage_Colour]       = VK_FORMAT_R8G8B8A8_UNORM,
   2206 		[VulkanImageUsage_DepthStencil] = vk->depth_stencil_format,
   2207 		[VulkanImageUsage_Count]        = VK_FORMAT_UNDEFINED,
   2208 	};
   2209 
   2210 	read_only local_persist VkImageUsageFlagBits usage_extra_bit_map[VulkanImageUsage_Count + 1] = {
   2211 		[VulkanImageUsage_None]         = 0,
   2212 		[VulkanImageUsage_Colour]       = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
   2213 		[VulkanImageUsage_DepthStencil] = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
   2214 		[VulkanImageUsage_Count]        = 0,
   2215 	};
   2216 
   2217 	read_only local_persist VkImageAspectFlags usage_image_aspect_map[VulkanImageUsage_Count + 1] = {
   2218 		[VulkanImageUsage_None]         = 0,
   2219 		[VulkanImageUsage_Colour]       = VK_IMAGE_ASPECT_COLOR_BIT,
   2220 		[VulkanImageUsage_DepthStencil] = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
   2221 		[VulkanImageUsage_Count]        = 0,
   2222 	};
   2223 
   2224 	usage = Clamp((u32)usage, 0, VulkanImageUsage_Count);
   2225 	VkImageUsageFlagBits usage_flags = usage_extra_bit_map[usage];
   2226 
   2227 	if (flags & VulkanUsageFlag_ImageSampling)       usage_flags |= VK_IMAGE_USAGE_SAMPLED_BIT;
   2228 	if (flags & VulkanUsageFlag_TransferSource)      usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
   2229 	if (flags & VulkanUsageFlag_TransferDestination) usage_flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
   2230 
   2231 	u32 queue_family = vk->queues[VulkanQueueKind_Graphics]->queue_family;
   2232 	VkImageCreateInfo image_create_info = {
   2233 		.sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
   2234 		.flags                 = export ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0,
   2235 		.imageType             = VK_IMAGE_TYPE_2D,
   2236 		.format                = usage_format_map[usage],
   2237 		.extent                = {image->width, image->height, 1},
   2238 		.mipLevels             = image->mip_map_levels,
   2239 		.arrayLayers           = 1,
   2240 		.samples               = image->samples,
   2241 		.tiling                = VK_IMAGE_TILING_OPTIMAL,
   2242 		.usage                 = usage_flags,
   2243 		// NOTE(rnp): needed if multiple queue families are accessed
   2244 		.sharingMode           = VK_SHARING_MODE_EXCLUSIVE,
   2245 		.queueFamilyIndexCount = 1,
   2246 		.pQueueFamilyIndices   = &queue_family,
   2247 		.initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
   2248 	};
   2249 
   2250 	VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
   2251 		.sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
   2252 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
   2253 		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
   2254 	};
   2255 
   2256 	if (export) image_create_info.pNext = &external_memory_image_create_info;
   2257 
   2258 	vkCreateImage(vk->device, &image_create_info, 0, &vi->image);
   2259 
   2260 	VkMemoryRequirements memory_requirements;
   2261 	vkGetImageMemoryRequirements(vk->device, vi->image, &memory_requirements);
   2262 
   2263 	VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
   2264 		.sType  = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
   2265 		.image  = vi->image,
   2266 	};
   2267 
   2268 	if (vk_allocate_memory(&vi->memory, memory_requirements.size, VulkanMemoryKind_Device, 0, &dedicated_allocate_info, export)) {
   2269 		image->memory_size = memory_requirements.size;
   2270 		vkBindImageMemory(vk->device, vi->image, vi->memory, 0);
   2271 
   2272 		VkImageViewCreateInfo image_view_info = {
   2273 			.sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
   2274 			.image      = vi->image,
   2275 			.viewType   = VK_IMAGE_VIEW_TYPE_2D,
   2276 			.format     = usage_format_map[usage],
   2277 			.subresourceRange = {
   2278 				.aspectMask     = usage_image_aspect_map[usage],
   2279 				.baseMipLevel   = 0,
   2280 				.levelCount     = 1,
   2281 				.baseArrayLayer = 0,
   2282 				.layerCount     = 1,
   2283 			},
   2284 		};
   2285 		vkCreateImageView(vk->device, &image_view_info, 0, &vi->view);
   2286 
   2287 		vk_label_object(IMAGE,         vi->image,  str8_from_s8(label), str8("Image"));
   2288 		vk_label_object(IMAGE_VIEW,    vi->view,   str8_from_s8(label), str8("Image View"));
   2289 		vk_label_object(DEVICE_MEMORY, vi->memory, str8_from_s8(label), str8("Memory"));
   2290 	} else {
   2291 		vkDestroyImage(vk->device, vi->image, 0);
   2292 		vk_entity_release(e);
   2293 		zero_struct(image);
   2294 	}
   2295 }
   2296 
   2297 DEBUG_IMPORT VulkanHandle
   2298 vk_create_semaphore(OSHandle *export)
   2299 {
   2300 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Semaphore);
   2301 	e->as.semaphore = vk_make_semaphore(export);
   2302 	VulkanHandle result = {(u64)e};
   2303 	return result;
   2304 }
   2305 
   2306 DEBUG_IMPORT b32
   2307 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns)
   2308 {
   2309 	b32 result = 0;
   2310 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2311 		VulkanContext *vk = vulkan_context;
   2312 		VulkanQueue   *vq = vk->queues[timeline];
   2313 		VkSemaphoreWaitInfo semaphore_wait_info = {
   2314 			.sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
   2315 			.pSemaphores    = &vq->timeline_semaphore.semaphore,
   2316 			.semaphoreCount = 1,
   2317 			.pValues        = &value,
   2318 		};
   2319 		result = vkWaitSemaphores(vk->device, &semaphore_wait_info, timeout_ns) == VK_SUCCESS;
   2320 	}
   2321 	return result;
   2322 }
   2323 
   2324 DEBUG_IMPORT u64
   2325 vk_host_signal_timeline(VulkanTimeline timeline)
   2326 {
   2327 	u64 result = -1;
   2328 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2329 		VulkanContext   *vk = vulkan_context;
   2330 		VulkanQueue     *vq = vk->queues[timeline];
   2331 		VulkanSemaphore *vs = &vq->timeline_semaphore;
   2332 		result = ++vs->value;
   2333 		VkSemaphoreSignalInfo ssi = {
   2334 			.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
   2335 			.semaphore = vs->semaphore,
   2336 			.value     = result,
   2337 		};
   2338 		vkSignalSemaphore(vk->device, &ssi);
   2339 	}
   2340 	return result;
   2341 }
   2342 
   2343 DEBUG_IMPORT VulkanHandle
   2344 vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size)
   2345 {
   2346 	assert(Between(count, 1, 2));
   2347 	assert(count == 2 || infos[0].kind == VulkanShaderKind_Compute);
   2348 
   2349 	VulkanHandle result = {0};
   2350 	DeferLoop(take_lock(&vulkan_context->arena_lock, -1), release_lock(&vulkan_context->arena_lock))
   2351 	{
   2352 		Arena arena = vulkan_context->arena;
   2353 
   2354 		VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Pipeline);
   2355 		result = (VulkanHandle){(u64)e};
   2356 
   2357 		if (count == 2) e->as.pipeline = vk_graphics_pipeline_from_infos(arena, infos, count, push_constants_size);
   2358 		else            e->as.pipeline = vk_compute_pipeline_from_shader_text(arena, infos[0].text, infos[0].name, push_constants_size);
   2359 	}
   2360 	return result;
   2361 }
   2362 
   2363 DEBUG_IMPORT b32
   2364 vk_pipeline_valid(VulkanHandle h)
   2365 {
   2366 	b32 result = 0;
   2367 	if ValidVulkanHandle(h) {
   2368 		VulkanPipeline *vp = vk_entity_data(h, VulkanEntityKind_Pipeline);
   2369 		if (vp->stage_flags == VK_SHADER_STAGE_COMPUTE_BIT)
   2370 			result = vp->pipeline != vulkan_context->default_compute_pipeline.pipeline;
   2371 		else
   2372 			result = vp->pipeline != vulkan_context->default_graphics_pipeline.pipeline;
   2373 	}
   2374 	return result;
   2375 }
   2376 
   2377 DEBUG_IMPORT void
   2378 vk_pipeline_release(VulkanHandle h)
   2379 {
   2380 	if (vk_pipeline_valid(h)) {
   2381 		VulkanEntity *e = (VulkanEntity *)h.value[0];
   2382 		VulkanTimeline timeline;
   2383 		if (e->as.pipeline.stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) timeline = VulkanTimeline_Compute;
   2384 		else                                                           timeline = VulkanTimeline_Graphics;
   2385 
   2386 		// NOTE(rnp): block more command buffers from being recorded
   2387 		VulkanCommandPool *vcp = vulkan_context->command_pools[timeline];
   2388 		DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) {
   2389 			u32 index = (vcp->next_index - 1) % countof(vcp->buffers);
   2390 			vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2391 			vkDestroyPipeline(vulkan_context->device, e->as.pipeline.pipeline, 0);
   2392 			vkDestroyPipelineLayout(vulkan_context->device, e->as.pipeline.layout, 0);
   2393 
   2394 			if (&e->as.pipeline == vcp->bound_pipeline)
   2395 				vcp->bound_pipeline = 0;
   2396 		}
   2397 		vk_entity_release(e);
   2398 	}
   2399 }
   2400 
   2401 DEBUG_IMPORT void
   2402 vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count)
   2403 {
   2404 	VulkanContext *vk = vulkan_context;
   2405 
   2406 	VkWriteDescriptorSet   write_sets[BeamformerShaderResourceKind_Count] = {0};
   2407 
   2408 	for EachIndex(info_count, it) {
   2409 		switch (infos[it].kind) {
   2410 		case BeamformerShaderResourceKind_Buffer:{
   2411 			VulkanBuffer *vb = vk_entity_data(infos[it].handle, VulkanEntityKind_Buffer);
   2412 			vk->descriptor_buffer_infos[infos[it].slot].buffer = vb->buffer;
   2413 			vk->descriptor_buffer_infos[infos[it].slot].offset = 0;
   2414 			vk->descriptor_buffer_infos[infos[it].slot].range  = vb->memory_size;
   2415 		}break;
   2416 
   2417 		InvalidDefaultCase;
   2418 		}
   2419 	}
   2420 
   2421 	write_sets[BeamformerShaderResourceKind_Buffer].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
   2422 	write_sets[BeamformerShaderResourceKind_Buffer].dstSet           = vk->descriptor_sets[BeamformerShaderResourceKind_Buffer];
   2423 	write_sets[BeamformerShaderResourceKind_Buffer].dstBinding       = 0;
   2424 	write_sets[BeamformerShaderResourceKind_Buffer].descriptorCount  = countof(vk->descriptor_buffer_infos);
   2425 	write_sets[BeamformerShaderResourceKind_Buffer].descriptorType   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
   2426 	write_sets[BeamformerShaderResourceKind_Buffer].pBufferInfo      = vk->descriptor_buffer_infos;
   2427 
   2428 	vkUpdateDescriptorSets(vk->device, countof(write_sets), write_sets, 0, 0);
   2429 }
   2430 
   2431 DEBUG_IMPORT VulkanHandle
   2432 vk_command_begin(VulkanTimeline timeline)
   2433 {
   2434 	VulkanHandle result = {0};
   2435 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2436 		VulkanContext     *vk  = vulkan_context;
   2437 		VulkanCommandPool *vcp = vk->command_pools[timeline];
   2438 
   2439 		take_lock(&vcp->lock, -1);
   2440 
   2441 		VulkanEntity        *e   = vk_entity_allocate(VulkanEntityKind_CommandBuffer);
   2442 		VulkanCommandBuffer *vcb = &e->as.command_buffer;
   2443 		vcb->timeline     = timeline;
   2444 		vcb->buffer_index = vcp->next_index++ % countof(vcp->buffers);
   2445 
   2446 		u32 index = vcb->buffer_index;
   2447 		// TODO(rnp): probably not the best to have this here but it will likely not be hit
   2448 		b32 wait_result = vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2449 		assert(wait_result);
   2450 
   2451 		vcp->queries_occupied[index] = 0;
   2452 
   2453 		VkCommandBufferBeginInfo buffer_begin_info = {
   2454 			.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   2455 			.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
   2456 		};
   2457 
   2458 		vkBeginCommandBuffer(vcp->buffers[index], &buffer_begin_info);
   2459 		vkCmdResetQueryPool(vcp->buffers[index], vcp->query_pool, index * MaxCommandBufferTimestamps,
   2460 		                    MaxCommandBufferTimestamps);
   2461 
   2462 		result = (VulkanHandle){(u64)e};
   2463 	}
   2464 	return result;
   2465 }
   2466 
   2467 DEBUG_IMPORT void
   2468 vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline)
   2469 {
   2470 	if ValidVulkanHandle(command) {
   2471 		VulkanContext       *vk  = vulkan_context;
   2472 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2473 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2474 
   2475 		VulkanPipeline *vp = 0;
   2476 		if ValidVulkanHandle(pipeline) {
   2477 			vp = vk_entity_data(pipeline, VulkanEntityKind_Pipeline);
   2478 		} else if (vcb->timeline == VulkanTimeline_Compute) {
   2479 			vp = &vk->default_compute_pipeline;
   2480 		} else if (vcb->timeline == VulkanTimeline_Graphics) {
   2481 			vp = &vk->default_graphics_pipeline;
   2482 		} else {
   2483 			InvalidCodePath;
   2484 		}
   2485 
   2486 		read_only local_persist VkPipelineBindPoint bind_point_lut[VulkanTimeline_Count] = {
   2487 			[VulkanTimeline_Graphics] = VK_PIPELINE_BIND_POINT_GRAPHICS,
   2488 			[VulkanTimeline_Compute]  = VK_PIPELINE_BIND_POINT_COMPUTE,
   2489 			[VulkanTimeline_Transfer] = -1,
   2490 		};
   2491 
   2492 		VkPipelineBindPoint bind_point = bind_point_lut[vcb->timeline];
   2493 		assert(bind_point != (VkPipelineBindPoint)-1);
   2494 
   2495 		vkCmdBindPipeline(vcp->buffers[vcb->buffer_index], bind_point, vp->pipeline);
   2496 		vkCmdBindDescriptorSets(vcp->buffers[vcb->buffer_index], bind_point, vp->layout,
   2497 		                        0, countof(vk->descriptor_sets), vk->descriptor_sets, 0, 0);
   2498 		vcp->bound_pipeline = vp;
   2499 	}
   2500 }
   2501 
   2502 DEBUG_IMPORT void
   2503 vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count)
   2504 {
   2505 	if ValidVulkanHandle(command) {
   2506 		VulkanContext       *vk  = vulkan_context;
   2507 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2508 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2509 		VulkanQueue         *vq  = vk->queues[vcb->timeline];
   2510 
   2511 		DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock))
   2512 		{
   2513 			Arena arena = vk->arena;
   2514 			u32 valid_count = 0;
   2515 			VkBufferMemoryBarrier2 *memory_barriers = push_array(&arena, VkBufferMemoryBarrier2, count);
   2516 			for (u64 it = 0; it < count; it++) {
   2517 				if ValidVulkanHandle(barriers[it].gpu_buffer->handle) {
   2518 					u32           index = valid_count++;
   2519 					VulkanBuffer *vb    = vk_entity_data(barriers[it].gpu_buffer->handle, VulkanEntityKind_Buffer);
   2520 					memory_barriers[index].sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2;
   2521 					memory_barriers[index].srcStageMask        = vq->pipeline_stage_flags;
   2522 					memory_barriers[index].srcAccessMask       = VK_ACCESS_2_MEMORY_WRITE_BIT;
   2523 					memory_barriers[index].dstStageMask        = vq->pipeline_stage_flags;
   2524 					memory_barriers[index].dstAccessMask       = VK_ACCESS_2_MEMORY_READ_BIT;
   2525 					memory_barriers[index].srcQueueFamilyIndex = vq->queue_family;
   2526 					memory_barriers[index].dstQueueFamilyIndex = vq->queue_family;
   2527 					memory_barriers[index].buffer              = vb->buffer;
   2528 					memory_barriers[index].offset              = barriers[it].offset;
   2529 					memory_barriers[index].size                = barriers[it].size;
   2530 				}
   2531 			}
   2532 
   2533 			VkDependencyInfo dependancy_info = {
   2534 				.sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
   2535 				.bufferMemoryBarrierCount = valid_count,
   2536 				.pBufferMemoryBarriers    = memory_barriers,
   2537 			};
   2538 
   2539 			vkCmdPipelineBarrier2(vcp->buffers[vcb->buffer_index], &dependancy_info);
   2540 		}
   2541 	}
   2542 }
   2543 
   2544 DEBUG_IMPORT void
   2545 vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch)
   2546 {
   2547 	assert(dispatch.x <= U16_MAX);
   2548 	assert(dispatch.y <= U16_MAX);
   2549 	assert(dispatch.z <= U16_MAX);
   2550 	if ValidVulkanHandle(command) {
   2551 		VkCommandBuffer cmd = vk_command_buffer(command);
   2552 		vkCmdDispatch(cmd, dispatch.x, dispatch.y, dispatch.z);
   2553 	}
   2554 }
   2555 
   2556 DEBUG_IMPORT void
   2557 vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values)
   2558 {
   2559 	if ValidVulkanHandle(command) {
   2560 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2561 		VulkanCommandPool   *vcp = vulkan_context->command_pools[vcb->timeline];
   2562 		VulkanPipeline      *vp  = vcp->bound_pipeline;
   2563 
   2564 		assert(vp);
   2565 
   2566 		vkCmdPushConstants(vcp->buffers[vcb->buffer_index], vp->layout, vp->stage_flags, offset, size, values);
   2567 	}
   2568 }
   2569 
   2570 DEBUG_IMPORT void
   2571 vk_command_timestamp(VulkanHandle command)
   2572 {
   2573 	if ValidVulkanHandle(command) {
   2574 		VulkanContext       *vk  = vulkan_context;
   2575 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2576 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2577 
   2578 		read_only local_persist VkPipelineStageFlags2 stage_lut[VulkanTimeline_Count] = {
   2579 			[VulkanTimeline_Graphics] = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT,
   2580 			[VulkanTimeline_Compute]  = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
   2581 			[VulkanTimeline_Transfer] = -1,
   2582 		};
   2583 
   2584 		VkPipelineStageFlags2 stage = stage_lut[vcb->timeline];
   2585 		assert(stage != (VkPipelineStageFlags2)-1);
   2586 
   2587 		if (vcp->queries_occupied[vcb->buffer_index] < MaxCommandBufferTimestamps) {
   2588 			u32 query_index = vcp->queries_occupied[vcb->buffer_index]++;
   2589 			vkCmdWriteTimestamp2(vcp->buffers[vcb->buffer_index], stage, vcp->query_pool,
   2590 			                     vcb->buffer_index * MaxCommandBufferTimestamps + query_index);
   2591 		}
   2592 	}
   2593 }
   2594 
   2595 DEBUG_IMPORT void
   2596 vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value)
   2597 {
   2598 	if (ValidVulkanHandle(command) && Between(timeline, 0, VulkanTimeline_Count - 1)) {
   2599 		VulkanContext       *vk  = vulkan_context;
   2600 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2601 
   2602 		u32 wait_index = vk->queue_indices[timeline];
   2603 		vcb->in_flight_wait_values[wait_index] = Max(value, vcb->in_flight_wait_values[wait_index]);
   2604 	}
   2605 }
   2606 
   2607 DEBUG_IMPORT u64
   2608 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore)
   2609 {
   2610 	u64 result = -1;
   2611 	if ValidVulkanHandle(command) {
   2612 		VulkanContext       *vk  = vulkan_context;
   2613 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2614 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2615 		VulkanQueue         *vq  = vk->queues[vcb->timeline];
   2616 		VulkanSemaphore     *vs  = &vq->timeline_semaphore;
   2617 
   2618 		vkEndCommandBuffer(vcp->buffers[vcb->buffer_index]);
   2619 
   2620 		DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock)) {
   2621 			VkCommandBufferSubmitInfo command_buffer_submit_info = {
   2622 				.sType         = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
   2623 				.commandBuffer = vcp->buffers[vcb->buffer_index],
   2624 			};
   2625 
   2626 			result = ++vs->value;
   2627 
   2628 			u32 signal_submit_info_count = 1;
   2629 			VkSemaphoreSubmitInfo signal_submit_infos[2] = {{
   2630 				.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2631 				.semaphore = vs->semaphore,
   2632 				.value     = result,
   2633 				.stageMask = vq->pipeline_stage_flags,
   2634 			}};
   2635 
   2636 			if ValidVulkanHandle(finished_semaphore) {
   2637 				VulkanSemaphore *fs = vk_entity_data(finished_semaphore, VulkanEntityKind_Semaphore);
   2638 				signal_submit_infos[signal_submit_info_count++] = (VkSemaphoreSubmitInfo){
   2639 					.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2640 					.semaphore = fs->semaphore,
   2641 					.stageMask = vq->pipeline_stage_flags,
   2642 				};
   2643 			}
   2644 
   2645 			u32 wait_submit_info_count = 0;
   2646 			VkSemaphoreSubmitInfo wait_submit_infos[VulkanQueueKind_Count + 1];
   2647 			for (u32 i = 0; i < vk->unique_queues; i++) {
   2648 				u32 queue_index = vk->queue_indices[i];
   2649 				if (vcb->in_flight_wait_values[queue_index] > 0) {
   2650 					VulkanQueue *q = vk->queues[queue_index];
   2651 					VkSemaphoreSubmitInfo wait_ssi = {
   2652 						.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2653 						.semaphore = q->timeline_semaphore.semaphore,
   2654 						.value     = vcb->in_flight_wait_values[queue_index],
   2655 						.stageMask = q->pipeline_stage_flags,
   2656 					};
   2657 					wait_submit_infos[wait_submit_info_count++] = wait_ssi;
   2658 				}
   2659 			}
   2660 
   2661 			if ValidVulkanHandle(wait_semaphore) {
   2662 				VulkanSemaphore *ws = vk_entity_data(wait_semaphore, VulkanEntityKind_Semaphore);
   2663 				wait_submit_infos[wait_submit_info_count++] = (VkSemaphoreSubmitInfo){
   2664 					.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2665 					.semaphore = ws->semaphore,
   2666 					.stageMask = vq->pipeline_stage_flags,
   2667 				};
   2668 			}
   2669 
   2670 			VkSubmitInfo2 submit_info = {
   2671 				.sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
   2672 				.commandBufferInfoCount   = 1,
   2673 				.pCommandBufferInfos      = &command_buffer_submit_info,
   2674 				.waitSemaphoreInfoCount   = wait_submit_info_count,
   2675 				.pWaitSemaphoreInfos      = wait_submit_infos,
   2676 				.signalSemaphoreInfoCount = signal_submit_info_count,
   2677 				.pSignalSemaphoreInfos    = signal_submit_infos,
   2678 			};
   2679 
   2680 			vkQueueSubmit2(vq->queue, 1, &submit_info, 0);
   2681 
   2682 			vcp->bound_pipeline = 0;
   2683 
   2684 			atomic_store_u64(vcp->submission_values + vcb->buffer_index, result);
   2685 		}
   2686 
   2687 		release_lock(&vcp->lock);
   2688 
   2689 		vk_entity_release((VulkanEntity *)command.value[0]);
   2690 	}
   2691 	return result;
   2692 }
   2693 
   2694 DEBUG_IMPORT void
   2695 vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *depth, GPUImage *resolve)
   2696 {
   2697 	if ValidVulkanHandle(command) {
   2698 		VkCommandBuffer cmd = vk_command_buffer(command);
   2699 
   2700 		assert((colour->width == depth->width) && (colour->height == depth->height));
   2701 
   2702 		VulkanImage *ci = vk_entity_data(colour->image, VulkanEntityKind_Image);
   2703 		VulkanImage *di = vk_entity_data(depth->image,  VulkanEntityKind_Image);
   2704 		VulkanImage *ri = 0;
   2705 		if (resolve) ri = vk_entity_data(resolve->image, VulkanEntityKind_Image);
   2706 
   2707 		// NOTE: Layout Transitions
   2708 		{
   2709 			u32 image_memory_barrier_count = 2;
   2710 			VkImageMemoryBarrier2 image_memory_barriers[3] = {
   2711 				{
   2712 					.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2713 					.srcStageMask     = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
   2714 					.srcAccessMask    = 0,
   2715 					.dstStageMask     = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
   2716 					.dstAccessMask    = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
   2717 					.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2718 					.newLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2719 					.image            = ci->image,
   2720 					.subresourceRange = {
   2721 						.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
   2722 						.baseMipLevel   = 0,
   2723 						.levelCount     = 1,
   2724 						.baseArrayLayer = 0,
   2725 						.layerCount     = 1,
   2726 					},
   2727 				},
   2728 				{
   2729 					.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2730 					.srcStageMask     = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
   2731 					.srcAccessMask    = 0,
   2732 					.dstStageMask     = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
   2733 					.dstAccessMask    = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
   2734 					.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2735 					.newLayout        = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
   2736 					.image            = di->image,
   2737 					.subresourceRange = {
   2738 						.aspectMask     = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
   2739 						.baseMipLevel   = 0,
   2740 						.levelCount     = 1,
   2741 						.baseArrayLayer = 0,
   2742 						.layerCount     = 1,
   2743 					},
   2744 				},
   2745 			};
   2746 
   2747 			if (resolve) image_memory_barriers[image_memory_barrier_count++] = (VkImageMemoryBarrier2){
   2748 				.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2749 				.srcStageMask     = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
   2750 				.srcAccessMask    = 0,
   2751 				.dstStageMask     = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT|VK_PIPELINE_STAGE_2_RESOLVE_BIT,
   2752 				.dstAccessMask    = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
   2753 				.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2754 				.newLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2755 				.image            = ri->image,
   2756 				.subresourceRange = {
   2757 					.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
   2758 					.baseMipLevel   = 0,
   2759 					.levelCount     = 1,
   2760 					.baseArrayLayer = 0,
   2761 					.layerCount     = 1,
   2762 				},
   2763 			};
   2764 
   2765 			VkDependencyInfo dependency_info = {
   2766 				.sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
   2767 				.imageMemoryBarrierCount = image_memory_barrier_count,
   2768 				.pImageMemoryBarriers    = image_memory_barriers,
   2769 			};
   2770 
   2771 			vkCmdPipelineBarrier2(cmd, &dependency_info);
   2772 		}
   2773 
   2774 		VkRenderingAttachmentInfo colour_attachment = {
   2775 			.sType              = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
   2776 			.imageView          = ci->view,
   2777 			.imageLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2778 			.resolveMode        = ri ? VK_RESOLVE_MODE_AVERAGE_BIT : 0,
   2779 			.resolveImageView   = ri ? ri->view : 0,
   2780 			.resolveImageLayout = ri ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : 0,
   2781 			.loadOp             = VK_ATTACHMENT_LOAD_OP_CLEAR,
   2782 			.storeOp            = VK_ATTACHMENT_STORE_OP_STORE,
   2783 			.clearValue         = {.color = {{0.0f, 0.0f, 0.0f, 0.0f}}},
   2784 		};
   2785 
   2786 		VkRenderingAttachmentInfo depth_stencil_attachment = {
   2787 			.sType       = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
   2788 			.imageView   = di->view,
   2789 			.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
   2790 			.loadOp      = VK_ATTACHMENT_LOAD_OP_CLEAR,
   2791 			.storeOp     = VK_ATTACHMENT_STORE_OP_STORE,
   2792 			.clearValue  = {.depthStencil = {1.0f, 0}},
   2793 		};
   2794 
   2795 		VkRenderingInfo rendering_info = {
   2796 			.sType                = VK_STRUCTURE_TYPE_RENDERING_INFO,
   2797 			.renderArea           = {.offset = {0}, .extent = {colour->width, colour->height}},
   2798 			.layerCount           = 1,
   2799 			.colorAttachmentCount = 1,
   2800 			.pColorAttachments    = &colour_attachment,
   2801 			.pDepthAttachment     = &depth_stencil_attachment,
   2802 			.pStencilAttachment   = &depth_stencil_attachment,
   2803 		};
   2804 
   2805 		vkCmdBeginRendering(cmd, &rendering_info);
   2806 	}
   2807 }
   2808 
   2809 DEBUG_IMPORT void
   2810 vk_command_draw(VulkanHandle command, GPUBuffer *model)
   2811 {
   2812 	if (ValidVulkanHandle(command) && ValidVulkanHandle(model->handle)) {
   2813 		VkCommandBuffer cmd = vk_command_buffer(command);
   2814 		VulkanBuffer   *vb  = vk_entity_data(model->handle, VulkanEntityKind_RenderModel);
   2815 		vkCmdBindIndexBuffer2(cmd, vb->buffer, 0, vk_index_size(vb->index_type) * model->index_count, vb->index_type);
   2816 		vkCmdDrawIndexed(cmd, model->index_count, 1, 0, 0, 0);
   2817 	}
   2818 }
   2819 
   2820 DEBUG_IMPORT void
   2821 vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset)
   2822 {
   2823 	if ValidVulkanHandle(command) {
   2824 		VkCommandBuffer cmd = vk_command_buffer(command);
   2825 		VkRect2D scissor = {.offset = {x_offset, y_offset}, .extent = {width, height}};
   2826 		vkCmdSetScissor(cmd, 0, 1, &scissor);
   2827 	}
   2828 }
   2829 
   2830 DEBUG_IMPORT void
   2831 vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth)
   2832 {
   2833 	if ValidVulkanHandle(command) {
   2834 		VkCommandBuffer cmd = vk_command_buffer(command);
   2835 		VkViewport viewport = {x_offset, y_offset, width, height, min_depth, max_depth};
   2836 		vkCmdSetViewport(cmd, 0, 1, &viewport);
   2837 	}
   2838 }
   2839 
   2840 DEBUG_IMPORT void
   2841 vk_command_end_rendering(VulkanHandle command)
   2842 {
   2843 	if ValidVulkanHandle(command) vkCmdEndRendering(vk_command_buffer(command));
   2844 }
   2845 
   2846 DEBUG_IMPORT void
   2847 vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination,
   2848                        GPUBuffer *restrict source, u64 source_offset, i64 size)
   2849 {
   2850 	if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->handle) && ValidVulkanHandle(source->handle)) {
   2851 		VkCommandBuffer cmd = vk_command_buffer(command);
   2852 		VulkanBuffer *db = vk_entity_data(destination->handle, VulkanEntityKind_Buffer);
   2853 		VulkanBuffer *sb = vk_entity_data(source->handle,      VulkanEntityKind_Buffer);
   2854 
   2855 		VkBufferCopy2 buffer_copy = {
   2856 			.sType     = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
   2857 			.srcOffset = source_offset,
   2858 			.dstOffset = 0,
   2859 			.size      = size,
   2860 		};
   2861 
   2862 		VkCopyBufferInfo2 copy_buffer_info = {
   2863 			.sType       = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
   2864 			.srcBuffer   = sb->buffer,
   2865 			.dstBuffer   = db->buffer,
   2866 			.regionCount = 1,
   2867 			.pRegions    = &buffer_copy,
   2868 		};
   2869 
   2870 		vkCmdCopyBuffer2(cmd, &copy_buffer_info);
   2871 	}
   2872 }
   2873 
   2874 DEBUG_IMPORT u64 *
   2875 vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena)
   2876 {
   2877 	u64 *result = 0;
   2878 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2879 		VulkanContext     *vk  = vulkan_context;
   2880 		VulkanCommandPool *vcp = vk->command_pools[timeline];
   2881 		DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) {
   2882 			u32 index = (vcp->next_index - 1) % countof(vcp->buffers);
   2883 			u32 count = vcp->queries_occupied[index];
   2884 			if (count > 0) {
   2885 				result = push_array(arena, u64, count + 1);
   2886 				result[0] = count;
   2887 
   2888 				vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2889 
   2890 				vkGetQueryPoolResults(vk->device, vcp->query_pool, index * MaxCommandBufferTimestamps, count,
   2891 				                      count * sizeof(u64), result + 1, 8, VK_QUERY_RESULT_WAIT_BIT);
   2892 			}
   2893 		}
   2894 	} else {
   2895 		result = push_array(arena, u64, 1);
   2896 	}
   2897 	return result;
   2898 }