ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

util.c (23979B)


      1 /* See LICENSE for license details. */
      2 #if   COMPILER_CLANG
      3   #pragma GCC diagnostic ignored "-Winitializer-overrides"
      4 #elif COMPILER_GCC
      5   #pragma GCC diagnostic ignored "-Woverride-init"
      6 #endif
      7 
      8 #define zero_struct(s) memory_clear((s), 0, sizeof(*(s)))
      9 function void *
     10 memory_clear(void *restrict p_, u8 c, u64 size)
     11 {
     12 	u8 *p = p_;
     13 	while (size > 0) p[--size] = c;
     14 	return p;
     15 }
     16 
     17 function b32
     18 memory_equal(void *restrict left, void *restrict right, u64 n)
     19 {
     20 	u8 *a = left, *b = right;
     21 	b32 result = 1;
     22 	for (; result && n; n--)
     23 		result &= *a++ == *b++;
     24 	return result;
     25 }
     26 
/* mem_copy is a short alias for memory_copy. */
#define mem_copy memory_copy
/* Copies n bytes from src to dest. Both pointers are restrict qualified, so
 * the two regions are expected not to overlap. */
function void
memory_copy(void *restrict dest, void *restrict src, u64 n)
{
	u8 *s = src, *d = dest;
	#ifdef __AVX512BW__
	{
		/* NOTE: move whole 64 byte blocks with unaligned loads and stores */
		for (; n >= 64; n -= 64, s += 64, d += 64)
			_mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
		/* NOTE: fewer than 64 bytes remain; a mask with only the low n bits
		 * set limits the final load and store to exactly those bytes */
		__mmask64 k = _cvtu64_mask64(_bzhi_u64(-1ULL, n));
		_mm512_mask_storeu_epi8(d, k, _mm512_maskz_loadu_epi8(k, s));
	}
	#else
		/* NOTE: portable byte at a time fallback */
		for (; n; n--) *d++ = *s++;
	#endif
}
     43 
/* Copies n bytes from src to dest using the streaming/non-temporal load and
 * store instructions of the target when one of the paths below is available.
 * IMPORTANT: this function may fault unless dest, src, and n are all multiples of 64 */
function void
memory_copy_non_temporal(void *restrict dest, void *restrict src, u64 n)
{
	/* NOTE: states the 64 byte alignment/size contract; assume() is defined
	 * elsewhere (presumably an optimizer hint, not a runtime check - confirm) */
	assume(((u64)dest & 63) == 0);
	assume(((u64)src  & 63) == 0);
	assume(((u64)n    & 63) == 0);
	u8 *s = src, *d = dest;

	#if defined(__AVX512BW__)
	{
		/* NOTE: 64 bytes per iteration */
		for (; n >= 64; n -= 64, s += 64, d += 64)
			_mm512_stream_si512((__m512i *)d, _mm512_stream_load_si512((__m512i *)s));
	}
	#elif defined(__AVX2__)
	{
		/* NOTE: 32 bytes per iteration */
		for (; n >= 32; n -= 32, s += 32, d += 32)
			_mm256_stream_si256((__m256i *)d, _mm256_stream_load_si256((__m256i *)s));
	}
	#elif ARCH_ARM64 && !COMPILER_MSVC
	{
		/* NOTE: skips the loop when n is 0, otherwise moves one q register
		 * pair (32 bytes) per iteration. The loop only exits once the count
		 * reaches exactly zero, so n must be a multiple of 32 here. */
		asm volatile (
			"cbz  %2, 2f\n"
			"1: ldnp q0, q1, [%1]\n"
			"subs %2, %2, #32\n"
			"add  %1, %1, #32\n"
			"stnp q0, q1, [%0]\n"
			"add  %0, %0, #32\n"
			"b.ne 1b\n"
			"2:"
			:  "+r"(d), "+r"(s), "+r"(n)
			:: "memory", "v0", "v1"
		);
	}
	#else
		/* NOTE: no streaming path for this target; use the regular copy */
		memory_copy(d, s, n);
	#endif
}
     82 
     83 function void
     84 memory_move(void *dest, void *src, u64 n)
     85 {
     86 	u8 *d = dest, *s = src;
     87 	if (d < s) memory_copy(d, s, n);
     88 	else            while (n) { n--; d[n] = s[n]; }
     89 }
     90 
     91 function void *
     92 memory_scan_backwards(void *memory, u8 byte, iz n)
     93 {
     94 	void *result = 0;
     95 	u8   *s      = memory;
     96 	while (n > 0) if (s[--n] == byte) { result = s + n; break; }
     97 	return result;
     98 }
     99 
    100 function Arena
    101 arena_from_memory(void *memory, u64 size)
    102 {
    103 	Arena result;
    104 	result.beg = memory;
    105 	result.end = result.beg + size;
    106 	return result;
    107 }
    108 
    109 function void *
    110 align_pointer_up(void *p, uz alignment)
    111 {
    112 	uz padding = -(u64)p & (alignment - 1);
    113 	void *result = (u8 *)p + padding;
    114 	return result;
    115 }
    116 
    117 function void *
    118 arena_aligned_start(Arena a, uz alignment)
    119 {
    120 	return align_pointer_up(a.beg, alignment);
    121 }
    122 
    123 #define arena_capacity(a, t) arena_capacity_(a, sizeof(t), alignof(t))
    124 function iz
    125 arena_capacity_(Arena *a, iz size, uz alignment)
    126 {
    127 	iz available = a->end - (u8 *)arena_aligned_start(*a, alignment);
    128 	iz result    = available / size;
    129 	return result;
    130 }
    131 
    132 function u8 *
    133 arena_commit(Arena *a, iz size)
    134 {
    135 	assert(a->end - a->beg >= size);
    136 	u8 *result = a->beg;
    137 	a->beg += size;
    138 	return result;
    139 }
    140 
    141 function void
    142 arena_pop(Arena *a, iz length)
    143 {
    144 	a->beg -= length;
    145 }
    146 
/* Flags that alter the behaviour of arena_alloc_(). */
typedef enum {
	/* leave the returned memory as is instead of clearing it to zero */
	ArenaAllocateFlags_NoZero = 1 << 0,
} ArenaAllocateFlags;

/* Describes one allocation request handed to arena_alloc_(). */
typedef struct {
	iz size;  /* size of a single element in bytes */
	uz align; /* alignment applied to the start of the allocation */
	iz count; /* number of elements requested */
	ArenaAllocateFlags flags;
} ArenaAllocateInfo;

/* Allocation front ends. arena_alloc() takes designated initializers for
 * ArenaAllocateInfo and defaults to .align = 8 and .count = 1; the push_*
 * helpers fill in size and alignment from the type t and cast the result. */
#define arena_alloc(a, ...)         arena_alloc_(a, (ArenaAllocateInfo){.align = 8, .count = 1, ##__VA_ARGS__})
#define push_array(a, t, n)         (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n)
#define push_array_no_zero(a, t, n) (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n, .flags = ArenaAllocateFlags_NoZero)
#define push_struct(a, t)           push_array(a, t, 1)
#define push_struct_no_zero(a, t)   push_array_no_zero(a, t, 1)
    163 
    164 function void *
    165 arena_alloc_(Arena *a, ArenaAllocateInfo info)
    166 {
    167 	void *result = 0;
    168 	if (a->beg) {
    169 		u8 *start = arena_aligned_start(*a, info.align);
    170 		iz available = a->end - start;
    171 		assert((available >= 0 && info.count <= available / info.size));
    172 		asan_unpoison_region(start, info.count * info.size);
    173 		a->beg = start + info.count * info.size;
    174 		result = start;
    175 		if ((info.flags & ArenaAllocateFlags_NoZero) == 0)
    176 			result = memory_clear(start, 0, info.count * info.size);
    177 	}
    178 	return result;
    179 }
    180 
    181 function Arena
    182 sub_arena(Arena *a, iz size, uz align)
    183 {
    184 	Arena result = {.beg = arena_alloc(a, .size = size, .align = align, .flags = ArenaAllocateFlags_NoZero)};
    185 	result.end   = result.beg + size;
    186 	return result;
    187 }
    188 
    189 function Arena
    190 sub_arena_end(Arena *a, iz len, uz align)
    191 {
    192 	Arena result;
    193 	result.beg = (u8 *)((u64)(a->end - len) & ~(align - 1)),
    194 	result.end = a->end,
    195 
    196 	a->end = result.beg;
    197 	assert(a->end >= a->beg);
    198 
    199 	return result;
    200 }
    201 
    202 function TempArena
    203 begin_temp_arena(Arena *a)
    204 {
    205 	TempArena result = {.arena = a, .original_arena = *a};
    206 	return result;
    207 }
    208 
    209 function void
    210 end_temp_arena(TempArena ta)
    211 {
    212 	Arena *a = ta.arena;
    213 	if (a) {
    214 		assert(a->beg >= ta.original_arena.beg);
    215 		*a = ta.original_arena;
    216 	}
    217 }
    218 
    219 
    220 enum { DA_INITIAL_CAP = 16 };
    221 
    222 #define da_index(it, s) ((it) - (s)->data)
    223 #define da_reserve(a, s, n) \
    224   (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
    225                           _Alignof(typeof(*(s)->data)), sizeof(*(s)->data))
    226 
    227 #define da_append_count(a, s, items, item_count) do { \
    228 	da_reserve((a), (s), (item_count));                                             \
    229 	mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); \
    230 	(s)->count += (item_count);                                                     \
    231 } while (0)
    232 
    233 #define da_push(a, s) \
    234   ((typeof((s)->data))memory_clear((s)->count == (s)->capacity  \
    235     ? da_reserve(a, s, 1),      \
    236       (s)->data + (s)->count++  \
    237     : (s)->data + (s)->count++, 0, sizeof(*(s)->data)))
    238 
    239 function void *
    240 da_reserve_(Arena *a, void *data, da_count *capacity, da_count needed, u64 align, i64 size)
    241 {
    242 	da_count cap = *capacity;
    243 
    244 	/* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started
    245 	 * on the stack or someone allocated something in the middle of the arena during usage) */
    246 	if (!data || a->beg != (u8 *)data + cap * size) {
    247 		void *copy = arena_alloc(a, .size = size, .align = align, .count = cap);
    248 		if (data) mem_copy(copy, data, (uz)(cap * size));
    249 		data = copy;
    250 	}
    251 
    252 	if (!cap) cap = DA_INITIAL_CAP;
    253 	while (cap < needed) cap *= 2;
    254 	arena_alloc(a, .size = size, .align = align, .count = cap - *capacity);
    255 	*capacity = cap;
    256 	return data;
    257 }
    258 
    259 function u32
    260 utf8_encode(u8 *out, u32 cp)
    261 {
    262 	u32 result = 1;
    263 	if (cp <= 0x7F) {
    264 		out[0] = cp & 0x7F;
    265 	} else if (cp <= 0x7FF) {
    266 		result = 2;
    267 		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
    268 		out[1] = ((cp >>  0) & 0x3F) | 0x80;
    269 	} else if (cp <= 0xFFFF) {
    270 		result = 3;
    271 		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    272 		out[1] = ((cp >>  6) & 0x3F) | 0x80;
    273 		out[2] = ((cp >>  0) & 0x3F) | 0x80;
    274 	} else if (cp <= 0x10FFFF) {
    275 		result = 4;
    276 		out[0] = ((cp >> 18) & 0x07) | 0xF0;
    277 		out[1] = ((cp >> 12) & 0x3F) | 0x80;
    278 		out[2] = ((cp >>  6) & 0x3F) | 0x80;
    279 		out[3] = ((cp >>  0) & 0x3F) | 0x80;
    280 	} else {
    281 		out[0] = '?';
    282 	}
    283 	return result;
    284 }
    285 
    286 function UnicodeDecode
    287 utf16_decode(u16 *data, iz length)
    288 {
    289 	UnicodeDecode result = {.cp = U32_MAX};
    290 	if (length) {
    291 		result.consumed = 1;
    292 		result.cp = data[0];
    293 		if (length > 1 && BETWEEN(data[0], 0xD800u, 0xDBFFu)
    294 		               && BETWEEN(data[1], 0xDC00u, 0xDFFFu))
    295 		{
    296 			result.consumed = 2;
    297 			result.cp = ((data[0] - 0xD800u) << 10u) | ((data[1] - 0xDC00u) + 0x10000u);
    298 		}
    299 	}
    300 	return result;
    301 }
    302 
    303 function u32
    304 utf16_encode(u16 *out, u32 cp)
    305 {
    306 	u32 result = 1;
    307 	if (cp == U32_MAX) {
    308 		out[0] = '?';
    309 	} else if (cp < 0x10000u) {
    310 		out[0] = (u16)cp;
    311 	} else {
    312 		u32 value = cp - 0x10000u;
    313 		out[0] = (u16)(0xD800u + (value >> 10u));
    314 		out[1] = (u16)(0xDC00u + (value & 0x3FFu));
    315 		result = 2;
    316 	}
    317 	return result;
    318 }
    319 
    320 function Stream
    321 stream_from_buffer(u8 *buffer, u32 capacity)
    322 {
    323 	Stream result = {.data = buffer, .cap = (i32)capacity};
    324 	return result;
    325 }
    326 
    327 function Stream
    328 stream_alloc(Arena *a, i32 cap)
    329 {
    330 	Stream result = stream_from_buffer(arena_commit(a, cap), (u32)cap);
    331 	return result;
    332 }
    333 
    334 #define stream_to_s8(s) s8_from_str8(stream_to_str8(s))
    335 function str8
    336 stream_to_str8(Stream *s)
    337 {
    338 	str8 result = str8("");
    339 	if (!s->errors) result = (str8){.length = s->widx, .data = s->data};
    340 	return result;
    341 }
    342 
    343 function void
    344 stream_reset(Stream *s, i32 index)
    345 {
    346 	s->errors = s->cap <= index;
    347 	if (!s->errors)
    348 		s->widx = index;
    349 }
    350 
    351 function void
    352 stream_commit(Stream *s, i32 count)
    353 {
    354 	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
    355 	if (!s->errors)
    356 		s->widx += count;
    357 }
    358 
    359 function void
    360 stream_append(Stream *s, void *data, iz count)
    361 {
    362 	s->errors |= (s->cap - s->widx) < count;
    363 	if (!s->errors) {
    364 		memory_copy(s->data + s->widx, data, (uz)count);
    365 		s->widx += (i32)count;
    366 	}
    367 }
    368 
    369 function void
    370 stream_append_codepoint(Stream *s, u32 codepoint)
    371 {
    372 	u8 buffer[4];
    373 	stream_append(s, buffer, utf8_encode(buffer, codepoint));
    374 }
    375 
    376 // TODO(rnp): replace with handwritten version
    377 #include <stdarg.h>
    378 #include <stdio.h>
    379 function void
    380 stream_appendfv(Stream *s, const char *format, va_list args)
    381 {
    382 	i32 written = vsnprintf((char *)s->data + s->widx, s->cap - s->widx, format, args);
    383 	s->errors |= written > (s->cap - s->widx);
    384 	if (!s->errors) s->widx += written;
    385 }
    386 
    387 function print_format(2, 3) void
    388 stream_appendf(Stream *s, const char *format, ...)
    389 {
    390 	va_list args;
    391 	va_start(args, format);
    392 	stream_appendfv(s, format, args);
    393 	va_end(args);
    394 }
    395 
    396 function void
    397 stream_append_byte(Stream *s, u8 b)
    398 {
    399 	stream_append(s, &b, 1);
    400 }
    401 
    402 function void
    403 stream_pad(Stream *s, u8 b, i32 n)
    404 {
    405 	while (n > 0) stream_append_byte(s, b), n--;
    406 }
    407 
    408 function void
    409 stream_append_s8(Stream *s, s8 str)
    410 {
    411 	stream_append(s, str.data, str.len);
    412 }
    413 
    414 #define stream_append_s8s(s, ...) stream_append_s8s_(s, arg_list(s8, ##__VA_ARGS__))
    415 function void
    416 stream_append_s8s_(Stream *s, s8 *strs, iz count)
    417 {
    418 	for (iz i = 0; i < count; i++)
    419 		stream_append(s, strs[i].data, strs[i].len);
    420 }
    421 
    422 function void
    423 stream_append_u64_width(Stream *s, u64 n, u64 min_width)
    424 {
    425 	u8 tmp[64];
    426 	u8 *end = tmp + sizeof(tmp);
    427 	u8 *beg = end;
    428 	min_width = MIN(sizeof(tmp), min_width);
    429 
    430 	do { *--beg = (u8)('0' + (n % 10)); } while (n /= 10);
    431 	while (end - beg > 0 && (uz)(end - beg) < min_width)
    432 		*--beg = '0';
    433 
    434 	stream_append(s, beg, end - beg);
    435 }
    436 
    437 function void
    438 stream_append_u64(Stream *s, u64 n)
    439 {
    440 	stream_append_u64_width(s, n, 0);
    441 }
    442 
    443 function void
    444 stream_append_hex_u64_width(Stream *s, u64 n, iz width)
    445 {
    446 	assert(width <= 16);
    447 	if (!s->errors) {
    448 		u8  buf[16];
    449 		u8 *end = buf + sizeof(buf);
    450 		u8 *beg = end;
    451 		while (n) {
    452 			*--beg = (u8)"0123456789abcdef"[n & 0x0F];
    453 			n >>= 4;
    454 		}
    455 		while (end - beg < width)
    456 			*--beg = '0';
    457 		stream_append(s, beg, end - beg);
    458 	}
    459 }
    460 
    461 function void
    462 stream_append_hex_u64(Stream *s, u64 n)
    463 {
    464 	stream_append_hex_u64_width(s, n, 2);
    465 }
    466 
    467 function void
    468 stream_append_i64(Stream *s, i64 n)
    469 {
    470 	if (n < 0) {
    471 		stream_append_byte(s, '-');
    472 		n *= -1;
    473 	}
    474 	stream_append_u64(s, (u64)n);
    475 }
    476 
    477 function void
    478 stream_append_f64(Stream *s, f64 f, u64 prec)
    479 {
    480 	if (f < 0) {
    481 		stream_append_byte(s, '-');
    482 		f *= -1;
    483 	}
    484 
    485 	/* NOTE: round last digit */
    486 	f += 0.5f / (f64)prec;
    487 
    488 	if (f >= (f64)(-1UL >> 1)) {
    489 		stream_append_s8(s, s8("inf"));
    490 	} else {
    491 		u64 integral = (u64)f;
    492 		u64 fraction = (u64)((f - (f64)integral) * (f64)prec);
    493 		stream_append_u64(s, integral);
    494 		stream_append_byte(s, '.');
    495 		for (u64 i = prec / 10; i > 1; i /= 10) {
    496 			if (i > fraction)
    497 				stream_append_byte(s, '0');
    498 		}
    499 		stream_append_u64(s, fraction);
    500 	}
    501 }
    502 
    503 function void
    504 stream_append_f64_e(Stream *s, f64 f)
    505 {
    506 	/* TODO: there should be a better way of doing this */
    507 	#if 0
    508 	/* NOTE: we ignore subnormal numbers for now */
    509 	union { f64 f; u64 u; } u = {.f = f};
    510 	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
    511 	f32 log_10_of_2 = 0.301f;
    512 	i32 scale       = (exponent * log_10_of_2);
    513 	/* NOTE: normalize f */
    514 	for (i32 i = ABS(scale); i > 0; i--)
    515 		f *= (scale > 0)? 0.1f : 10.0f;
    516 	#else
    517 	i32 scale = 0;
    518 	if (f != 0) {
    519 		while (f > 1) {
    520 			f *= 0.1f;
    521 			scale++;
    522 		}
    523 		while (f < 1) {
    524 			f *= 10.0f;
    525 			scale--;
    526 		}
    527 	}
    528 	#endif
    529 
    530 	u32 prec = 100;
    531 	stream_append_f64(s, f, prec);
    532 	stream_append_byte(s, 'e');
    533 	stream_append_byte(s, scale >= 0? '+' : '-');
    534 	for (u32 i = prec / 10; i > 1; i /= 10)
    535 		stream_append_byte(s, '0');
    536 	stream_append_u64(s, (u64)Abs(scale));
    537 }
    538 
    539 function void
    540 stream_append_v2(Stream *s, v2 v)
    541 {
    542 	stream_append_byte(s, '{');
    543 	stream_append_f64(s, v.x, 100);
    544 	stream_append_s8(s, s8(", "));
    545 	stream_append_f64(s, v.y, 100);
    546 	stream_append_byte(s, '}');
    547 }
    548 
    549 function Stream
    550 arena_stream(Arena a)
    551 {
    552 	Stream result = {0};
    553 	result.data   = a.beg;
    554 	result.cap    = (i32)(a.end - a.beg);
    555 
    556 	/* TODO(rnp): no idea what to do here if we want to maintain the ergonomics */
    557 	asan_unpoison_region(result.data, result.cap);
    558 
    559 	return result;
    560 }
    561 
    562 function s8
    563 arena_stream_commit(Arena *a, Stream *s)
    564 {
    565 	assert(s->data == a->beg);
    566 	s8 result = stream_to_s8(s);
    567 	arena_commit(a, result.len);
    568 	return result;
    569 }
    570 
    571 function s8
    572 arena_stream_commit_zero(Arena *a, Stream *s)
    573 {
    574 	b32 error = s->errors || s->widx == s->cap;
    575 	if (!error)
    576 		s->data[s->widx] = 0;
    577 	s8 result = stream_to_s8(s);
    578 	arena_commit(a, result.len + 1);
    579 	return result;
    580 }
    581 
    582 function s8
    583 arena_stream_commit_and_reset(Arena *arena, Stream *s)
    584 {
    585 	s8 result = arena_stream_commit_zero(arena, s);
    586 	*s = arena_stream(*arena);
    587 	return result;
    588 }
    589 
    590 #if !defined(XXH_IMPLEMENTATION)
    591 # define XXH_INLINE_ALL
    592 # define XXH_IMPLEMENTATION
    593 # define XXH_STATIC_LINKING_ONLY
    594 # include "external/xxhash.h"
    595 #endif
    596 
    597 function u128
    598 u128_hash_from_data(void *data, uz size)
    599 {
    600 	u128 result = {0};
    601 	XXH128_hash_t hash = XXH3_128bits_withSeed(data, size, 4969);
    602 	mem_copy(&result, &hash, sizeof(result));
    603 	return result;
    604 }
    605 
    606 function u64
    607 u64_hash_from_str8_seed(str8 string, u64 seed)
    608 {
    609 	u64 result = XXH3_64bits_withSeed(string.data, (uz)string.length, seed);
    610 	return result;
    611 }
    612 
    613 function u64
    614 u64_hash_from_str8(str8 v)
    615 {
    616 	u64 result = u64_hash_from_str8_seed(v, 4969);
    617 	return result;
    618 }
    619 
    620 function str8
    621 str8_from_c_str(char *cstr)
    622 {
    623 	str8 result = {.data = (u8 *)cstr};
    624 	if (cstr) while (*cstr) cstr++;
    625 	result.length = (u8 *)cstr - result.data;
    626 	return result;
    627 }
    628 
    629 function s8
    630 c_str_to_s8(char *cstr)
    631 {
    632 	str8 s = str8_from_c_str(cstr);
    633 	s8 result = s8_from_str8(s);
    634 	return result;
    635 }
    636 
    637 function str8
    638 str8_range(u8 *start, u8 *one_past_last)
    639 {
    640 	str8 result;
    641 	result.data   = start;
    642 	result.length = one_past_last - start;
    643 	return result;
    644 }
    645 
    646 function str8
    647 str8_skip(str8 s, i64 count)
    648 {
    649 	str8 result = s;
    650 	if (count > 0) {
    651 		result.data   += count;
    652 		result.length -= count;
    653 	}
    654 	return result;
    655 }
    656 
    657 function b32
    658 str8_equal(str8 a, str8 b)
    659 {
    660 	b32 result = a.length == b.length;
    661 	for (i64 i = 0; result && i < a.length; i++)
    662 		result = a.data[i] == b.data[i];
    663 	return result;
    664 }
    665 
    666 function b32
    667 s8_equal(s8 a, s8 b)
    668 {
    669 	return str8_equal(str8_from_s8(a), str8_from_s8(b));
    670 }
    671 
    672 /* NOTE(rnp): returns < 0 if byte is not found */
    673 function i64
    674 str8_scan_backwards(str8 s, u8 byte)
    675 {
    676 	i64 result = (u8 *)memory_scan_backwards(s.data, byte, s.length) - s.data;
    677 	return result;
    678 }
    679 
    680 function str8
    681 str8_cut_head(str8 s, i64 cut)
    682 {
    683 	str8 result = s;
    684 	if (cut > 0) {
    685 		result.data   += cut;
    686 		result.length -= cut;
    687 	}
    688 	result.length = Max(0, result.length);
    689 	return result;
    690 }
    691 
    692 function b32
    693 str8_match(str8 a, str8 b, StringMatchFlags flags)
    694 {
    695 	b32 result = 0;
    696 	if (flags == 0) {
    697 		result = str8_equal(a, b);
    698 	} else if (a.length == b.length || (flags & StringMatchFlag_SloppySize)) {
    699 		result = 1;
    700 		i64 length = Min(a.length, b.length);
    701 		for (i64 it = 0; it < length && result; it++) {
    702 			u8 ab = a.data[it], bb = b.data[it];
    703 			if (flags & StringMatchFlag_CaseInsensitive) {
    704 				ab |= 0x20;
    705 				bb |= 0x20;
    706 			}
    707 			result &= ab == bb;
    708 		}
    709 	}
    710 	return result;
    711 }
    712 
    713 function i64
    714 str8_find_needle(str8 string, str8 needle, StringMatchFlags flags)
    715 {
    716 	u8 *s  = string.data;
    717 	u8 *se = string.data + Max(string.length + 1, needle.length) - needle.length;
    718 	if (needle.length > 0) {
    719 		flags |= StringMatchFlag_SloppySize;
    720 
    721 		u8 nb = needle.data[0];
    722 		if (flags & StringMatchFlag_CaseInsensitive)
    723 			nb |= 0x20;
    724 
    725 		str8 needle_tail = str8_skip(needle, 1);
    726 		u8 *s_opl = string.data + string.length;
    727 		for (; s < se; s++) {
    728 			u8 sb = *s;
    729 			if (flags & StringMatchFlag_CaseInsensitive)
    730 				sb |= 0x20;
    731 
    732 			if (sb == nb && str8_match(str8_range(s + 1, s_opl), needle_tail, flags))
    733 				break;
    734 		}
    735 	}
    736 
    737 	i64 result = string.length;
    738 	if (s < se)
    739 		result = s - string.data;
    740 	return result;
    741 }
    742 
    743 
    744 function str8
    745 str8_alloc(Arena *a, i64 length)
    746 {
    747 	str8 result = {.data = push_array(a, u8, length), .length = length};
    748 	return result;
    749 }
    750 
    751 function str8
    752 str8_from_str16(Arena *a, str16 in)
    753 {
    754 	str8 result = str8("");
    755 	if (in.length) {
    756 		i64 commit = in.length * 4;
    757 		i64 length = 0;
    758 		u8 *data = arena_commit(a, commit + 1);
    759 		u16 *beg = in.data;
    760 		u16 *end = in.data + in.length;
    761 		while (beg < end) {
    762 			UnicodeDecode decode = utf16_decode(beg, end - beg);
    763 			length += utf8_encode(data + length, decode.cp);
    764 			beg    += decode.consumed;
    765 		}
    766 		data[length] = 0;
    767 		result = (str8){.length = length, .data = data};
    768 		arena_pop(a, commit - length);
    769 	}
    770 	return result;
    771 }
    772 
    773 function str16
    774 str16_from_str8(Arena *a, str8 in)
    775 {
    776 	str16 result = {0};
    777 	if (in.length) {
    778 		i64  length   = 0;
    779 		i64  required = 2 * in.length + 1;
    780 		u16 *data     = push_array(a, u16, required);
    781 		/* TODO(rnp): utf8_decode */
    782 		for (i64 i = 0; i < in.length; i++) {
    783 			u32 cp  = in.data[i];
    784 			length += utf16_encode(data + length, cp);
    785 		}
    786 		result = (str16){.length = length, .data = data};
    787 		arena_pop(a, required - length);
    788 	}
    789 	return result;
    790 }
    791 
    792 #define push_str8_from_parts(a, j, ...) push_str8_from_parts_((a), (j), arg_list(str8, __VA_ARGS__))
    793 function str8
    794 push_str8_from_parts_(Arena *arena, str8 joiner, str8 *parts, i64 count)
    795 {
    796 	i64 length = joiner.length * (count - 1);
    797 	for (i64 i = 0; i < count; i++)
    798 		length += parts[i].length;
    799 
    800 	str8 result = {.length = length, .data = arena_commit(arena, length + 1)};
    801 
    802 	i64 offset = 0;
    803 	for (i64 i = 0; i < count; i++) {
    804 		if (i != 0) {
    805 			memory_copy(result.data + offset, joiner.data, (uz)joiner.length);
    806 			offset += joiner.length;
    807 		}
    808 		memory_copy(result.data + offset, parts[i].data, (uz)parts[i].length);
    809 		offset += parts[i].length;
    810 	}
    811 	result.data[result.length] = 0;
    812 
    813 	return result;
    814 }
    815 
    816 #define push_s8_from_parts(a, j, ...) push_s8_from_parts_((a), (j), arg_list(s8, __VA_ARGS__))
    817 function s8
    818 push_s8_from_parts_(Arena *arena, s8 joiner, s8 *parts, iz count)
    819 {
    820 	iz length = joiner.len * (count - 1);
    821 	for (iz i = 0; i < count; i++)
    822 		length += parts[i].len;
    823 
    824 	s8 result = {.len = length, .data = arena_commit(arena, length + 1)};
    825 
    826 	iz offset = 0;
    827 	for (iz i = 0; i < count; i++) {
    828 		if (i != 0) {
    829 			mem_copy(result.data + offset, joiner.data, (uz)joiner.len);
    830 			offset += joiner.len;
    831 		}
    832 		mem_copy(result.data + offset, parts[i].data, (uz)parts[i].len);
    833 		offset += parts[i].len;
    834 	}
    835 	result.data[result.len] = 0;
    836 
    837 	return result;
    838 }
    839 
    840 function str8
    841 push_str8(Arena *a, str8 str)
    842 {
    843 	str8 result    = str8_alloc(a, str.length + 1);
    844 	result.length -= 1;
    845 	memory_copy(result.data, str.data, (uz)result.length);
    846 	return result;
    847 }
    848 
    849 function s8
    850 push_s8(Arena *a, s8 str)
    851 {
    852 	str8 copy   = push_str8(a, str8_from_s8(str));
    853 	s8   result = s8_from_str8(copy);
    854 	return result;
    855 }
    856 
    857 // TODO(rnp): replace with handwritten version
    858 function str8
    859 push_str8_fv(Arena *arena, const char *format, va_list args)
    860 {
    861 	Stream sb = arena_stream(*arena);
    862 	stream_appendfv(&sb, format, args);
    863 	s8 s = arena_stream_commit(arena, &sb);
    864 	str8 result = {.length = s.len, .data = s.data};
    865 	return result;
    866 }
    867 
    868 function str8
    869 push_f64_string(Arena *arena, f64 value, u64 precision)
    870 {
    871 	Stream sb = arena_stream(*arena);
    872 	stream_append_f64(&sb, value, precision);
    873 	s8 s = arena_stream_commit(arena, &sb);
    874 	str8 result = {.length = s.len, .data = s.data};
    875 	return result;
    876 }
    877 
    878 /* NOTE(rnp): from Hacker's Delight */
    879 function force_inline u64
    880 round_down_power_of_two(u64 a)
    881 {
    882 	u64 result = 0x8000000000000000ULL >> clz_u64(a);
    883 	return result;
    884 }
    885 
    886 function force_inline u64
    887 round_up_power_of_two(u64 a)
    888 {
    889 	u64 result = 0x8000000000000000ULL >> (clz_u64(a - 1) - 1);
    890 	return result;
    891 }
    892 
    893 function force_inline iz
    894 round_up_to(iz value, iz multiple)
    895 {
    896 	iz result = value;
    897 	if (value % multiple != 0)
    898 		result += multiple - value % multiple;
    899 	return result;
    900 }
    901 
    902 function void
    903 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
    904 {
    905 	if (left) {
    906 		left->pos    = rect.pos;
    907 		left->size.h = rect.size.h;
    908 		left->size.w = rect.size.w * fraction;
    909 	}
    910 	if (right) {
    911 		right->pos    = rect.pos;
    912 		right->pos.x += rect.size.w * fraction;
    913 		right->size.h = rect.size.h;
    914 		right->size.w = rect.size.w * (1.0f - fraction);
    915 	}
    916 }
    917 
    918 function void
    919 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
    920 {
    921 	if (top) {
    922 		top->pos    = rect.pos;
    923 		top->size.w = rect.size.w;
    924 		top->size.h = rect.size.h * fraction;
    925 	}
    926 	if (bot) {
    927 		bot->pos    = rect.pos;
    928 		bot->pos.y += rect.size.h * fraction;
    929 		bot->size.w = rect.size.w;
    930 		bot->size.h = rect.size.h * (1.0f - fraction);
    931 	}
    932 }
    933 
    934 function void
    935 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
    936 {
    937 	at = MIN(at, rect.size.w);
    938 	if (left) {
    939 		*left = rect;
    940 		left->size.w = at;
    941 	}
    942 	if (right) {
    943 		*right = rect;
    944 		right->pos.x  += at;
    945 		right->size.w -= at;
    946 	}
    947 }
    948 
    949 function void
    950 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
    951 {
    952 	at = MIN(at, rect.size.h);
    953 	if (top) {
    954 		*top = rect;
    955 		top->size.h = at;
    956 	}
    957 	if (bot) {
    958 		*bot = rect;
    959 		bot->pos.y  += at;
    960 		bot->size.h -= at;
    961 	}
    962 }
    963 
    964 function NumberConversion
    965 integer_from_str8(str8 raw)
    966 {
    967 	read_only local_persist alignas(64) i8 lut[64] = {
    968 		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    969 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    970 		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    971 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    972 	};
    973 
    974 	NumberConversion result = {.unparsed = raw};
    975 
    976 	i64 i     = 0;
    977 	i64 scale = 1;
    978 	if (raw.length > 0 && raw.data[0] == '-') {
    979 		scale = -1;
    980 		i     =  1;
    981 	}
    982 
    983 	b32 hex = 0;
    984 	if (raw.length - i > 2 && raw.data[i] == '0' && (raw.data[1] == 'x' || raw.data[1] == 'X')) {
    985 		hex = 1;
    986 		i += 2;
    987 	}
    988 
    989 	#define integer_conversion_body(radix, clamp) do {\
    990 		for (; i < raw.length; i++) {\
    991 			i64 value = lut[Min((u8)(raw.data[i] - (u8)'0'), clamp)];\
    992 			if (value >= 0) {\
    993 				if (result.U64 > (U64_MAX - (u64)value) / radix) {\
    994 					result.result = NumberConversionResult_OutOfRange;\
    995 					result.U64    = U64_MAX;\
    996 					return result;\
    997 				} else {\
    998 					result.U64 = radix * result.U64 + (u64)value;\
    999 				}\
   1000 			} else {\
   1001 				break;\
   1002 			}\
   1003 		}\
   1004 	} while (0)
   1005 
   1006 	if (hex) integer_conversion_body(16u, 63u);
   1007 	else     integer_conversion_body(10u, 15u);
   1008 
   1009 	#undef integer_conversion_body
   1010 
   1011 	result.unparsed = (str8){.length = raw.length - i, .data = raw.data + i};
   1012 	result.result   = i > 0 ? NumberConversionResult_Success : NumberConversionResult_Invalid;
   1013 	result.kind     = NumberConversionKind_Integer;
   1014 	if (scale < 0) result.U64 = 0 - result.U64;
   1015 
   1016 	return result;
   1017 }
   1018 
   1019 function NumberConversion
   1020 number_from_str8(str8 s)
   1021 {
   1022 	NumberConversion result  = {.unparsed = s};
   1023 	NumberConversion integer = integer_from_str8(s);
   1024 	if (integer.result == NumberConversionResult_Success) {
   1025 		if (integer.unparsed.length != 0 && integer.unparsed.data[0] == '.') {
   1026 			s = integer.unparsed;
   1027 			s.data++;
   1028 			s.length--;
   1029 
   1030 			while (s.length > 0 && s.data[s.length - 1] == '0') s.length--;
   1031 
   1032 			NumberConversion fractional = integer_from_str8(s);
   1033 			if (fractional.result == NumberConversionResult_Success || s.length == 0) {
   1034 				result.F64 = (f64)fractional.U64;
   1035 
   1036 				u64 divisor = (u64)(fractional.unparsed.data - s.data);
   1037 				while (divisor > 0) { result.F64 /= 10.0; divisor--; }
   1038 
   1039 				result.F64 += (f64)integer.S64;
   1040 
   1041 				result.result   = NumberConversionResult_Success;
   1042 				result.kind     = NumberConversionKind_Float;
   1043 				result.unparsed = fractional.unparsed;
   1044 			}
   1045 		} else {
   1046 			result = integer;
   1047 		}
   1048 	}
   1049 	return result;
   1050 }