math.c (24486B)
1 #include "external/cephes.c" 2 3 function void 4 fill_kronecker_sub_matrix(f32 *out, i32 out_stride, f32 scale, f32 *b, iv2 b_dim) 5 { 6 f32x4 vscale = dup_f32x4((f32)scale); 7 for (i32 i = 0; i < b_dim.y; i++) { 8 for (i32 j = 0; j < b_dim.x; j += 4, b += 4) { 9 f32x4 vb = load_f32x4(b); 10 store_f32x4(out + j, mul_f32x4(vscale, vb)); 11 } 12 out += out_stride; 13 } 14 } 15 16 /* NOTE: this won't check for valid space/etc and assumes row major order */ 17 function void 18 kronecker_product(f32 *out, f32 *a, iv2 a_dim, f32 *b, iv2 b_dim) 19 { 20 iv2 out_dim = {{a_dim.x * b_dim.x, a_dim.y * b_dim.y}}; 21 assert(out_dim.y % 4 == 0); 22 for (i32 i = 0; i < a_dim.y; i++) { 23 f32 *vout = out; 24 for (i32 j = 0; j < a_dim.x; j++, a++) { 25 fill_kronecker_sub_matrix(vout, out_dim.y, *a, b, b_dim); 26 vout += b_dim.y; 27 } 28 out += out_dim.y * b_dim.x; 29 } 30 } 31 32 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */ 33 function f32 * 34 make_hadamard_transpose(Arena *a, i32 dim) 35 { 36 read_only local_persist f32 hadamard_12_12_transpose[] = { 37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 39 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 40 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, 41 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 42 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 43 1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 44 1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, 45 1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, 46 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, 47 1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 48 1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, 49 }; 50 51 read_only local_persist f32 hadamard_20_20_transpose[] = { 52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 53 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, 54 1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 55 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 56 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 57 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, 58 1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, 59 1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, 60 1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, 61 1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 62 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, 63 1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 64 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 65 1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 66 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 67 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 68 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 69 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, 70 1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, 71 1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 72 }; 73 74 f32 *result = 0; 75 76 b32 power_of_2 = ISPOWEROF2(dim); 77 b32 multiple_of_12 = dim % 12 == 0; 78 b32 multiple_of_20 = dim % 20 == 0; 79 iz elements = dim * dim; 80 81 i32 base_dim = 0; 82 if (power_of_2) { 83 base_dim = dim; 84 } else if (multiple_of_20 && ISPOWEROF2(dim / 20)) { 85 base_dim = 20; 86 dim /= 20; 87 } else if (multiple_of_12 && ISPOWEROF2(dim / 12)) { 88 base_dim = 12; 89 dim /= 12; 90 } 91 92 if (ISPOWEROF2(dim) && base_dim && arena_capacity(a, f32) >= elements * (1 + (dim != base_dim))) { 93 result = push_array(a, f32, elements); 94 95 Arena tmp = *a; 96 f32 *m = dim == base_dim ? result : push_array(&tmp, f32, elements); 97 98 #define IND(i, j) ((i) * dim + (j)) 99 m[0] = 1; 100 for (i32 k = 1; k < dim; k *= 2) { 101 for (i32 i = 0; i < k; i++) { 102 for (i32 j = 0; j < k; j++) { 103 f32 val = m[IND(i, j)]; 104 m[IND(i + k, j)] = val; 105 m[IND(i, j + k)] = val; 106 m[IND(i + k, j + k)] = -val; 107 } 108 } 109 } 110 #undef IND 111 112 f32 *m2 = 0; 113 iv2 m2_dim; 114 switch (base_dim) { 115 case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (iv2){{12, 12}}; }break; 116 case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (iv2){{20, 20}}; }break; 117 } 118 if (m2) kronecker_product(result, m, (iv2){{dim, dim}}, m2, m2_dim); 119 } 120 121 return result; 122 } 123 124 function b32 125 u128_equal(u128 a, u128 b) 126 { 127 b32 result = a.U64[0] == b.U64[0] && a.U64[1] == b.U64[1]; 128 return result; 129 } 130 131 function RangeU64 132 subrange_n_from_n_m_count(u64 n, u64 n_count, u64 m) 133 { 134 assert(n < n_count); 135 136 u64 per_lane = m / n_count; 137 u64 leftover = m - per_lane * n_count; 138 u64 leftovers_before_n = MIN(leftover, n); 139 u64 base_index = n * per_lane + leftovers_before_n; 140 u64 one_past_last_index = base_index + per_lane + ((n < leftover) ? 1 : 0); 141 142 RangeU64 result = {base_index, one_past_last_index}; 143 return result; 144 } 145 146 function b32 147 iv2_equal(iv2 a, iv2 b) 148 { 149 b32 result = a.x == b.x && a.y == b.y; 150 return result; 151 } 152 153 function b32 154 iv3_equal(iv3 a, iv3 b) 155 { 156 b32 result = a.x == b.x && a.y == b.y && a.z == b.z; 157 return result; 158 } 159 160 function i32 161 iv3_dimension(iv3 points) 162 { 163 i32 result = (points.x > 1) + (points.y > 1) + (points.z > 1); 164 return result; 165 } 166 167 function v2 168 clamp_v2_rect(v2 v, Rect r) 169 { 170 v2 result = v; 171 result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x); 172 result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y); 173 return result; 174 } 175 176 function v2 177 v2_from_iv2(iv2 v) 178 { 179 v2 result; 180 result.E[0] = (f32)v.E[0]; 181 result.E[1] = (f32)v.E[1]; 182 return result; 183 } 184 185 function v2 186 v2_abs(v2 a) 187 { 188 v2 result; 189 result.x = Abs(a.x); 190 result.y = Abs(a.y); 191 return result; 192 } 193 194 function v2 195 v2_scale(v2 a, f32 scale) 196 { 197 v2 result; 198 result.x = a.x * scale; 199 result.y = a.y * scale; 200 return result; 201 } 202 203 function v2 204 v2_add(v2 a, v2 b) 205 { 206 v2 result; 207 result.x = a.x + b.x; 208 result.y = a.y + b.y; 209 return result; 210 } 211 212 function v2 213 v2_sub(v2 a, v2 b) 214 { 215 v2 result = v2_add(a, v2_scale(b, -1.0f)); 216 return result; 217 } 218 219 function v2 220 v2_mul(v2 a, v2 b) 221 { 222 v2 result; 223 result.x = a.x * b.x; 224 result.y = a.y * b.y; 225 return result; 226 } 227 228 function v2 229 v2_div(v2 a, v2 b) 230 { 231 v2 result; 232 result.x = a.x / b.x; 233 result.y = a.y / b.y; 234 return result; 235 } 236 237 function v2 238 v2_floor(v2 a) 239 { 240 v2 result; 241 result.x = (f32)((i32)a.x); 242 result.y = (f32)((i32)a.y); 243 return result; 244 } 245 246 function f32 247 v2_magnitude_squared(v2 a) 248 { 249 f32 result = a.x * a.x + a.y * a.y; 250 return result; 251 } 252 253 function f32 254 v2_magnitude(v2 a) 255 { 256 f32 result = sqrt_f32(a.x * a.x + a.y * a.y); 257 return result; 258 } 259 260 function v3 261 cross(v3 a, v3 b) 262 { 263 v3 result; 264 result.x = a.y * b.z - a.z * b.y; 265 result.y = a.z * b.x - a.x * b.z; 266 result.z = a.x * b.y - a.y * b.x; 267 return result; 268 } 269 270 function v3 271 v3_from_iv3(iv3 v) 272 { 273 v3 result; 274 result.E[0] = (f32)v.E[0]; 275 result.E[1] = (f32)v.E[1]; 276 result.E[2] = (f32)v.E[2]; 277 return result; 278 } 279 280 function v3 281 v3_abs(v3 a) 282 { 283 v3 result; 284 result.x = Abs(a.x); 285 result.y = Abs(a.y); 286 result.z = Abs(a.z); 287 return result; 288 } 289 290 function v3 291 v3_scale(v3 a, f32 scale) 292 { 293 v3 result; 294 result.x = scale * a.x; 295 result.y = scale * a.y; 296 result.z = scale * a.z; 297 return result; 298 } 299 300 function v3 301 v3_add(v3 a, v3 b) 302 { 303 v3 result; 304 result.x = a.x + b.x; 305 result.y = a.y + b.y; 306 result.z = a.z + b.z; 307 return result; 308 } 309 310 function v3 311 v3_sub(v3 a, v3 b) 312 { 313 v3 result = v3_add(a, v3_scale(b, -1.0f)); 314 return result; 315 } 316 317 function v3 318 v3_div(v3 a, v3 b) 319 { 320 v3 result; 321 result.x = a.x / b.x; 322 result.y = a.y / b.y; 323 result.z = a.z / b.z; 324 return result; 325 } 326 327 function f32 328 v3_dot(v3 a, v3 b) 329 { 330 f32 result = a.x * b.x + a.y * b.y + a.z * b.z; 331 return result; 332 } 333 334 function f32 335 v3_magnitude_squared(v3 a) 336 { 337 f32 result = v3_dot(a, a); 338 return result; 339 } 340 341 function f32 342 v3_magnitude(v3 a) 343 { 344 f32 result = sqrt_f32(v3_dot(a, a)); 345 return result; 346 } 347 348 function v3 349 v3_normalize(v3 a) 350 { 351 v3 result = v3_scale(a, 1.0f / v3_magnitude(a)); 352 return result; 353 } 354 355 function v4 356 v4_scale(v4 a, f32 scale) 357 { 358 v4 result; 359 result.x = scale * a.x; 360 result.y = scale * a.y; 361 result.z = scale * a.z; 362 result.w = scale * a.w; 363 return result; 364 } 365 366 function v4 367 v4_add(v4 a, v4 b) 368 { 369 v4 result; 370 result.x = a.x + b.x; 371 result.y = a.y + b.y; 372 result.z = a.z + b.z; 373 result.w = a.w + b.w; 374 return result; 375 } 376 377 function v4 378 v4_sub(v4 a, v4 b) 379 { 380 v4 result = v4_add(a, v4_scale(b, -1)); 381 return result; 382 } 383 384 function f32 385 v4_dot(v4 a, v4 b) 386 { 387 f32 result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 388 return result; 389 } 390 391 function v4 392 v4_lerp(v4 a, v4 b, f32 t) 393 { 394 v4 result = v4_add(a, v4_scale(v4_sub(b, a), t)); 395 return result; 396 } 397 398 function b32 399 m4_equal(m4 a, m4 b) 400 { 401 b32 result = 1; 402 for EachElement(a.E, it) 403 result &= f32_equal(a.E[it], b.E[it]); 404 return result; 405 } 406 407 function m4 408 m4_identity(void) 409 { 410 m4 result; 411 result.c[0] = (v4){{1, 0, 0, 0}}; 412 result.c[1] = (v4){{0, 1, 0, 0}}; 413 result.c[2] = (v4){{0, 0, 1, 0}}; 414 result.c[3] = (v4){{0, 0, 0, 1}}; 415 return result; 416 } 417 418 function v4 419 m4_row(m4 a, u32 row) 420 { 421 v4 result; 422 result.E[0] = a.c[0].E[row]; 423 result.E[1] = a.c[1].E[row]; 424 result.E[2] = a.c[2].E[row]; 425 result.E[3] = a.c[3].E[row]; 426 return result; 427 } 428 429 function m4 430 m4_mul(m4 a, m4 b) 431 { 432 m4 result; 433 for (u32 i = 0; i < 4; i++) { 434 for (u32 j = 0; j < 4; j++) { 435 result.c[i].E[j] = v4_dot(m4_row(a, j), b.c[i]); 436 } 437 } 438 return result; 439 } 440 441 /* NOTE(rnp): based on: 442 * https://web.archive.org/web/20131215123403/ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf 443 * TODO(rnp): redo with SIMD as given in the link (but need to rewrite for column-major) 444 */ 445 function m4 446 m4_inverse(m4 m) 447 { 448 m4 result; 449 result.E[ 0] = m.E[5] * m.E[10] * m.E[15] - m.E[5] * m.E[11] * m.E[14] - m.E[9] * m.E[6] * m.E[15] + m.E[9] * m.E[7] * m.E[14] + m.E[13] * m.E[6] * m.E[11] - m.E[13] * m.E[7] * m.E[10]; 450 result.E[ 4] = -m.E[4] * m.E[10] * m.E[15] + m.E[4] * m.E[11] * m.E[14] + m.E[8] * m.E[6] * m.E[15] - m.E[8] * m.E[7] * m.E[14] - m.E[12] * m.E[6] * m.E[11] + m.E[12] * m.E[7] * m.E[10]; 451 result.E[ 8] = m.E[4] * m.E[ 9] * m.E[15] - m.E[4] * m.E[11] * m.E[13] - m.E[8] * m.E[5] * m.E[15] + m.E[8] * m.E[7] * m.E[13] + m.E[12] * m.E[5] * m.E[11] - m.E[12] * m.E[7] * m.E[ 9]; 452 result.E[12] = -m.E[4] * m.E[ 9] * m.E[14] + m.E[4] * m.E[10] * m.E[13] + m.E[8] * m.E[5] * m.E[14] - m.E[8] * m.E[6] * m.E[13] - m.E[12] * m.E[5] * m.E[10] + m.E[12] * m.E[6] * m.E[ 9]; 453 result.E[ 1] = -m.E[1] * m.E[10] * m.E[15] + m.E[1] * m.E[11] * m.E[14] + m.E[9] * m.E[2] * m.E[15] - m.E[9] * m.E[3] * m.E[14] - m.E[13] * m.E[2] * m.E[11] + m.E[13] * m.E[3] * m.E[10]; 454 result.E[ 5] = m.E[0] * m.E[10] * m.E[15] - m.E[0] * m.E[11] * m.E[14] - m.E[8] * m.E[2] * m.E[15] + m.E[8] * m.E[3] * m.E[14] + m.E[12] * m.E[2] * m.E[11] - m.E[12] * m.E[3] * m.E[10]; 455 result.E[ 9] = -m.E[0] * m.E[ 9] * m.E[15] + m.E[0] * m.E[11] * m.E[13] + m.E[8] * m.E[1] * m.E[15] - m.E[8] * m.E[3] * m.E[13] - m.E[12] * m.E[1] * m.E[11] + m.E[12] * m.E[3] * m.E[ 9]; 456 result.E[13] = m.E[0] * m.E[ 9] * m.E[14] - m.E[0] * m.E[10] * m.E[13] - m.E[8] * m.E[1] * m.E[14] + m.E[8] * m.E[2] * m.E[13] + m.E[12] * m.E[1] * m.E[10] - m.E[12] * m.E[2] * m.E[ 9]; 457 result.E[ 2] = m.E[1] * m.E[ 6] * m.E[15] - m.E[1] * m.E[ 7] * m.E[14] - m.E[5] * m.E[2] * m.E[15] + m.E[5] * m.E[3] * m.E[14] + m.E[13] * m.E[2] * m.E[ 7] - m.E[13] * m.E[3] * m.E[ 6]; 458 result.E[ 6] = -m.E[0] * m.E[ 6] * m.E[15] + m.E[0] * m.E[ 7] * m.E[14] + m.E[4] * m.E[2] * m.E[15] - m.E[4] * m.E[3] * m.E[14] - m.E[12] * m.E[2] * m.E[ 7] + m.E[12] * m.E[3] * m.E[ 6]; 459 result.E[10] = m.E[0] * m.E[ 5] * m.E[15] - m.E[0] * m.E[ 7] * m.E[13] - m.E[4] * m.E[1] * m.E[15] + m.E[4] * m.E[3] * m.E[13] + m.E[12] * m.E[1] * m.E[ 7] - m.E[12] * m.E[3] * m.E[ 5]; 460 result.E[14] = -m.E[0] * m.E[ 5] * m.E[14] + m.E[0] * m.E[ 6] * m.E[13] + m.E[4] * m.E[1] * m.E[14] - m.E[4] * m.E[2] * m.E[13] - m.E[12] * m.E[1] * m.E[ 6] + m.E[12] * m.E[2] * m.E[ 5]; 461 result.E[ 3] = -m.E[1] * m.E[ 6] * m.E[11] + m.E[1] * m.E[ 7] * m.E[10] + m.E[5] * m.E[2] * m.E[11] - m.E[5] * m.E[3] * m.E[10] - m.E[ 9] * m.E[2] * m.E[ 7] + m.E[ 9] * m.E[3] * m.E[ 6]; 462 result.E[ 7] = m.E[0] * m.E[ 6] * m.E[11] - m.E[0] * m.E[ 7] * m.E[10] - m.E[4] * m.E[2] * m.E[11] + m.E[4] * m.E[3] * m.E[10] + m.E[ 8] * m.E[2] * m.E[ 7] - m.E[ 8] * m.E[3] * m.E[ 6]; 463 result.E[11] = -m.E[0] * m.E[ 5] * m.E[11] + m.E[0] * m.E[ 7] * m.E[ 9] + m.E[4] * m.E[1] * m.E[11] - m.E[4] * m.E[3] * m.E[ 9] - m.E[ 8] * m.E[1] * m.E[ 7] + m.E[ 8] * m.E[3] * m.E[ 5]; 464 result.E[15] = m.E[0] * m.E[ 5] * m.E[10] - m.E[0] * m.E[ 6] * m.E[ 9] - m.E[4] * m.E[1] * m.E[10] + m.E[4] * m.E[2] * m.E[ 9] + m.E[ 8] * m.E[1] * m.E[ 6] - m.E[ 8] * m.E[2] * m.E[ 5]; 465 466 f32 determinant = m.E[0] * result.E[0] + m.E[1] * result.E[4] + m.E[2] * result.E[8] + m.E[3] * result.E[12]; 467 determinant = 1.0f / determinant; 468 for(i32 i = 0; i < 16; i++) 469 result.E[i] *= determinant; 470 return result; 471 } 472 473 function m4 474 m4_translation(v3 delta) 475 { 476 m4 result; 477 result.c[0] = (v4){{1, 0, 0, 0}}; 478 result.c[1] = (v4){{0, 1, 0, 0}}; 479 result.c[2] = (v4){{0, 0, 1, 0}}; 480 result.c[3] = (v4){{delta.x, delta.y, delta.z, 1}}; 481 return result; 482 } 483 484 function m4 485 m4_scale(v3 scale) 486 { 487 m4 result; 488 result.c[0] = (v4){{scale.x, 0, 0, 0}}; 489 result.c[1] = (v4){{0, scale.y, 0, 0}}; 490 result.c[2] = (v4){{0, 0, scale.z, 0}}; 491 result.c[3] = (v4){{0, 0, 0, 1}}; 492 return result; 493 } 494 495 function m4 496 m4_rotation_about_axis(v3 axis, f32 turns) 497 { 498 assert(f32_equal(v3_magnitude_squared(axis), 1.0f)); 499 f32 sa = sin_f32(turns * 2 * PI); 500 f32 ca = cos_f32(turns * 2 * PI); 501 f32 mca = 1.0f - ca; 502 503 f32 x = axis.x, x2 = x * x; 504 f32 y = axis.y, y2 = y * y; 505 f32 z = axis.z, z2 = z * z; 506 507 m4 result; 508 result.c[0] = (v4){{ca + mca * x2, mca * x * y - sa * z, mca * x * z + sa * y, 0}}; 509 result.c[1] = (v4){{mca * x * y + sa * z, ca + mca * y2, mca * y * z - sa * x, 0}}; 510 result.c[2] = (v4){{mca * x * z - sa * y, mca * y * z + sa * x, ca + mca * z2, 0}}; 511 result.c[3] = (v4){{0, 0, 0, 1}}; 512 return result; 513 } 514 515 function m4 516 m4_rotation_about_y(f32 turns) 517 { 518 m4 result = m4_rotation_about_axis((v3){.y = 1.0f}, turns); 519 return result; 520 } 521 522 function m4 523 y_aligned_volume_transform(v3 extent, v3 translation, f32 rotation_turns) 524 { 525 m4 T = m4_translation(translation); 526 m4 R = m4_rotation_about_axis((v3){.y = 1.0f}, rotation_turns); 527 m4 S = m4_scale(extent); 528 m4 result = m4_mul(T, m4_mul(R, S)); 529 return result; 530 } 531 532 function v4 533 m4_mul_v4(m4 a, v4 v) 534 { 535 v4 result; 536 result.x = v4_dot(m4_row(a, 0), v); 537 result.y = v4_dot(m4_row(a, 1), v); 538 result.z = v4_dot(m4_row(a, 2), v); 539 result.w = v4_dot(m4_row(a, 3), v); 540 return result; 541 } 542 543 function v3 544 m4_mul_v3(m4 a, v3 v) 545 { 546 v3 result = m4_mul_v4(a, (v4){{v.x, v.y, v.z, 1.0f}}).xyz; 547 return result; 548 } 549 550 function m4 551 orthographic_projection(f32 n, f32 f, f32 t, f32 r) 552 { 553 m4 result; 554 f32 a = -2 / (f - n); 555 f32 b = - (f + n) / (f - n); 556 result.c[0] = (v4){{1 / r, 0, 0, 0}}; 557 result.c[1] = (v4){{0, 1 / t, 0, 0}}; 558 result.c[2] = (v4){{0, 0, a, 0}}; 559 result.c[3] = (v4){{0, 0, b, 1}}; 560 return result; 561 } 562 563 function m4 564 perspective_projection(f32 n, f32 f, f32 fov, f32 aspect) 565 { 566 m4 result; 567 f32 t = tan_f32(fov / 2.0f); 568 f32 r = t * aspect; 569 f32 a = -(f + n) / (f - n); 570 f32 b = -2 * f * n / (f - n); 571 result.c[0] = (v4){{1 / r, 0, 0, 0}}; 572 result.c[1] = (v4){{0, 1 / t, 0, 0}}; 573 result.c[2] = (v4){{0, 0, a, -1}}; 574 result.c[3] = (v4){{0, 0, b, 0}}; 575 return result; 576 } 577 578 function m4 579 camera_look_at(v3 camera, v3 point) 580 { 581 v3 orthogonal = {{0, 1.0f, 0}}; 582 v3 normal = v3_normalize(v3_sub(camera, point)); 583 v3 right = cross(orthogonal, normal); 584 v3 up = cross(normal, right); 585 586 v3 translate; 587 camera = v3_sub((v3){0}, camera); 588 translate.x = v3_dot(camera, right); 589 translate.y = v3_dot(camera, up); 590 translate.z = v3_dot(camera, normal); 591 592 m4 result; 593 result.c[0] = (v4){{right.x, up.x, normal.x, 0}}; 594 result.c[1] = (v4){{right.y, up.y, normal.y, 0}}; 595 result.c[2] = (v4){{right.z, up.z, normal.z, 0}}; 596 result.c[3] = (v4){{translate.x, translate.y, translate.z, 1}}; 597 return result; 598 } 599 600 /* NOTE(rnp): adapted from "Essential Mathematics for Games and Interactive Applications" (Verth, Bishop) */ 601 function f32 602 obb_raycast(m4 obb_orientation, v3 obb_size, v3 obb_center, ray r) 603 { 604 v3 p = v3_sub(obb_center, r.origin); 605 v3 X = obb_orientation.c[0].xyz; 606 v3 Y = obb_orientation.c[1].xyz; 607 v3 Z = obb_orientation.c[2].xyz; 608 609 /* NOTE(rnp): projects direction vector onto OBB axis */ 610 v3 f; 611 f.x = v3_dot(X, r.direction); 612 f.y = v3_dot(Y, r.direction); 613 f.z = v3_dot(Z, r.direction); 614 615 /* NOTE(rnp): projects relative vector onto OBB axis */ 616 v3 e; 617 e.x = v3_dot(X, p); 618 e.y = v3_dot(Y, p); 619 e.z = v3_dot(Z, p); 620 621 f32 result = 0; 622 f32 t[6] = {0}; 623 for (i32 i = 0; i < 3; i++) { 624 if (f32_equal(f.E[i], 0)) { 625 if (-e.E[i] - obb_size.E[i] > 0 || -e.E[i] + obb_size.E[i] < 0) 626 result = -1.0f; 627 f.E[i] = F32_EPSILON; 628 } 629 t[i * 2 + 0] = (e.E[i] + obb_size.E[i]) / f.E[i]; 630 t[i * 2 + 1] = (e.E[i] - obb_size.E[i]) / f.E[i]; 631 } 632 633 if (result != -1) { 634 f32 tmin = MAX(MAX(MIN(t[0], t[1]), MIN(t[2], t[3])), MIN(t[4], t[5])); 635 f32 tmax = MIN(MIN(MAX(t[0], t[1]), MAX(t[2], t[3])), MAX(t[4], t[5])); 636 if (tmax >= 0 && tmin <= tmax) { 637 result = tmin > 0 ? tmin : tmax; 638 } else { 639 result = -1; 640 } 641 } 642 643 return result; 644 } 645 646 function f32 647 complex_filter_first_moment(v2 *filter, i32 length, f32 sampling_frequency) 648 { 649 f32 n = 0, d = 0; 650 for (i32 i = 0; i < length; i++) { 651 f32 t = v2_magnitude_squared(filter[i]); 652 n += (f32)i * t; 653 d += t; 654 } 655 f32 result = n / d / sampling_frequency; 656 return result; 657 } 658 659 function f32 660 real_filter_first_moment(f32 *filter, i32 length, f32 sampling_frequency) 661 { 662 f32 n = 0, d = 0; 663 for (i32 i = 0; i < length; i++) { 664 f32 t = filter[i] * filter[i]; 665 n += (f32)i * t; 666 d += t; 667 } 668 f32 result = n / d / sampling_frequency; 669 return result; 670 } 671 672 function f32 673 tukey_window(f32 t, f32 tapering) 674 { 675 f32 r = tapering; 676 f32 result = 1; 677 if (t < r / 2) result = 0.5f * (1 + cos_f32(2 * PI * (t - r / 2) / r)); 678 if (t >= 1 - r / 2) result = 0.5f * (1 + cos_f32(2 * PI * (t - 1 + r / 2) / r)); 679 return result; 680 } 681 682 /* NOTE(rnp): adapted from "Discrete Time Signal Processing" (Oppenheim) */ 683 function f32 * 684 kaiser_low_pass_filter(Arena *arena, f32 cutoff_frequency, f32 sampling_frequency, f32 beta, i32 length) 685 { 686 f32 *result = push_array(arena, f32, length); 687 f32 wc = 2 * PI * cutoff_frequency / sampling_frequency; 688 f32 a = (f32)length / 2.0f; 689 f32 pi_i0_b = PI * (f32)cephes_i0(beta); 690 691 for (i32 n = 0; n < length; n++) { 692 f32 t = (f32)n - a; 693 f32 impulse = !f32_equal(t, 0) ? sin_f32(wc * t) / t : wc; 694 t = t / a; 695 f32 window = (f32)cephes_i0(beta * sqrt_f32(1 - t * t)) / pi_i0_b; 696 result[n] = impulse * window; 697 } 698 699 return result; 700 } 701 702 function f32 * 703 rf_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency, 704 i32 length, b32 reverse) 705 { 706 f32 *result = push_array(arena, f32, length); 707 for (i32 i = 0; i < length; i++) { 708 i32 index = reverse? length - 1 - i : i; 709 f32 fc = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length); 710 f32 arg = 2 * PI * fc * (f32)i / sampling_frequency; 711 result[index] = sin_f32(arg) * tukey_window((f32)i / (f32)length, 0.2f); 712 } 713 return result; 714 } 715 716 function v2 * 717 baseband_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency, 718 i32 length, b32 reverse, f32 scale) 719 { 720 v2 *result = push_array(arena, v2, length); 721 f32 conjugate = reverse ? -1 : 1; 722 for (i32 i = 0; i < length; i++) { 723 i32 index = reverse? length - 1 - i : i; 724 f32 fc = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length); 725 f32 arg = 2 * PI * fc * (f32)i / sampling_frequency; 726 v2 sample = {{scale * cos_f32(arg), conjugate * scale * sin_f32(arg)}}; 727 result[index] = v2_scale(sample, tukey_window((f32)i / (f32)length, 0.2f)); 728 } 729 return result; 730 } 731 732 function iv3 733 das_output_dimension(iv3 points) 734 { 735 iv3 result; 736 result.x = Max(points.x, 1); 737 result.y = Max(points.y, 1); 738 result.z = Max(points.z, 1); 739 740 switch (iv3_dimension(result)) { 741 case 1:{ 742 if (result.y > 1) result.x = result.y; 743 if (result.z > 1) result.x = result.z; 744 result.y = result.z = 1; 745 }break; 746 747 case 2:{ 748 if (result.x > 1) { 749 if (result.z > 1) result.y = result.z; 750 } else { 751 result.x = result.z; 752 } 753 result.z = 1; 754 }break; 755 756 case 3:{}break; 757 758 InvalidDefaultCase; 759 } 760 761 return result; 762 } 763 764 function m4 765 das_transform_1d(v3 p1, v3 p2) 766 { 767 v3 extent = v3_sub(p2, p1); 768 m4 result = { 769 .c[0] = (v4){{extent.x, extent.y, extent.z, 0.0f}}, 770 .c[1] = (v4){{0.0f, 0.0f, 0.0f, 0.0f}}, 771 .c[2] = (v4){{0.0f, 0.0f, 0.0f, 0.0f}}, 772 .c[3] = (v4){{p1.x, p1.y, p1.z, 1.0f}}, 773 }; 774 return result; 775 } 776 777 function m4 778 das_transform_2d_with_normal(v3 normal, v2 min_coordinate, v2 max_coordinate, f32 offset) 779 { 780 v3 U = {{0, 1.0f, 0}}; 781 if (f32_equal(v3_dot(U, normal), 1.0f)) 782 U = (v3){{1.0f, 0, 0}}; 783 784 v3 N = normal; 785 v3 V = cross(U, N); 786 787 v3 min = v3_add(v3_scale(U, min_coordinate.x), v3_scale(V, min_coordinate.y)); 788 v3 max = v3_add(v3_scale(U, max_coordinate.x), v3_scale(V, max_coordinate.y)); 789 790 v3 extent = v3_sub(max, min); 791 U = v3_scale(U, v3_dot(U, extent)); 792 V = v3_scale(V, v3_dot(V, extent)); 793 794 v3 t = v3_add(v3_scale(N, offset), min); 795 796 m4 result; 797 result.c[0] = (v4){{U.x, U.y, U.z, 0.0f}}; 798 result.c[1] = (v4){{V.x, V.y, V.z, 0.0f}}; 799 result.c[2] = (v4){{N.x, N.y, N.z, 0.0f}}; 800 result.c[3] = (v4){{t.x, t.y, t.z, 1.0f}}; 801 802 return result; 803 } 804 805 function m4 806 das_transform_2d_xz(v2 min_coordinate, v2 max_coordinate, f32 y_off) 807 { 808 m4 result = das_transform_2d_with_normal((v3){.y = 1.0f}, min_coordinate, max_coordinate, y_off); 809 return result; 810 } 811 812 function m4 813 das_transform_2d_yz(v2 min_coordinate, v2 max_coordinate, f32 x_off) 814 { 815 m4 result = das_transform_2d_with_normal((v3){.x = 1.0f}, min_coordinate, max_coordinate, x_off); 816 return result; 817 } 818 819 function m4 820 das_transform_2d_xy(v2 min_coordinate, v2 max_coordinate, f32 z_off) 821 { 822 m4 result = das_transform_2d_with_normal((v3){.z = 1.0f}, min_coordinate, max_coordinate, z_off); 823 return result; 824 } 825 826 function m4 827 das_transform_3d(v3 min_coordinate, v3 max_coordinate) 828 { 829 v3 extent = v3_sub(max_coordinate, min_coordinate); 830 m4 result; 831 result.c[0] = (v4){{extent.x, 0.0f, 0.0f, 0.0f}}; 832 result.c[1] = (v4){{0.0f, extent.y, 0.0f, 0.0f}}; 833 result.c[2] = (v4){{0.0f, 0.0f, extent.z, 0.0f}}; 834 result.c[3] = (v4){{min_coordinate.x, min_coordinate.y, min_coordinate.z, 1.0f}}; 835 return result; 836 } 837 838 function m4 839 das_transform(v3 min_coordinate, v3 max_coordinate, iv3 *points) 840 { 841 m4 result; 842 843 *points = das_output_dimension(*points); 844 845 switch (iv3_dimension(*points)) { 846 case 1:{result = das_transform_1d( min_coordinate, max_coordinate); }break; 847 case 2:{result = das_transform_2d_xz(XY(min_coordinate), XY(max_coordinate), 0);}break; 848 case 3:{result = das_transform_3d( min_coordinate, max_coordinate); }break; 849 } 850 851 return result; 852 } 853 854 function v2 855 plane_uv(v3 point, v3 U, v3 V) 856 { 857 v2 result; 858 result.x = v3_dot(U, point) / v3_dot(U, U); 859 result.y = v3_dot(V, point) / v3_dot(V, V); 860 return result; 861 } 862 863 function v4 864 hsv_to_rgb(v4 hsv) 865 { 866 /* f(k(n)) = V - V*S*max(0, min(k, min(4 - k, 1))) 867 * k(n) = fmod((n + H * 6), 6) 868 * (R, G, B) = (f(n = 5), f(n = 3), f(n = 1)) 869 */ 870 alignas(16) f32 nval[4] = {5.0f, 3.0f, 1.0f, 0.0f}; 871 f32x4 n = load_f32x4(nval); 872 f32x4 H = dup_f32x4(hsv.x); 873 f32x4 S = dup_f32x4(hsv.y); 874 f32x4 V = dup_f32x4(hsv.z); 875 f32x4 six = dup_f32x4(6); 876 877 f32x4 t = add_f32x4(n, mul_f32x4(six, H)); 878 f32x4 rem = floor_f32x4(div_f32x4(t, six)); 879 f32x4 k = sub_f32x4(t, mul_f32x4(rem, six)); 880 881 t = min_f32x4(sub_f32x4(dup_f32x4(4), k), dup_f32x4(1)); 882 t = max_f32x4(dup_f32x4(0), min_f32x4(k, t)); 883 t = mul_f32x4(t, mul_f32x4(S, V)); 884 885 v4 rgba; 886 store_f32x4(rgba.E, sub_f32x4(V, t)); 887 rgba.a = hsv.a; 888 return rgba; 889 } 890 891 function f32 892 ease_in_out_cubic(f32 t) 893 { 894 f32 result; 895 if (t < 0.5f) { 896 result = 4.0f * t * t * t; 897 } else { 898 t = -2.0f * t + 2.0f; 899 result = 1.0f - t * t * t / 2.0f; 900 } 901 return result; 902 } 903 904 function f32 905 ease_in_out_quartic(f32 t) 906 { 907 f32 result; 908 if (t < 0.5f) { 909 result = 8.0f * t * t * t * t; 910 } else { 911 t = -2.0f * t + 2.0f; 912 result = 1.0f - t * t * t * t / 2.0f; 913 } 914 return result; 915 }