ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

das.glsl (12893B)


      1 /* See LICENSE for license details. */
      2 #if   DataKind == DataKind_Float32
      3   #define SAMPLE_TYPE           float
      4   #define TEXTURE_KIND          r32f
      5   #define RESULT_TYPE_CAST(a)   (a).x
      6   #define OUTPUT_TYPE_CAST(a)   vec4((a).x, 0, 0, 0)
      7   #if !Fast
      8     #define RESULT_TYPE         vec2
      9     #define RESULT_LAST_INDEX   1
     10   #endif
     11 #elif DataKind == DataKind_Float32Complex
     12   #define SAMPLE_TYPE           vec2
     13   #define TEXTURE_KIND          rg32f
     14   #define RESULT_TYPE_CAST(a)   (a).xy
     15   #define OUTPUT_TYPE_CAST(a)   vec4((a).xy, 0, 0)
     16   #if !Fast
     17     #define RESULT_TYPE         vec3
     18     #define RESULT_LAST_INDEX   2
     19   #endif
     20 #else
     21   #error DataKind unsupported for DAS
     22 #endif
     23 
     24 layout(std430, binding = 1) readonly restrict buffer buffer_1 {
     25 	SAMPLE_TYPE rf_data[];
     26 };
     27 
     28 #ifndef RESULT_TYPE
     29   #define RESULT_TYPE SAMPLE_TYPE
     30 #endif
     31 
     32 #if Fast
     33   #define RESULT_STORE(a, length_a) RESULT_TYPE(a)
     34 	layout(TEXTURE_KIND, binding = 0)           restrict uniform image3D  u_out_data_tex;
     35 #else
     36   #define RESULT_STORE(a, length_a) RESULT_TYPE(a, length_a)
     37 	layout(TEXTURE_KIND, binding = 0) writeonly restrict uniform image3D  u_out_data_tex;
     38 #endif
     39 
     40 layout(r16i,  binding = 1) readonly  restrict uniform iimage1D sparse_elements;
     41 layout(rg32f, binding = 2) readonly  restrict uniform image1D  focal_vectors;
     42 layout(r8i,   binding = 3) readonly  restrict uniform iimage1D transmit_receive_orientations;
     43 
     44 #define RX_ORIENTATION(tx_rx) (((tx_rx) >> 0) & 0x0F)
     45 #define TX_ORIENTATION(tx_rx) (((tx_rx) >> 4) & 0x0F)
     46 
     47 #define C_SPLINE 0.5
     48 
     49 #if DataKind == DataKind_Float32Complex
     50 vec2 rotate_iq(const vec2 iq, const float time)
     51 {
     52 	float arg    = radians(360) * DemodulationFrequency * time;
     53 	mat2  phasor = mat2( cos(arg), sin(arg),
     54 	                    -sin(arg), cos(arg));
     55 	vec2 result = phasor * iq;
     56 	return result;
     57 }
     58 #else
     59   #define rotate_iq(a, b) (a)
     60 #endif
     61 
     62 /* NOTE: See: https://cubic.org/docs/hermite.htm */
     63 SAMPLE_TYPE cubic(const int base_index, const float t)
     64 {
     65 	const mat4 h = mat4(
     66 		 2, -3,  0, 1,
     67 		-2,  3,  0, 0,
     68 		 1, -2,  1, 0,
     69 		 1, -1,  0, 0
     70 	);
     71 
     72 	SAMPLE_TYPE samples[4] = {
     73 		rf_data[base_index + 0],
     74 		rf_data[base_index + 1],
     75 		rf_data[base_index + 2],
     76 		rf_data[base_index + 3],
     77 	};
     78 
     79 	vec4        S  = vec4(t * t * t, t * t, t, 1);
     80 	SAMPLE_TYPE P1 = samples[1];
     81 	SAMPLE_TYPE P2 = samples[2];
     82 	SAMPLE_TYPE T1 = C_SPLINE * (P2 - samples[0]);
     83 	SAMPLE_TYPE T2 = C_SPLINE * (samples[3] - P1);
     84 
     85 #if   DataKind == DataKind_Float32
     86 	vec4 C = vec4(P1.x, P2.x, T1.x, T2.x);
     87 	float result = dot(S, h * C);
     88 #elif DataKind == DataKind_Float32Complex
     89 	mat2x4 C = mat2x4(vec4(P1.x, P2.x, T1.x, T2.x), vec4(P1.y, P2.y, T1.y, T2.y));
     90 	vec2 result = S * h * C;
     91 #endif
     92 	return result;
     93 }
     94 
     95 SAMPLE_TYPE sample_rf(const int rf_offset, const float index)
     96 {
     97 	SAMPLE_TYPE result = SAMPLE_TYPE(0);
     98 	switch (InterpolationMode) {
     99 	case InterpolationMode_Nearest:{
    100 		if (int(index) >= 0 && int(round(index)) < SampleCount)
    101 			result = rotate_iq(rf_data[rf_offset + int(round(index))], index / SamplingFrequency);
    102 	}break;
    103 	case InterpolationMode_Linear:{
    104 		if (int(index) >= 0 && int(index) < SampleCount - 1) {
    105 			float tk, t = modf(index, tk);
    106 			int n = rf_offset + int(tk);
    107 			result = (1 - t) * rf_data[n] + t * rf_data[n + 1];
    108 			result = rotate_iq(result, index / SamplingFrequency);
    109 		}
    110 	}break;
    111 	case InterpolationMode_Cubic:{
    112 		if (int(index) > 0 && int(index) < SampleCount - 2) {
    113 			float tk, t = modf(index, tk);
    114 			result = rotate_iq(cubic(rf_offset + int(index), t), index / SamplingFrequency);
    115 		}
    116 	}break;
    117 	}
    118 	return result;
    119 }
    120 
    121 float sample_index(const float distance)
    122 {
    123 	float  time = distance / SpeedOfSound + TimeOffset;
    124 	return time * SamplingFrequency;
    125 }
    126 
    127 float apodize(const float arg)
    128 {
    129 	/* IMPORTANT: do not move calculation of arg into this function. It will generate a
    130 	 * conditional move resulting in cos always being evaluated causing a slowdown */
    131 
    132 	/* NOTE: constant F# dynamic receive apodization. This is implemented as:
    133 	 *
    134 	 *                  /        |x_e - x_i|\
    135 	 *    a(x, z) = cos(F# * π * ----------- ) ^ 2
    136 	 *                  \        |z_e - z_i|/
    137 	 *
    138 	 * where x,z_e are transducer element positions and x,z_i are image positions. */
    139 	float a = cos(radians(180) * arg);
    140 	return a * a;
    141 }
    142 
    143 vec2 rca_plane_projection(const vec3 point, const bool rows)
    144 {
    145 	vec2 result = vec2(point[int(rows)], point[2]);
    146 	return result;
    147 }
    148 
    149 float plane_wave_transmit_distance(const vec3 point, const float transmit_angle, const bool tx_rows)
    150 {
    151 	return dot(rca_plane_projection(point, tx_rows), vec2(sin(transmit_angle), cos(transmit_angle)));
    152 }
    153 
    154 float cylindrical_wave_transmit_distance(const vec3 point, const float focal_depth,
    155                                          const float transmit_angle, const bool tx_rows)
    156 {
    157 	vec2 f = focal_depth * vec2(sin(transmit_angle), cos(transmit_angle));
    158 	return distance(rca_plane_projection(point, tx_rows), f);
    159 }
    160 
    161 int tx_rx_orientation_for_acquisition(const int acquisition)
    162 {
    163 	int result = bool(SingleOrientation) ? TransmitReceiveOrientation : imageLoad(transmit_receive_orientations, acquisition).x;
    164 	return result;
    165 }
    166 
    167 vec2 focal_vector_for_acquisition(const int acquisition)
    168 {
    169 	vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth) : imageLoad(focal_vectors, acquisition).xy;
    170 	return result;
    171 }
    172 
    173 float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const int transmit_receive_orientation)
    174 {
    175 	float result = 0;
    176 	if (TX_ORIENTATION(transmit_receive_orientation) != RCAOrientation_None) {
    177 		bool  tx_rows        = TX_ORIENTATION(transmit_receive_orientation) == RCAOrientation_Rows;
    178 		float transmit_angle = radians(focal_vector.x);
    179 		float focal_depth    = focal_vector.y;
    180 
    181 		if (isinf(focal_depth)) {
    182 			result = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows);
    183 		} else {
    184 			result = cylindrical_wave_transmit_distance(world_point, focal_depth, transmit_angle, tx_rows);
    185 		}
    186 	}
    187 	return result;
    188 }
    189 
    190 RESULT_TYPE RCA(const vec3 world_point)
    191 {
    192 	const int acquisition_start = bool(Fast)? u_channel     : 0;
    193 	const int acquisition_end   = bool(Fast)? u_channel + 1 : AcquisitionCount;
    194 	RESULT_TYPE result = RESULT_TYPE(0);
    195 	for (int acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) {
    196 		const int  tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition);
    197 		const bool rx_rows           = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Rows;
    198 		const vec2 focal_vector      = focal_vector_for_acquisition(acquisition);
    199 		vec2  xdc_world_point   = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
    200 		float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
    201 
    202 		int rf_offset  = acquisition * SampleCount;
    203 		rf_offset     -= int(InterpolationMode == InterpolationMode_Cubic);
    204 		for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++) {
    205 			vec3  rx_center      = vec3(rx_channel * xdc_element_pitch, 0);
    206 			vec2  receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows);
    207 			float a_arg          = abs(FNumber * receive_vector.x / abs(xdc_world_point.y));
    208 
    209 			if (a_arg < 0.5f) {
    210 				float       sidx  = sample_index(transmit_distance + length(receive_vector));
    211 				SAMPLE_TYPE value = apodize(a_arg) * sample_rf(rf_offset, sidx);
    212 				result += RESULT_STORE(value, length(value));
    213 			}
    214 			rf_offset += SampleCount * AcquisitionCount;
    215 		}
    216 	}
    217 	return result;
    218 }
    219 
    220 RESULT_TYPE HERCULES(const vec3 world_point)
    221 {
    222 	const int tx_rx_orientation  = tx_rx_orientation_for_acquisition(0);
    223 	const bool rx_cols           = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Columns;
    224 	const vec2 focal_vector      = focal_vector_for_acquisition(0);
    225 	const vec3 xdc_world_point   = (xdc_transform * vec4(world_point, 1)).xyz;
    226 
    227 	const float transmit_index   = sample_index(rca_transmit_distance(world_point, focal_vector, tx_rx_orientation));
    228 	const float z_delta_squared  = xdc_world_point.z * xdc_world_point.z;
    229 	const float f_number_over_z  = abs(FNumber / xdc_world_point.z);
    230 	const vec2  xy_world_point   = xdc_world_point.xy;
    231 	const float apodization_test = 0.25f / (f_number_over_z * f_number_over_z);
    232 
    233 	RESULT_TYPE result = RESULT_TYPE(0);
    234 	#if Fast
    235 	const int rx_channel = u_channel;
    236 	#else
    237 	for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++)
    238 	#endif
    239 	{
    240 		int rf_offset   = rx_channel * SampleCount * AcquisitionCount + Sparse * SampleCount;
    241 		rf_offset      -= int(InterpolationMode == InterpolationMode_Cubic);
    242 
    243 		// NOTE(rnp): this wouldn't be so messy if we just forced an orientation like with FORCES
    244 		vec2 element_receive_delta_squared = xy_world_point;
    245 		if (rx_cols) element_receive_delta_squared.x -= rx_channel * xdc_element_pitch.x;
    246 		else         element_receive_delta_squared.y -= rx_channel * xdc_element_pitch.y;
    247 
    248 		if (rx_cols) element_receive_delta_squared.x *= element_receive_delta_squared.x;
    249 		else         element_receive_delta_squared.y *= element_receive_delta_squared.y;
    250 
    251 		for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
    252 			int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit;
    253 
    254 			if (rx_cols) element_receive_delta_squared.y  = xy_world_point.y - tx_channel * xdc_element_pitch.y;
    255 			else         element_receive_delta_squared.x  = xy_world_point.x - tx_channel * xdc_element_pitch.x;
    256 
    257 			if (rx_cols) element_receive_delta_squared.y *= element_receive_delta_squared.y;
    258 			else         element_receive_delta_squared.x *= element_receive_delta_squared.x;
    259 
    260 			float element_delta_squared = element_receive_delta_squared.x + element_receive_delta_squared.y;
    261 			if (element_delta_squared < apodization_test) {
    262 				/* NOTE: tribal knowledge */
    263 				float apodization = transmit == 0 ? inversesqrt(float(AcquisitionCount)) : 1.0f;
    264 				apodization *= apodize(f_number_over_z * sqrt(element_delta_squared));
    265 
    266 				float index = transmit_index + sqrt(z_delta_squared + element_delta_squared) * SamplingFrequency / SpeedOfSound;
    267 				SAMPLE_TYPE value = apodization * sample_rf(rf_offset, index);
    268 				result += RESULT_STORE(value, length(value));
    269 			}
    270 
    271 			rf_offset += SampleCount;
    272 		}
    273 	}
    274 	return result;
    275 }
    276 
    277 RESULT_TYPE FORCES(const vec3 xdc_world_point)
    278 {
    279 	const int rx_channel_start = bool(Fast)? u_channel     : 0;
    280 	const int rx_channel_end   = bool(Fast)? u_channel + 1 : ChannelCount;
    281 
    282 	RESULT_TYPE result = RESULT_TYPE(0);
    283 
    284 	float z_delta_squared     = xdc_world_point.z * xdc_world_point.z;
    285 	float transmit_y_delta    = xdc_world_point.y - xdc_element_pitch.y * ChannelCount / 2;
    286 	float transmit_yz_squared = transmit_y_delta * transmit_y_delta + z_delta_squared;
    287 
    288 	for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) {
    289 		float receive_x_delta = xdc_world_point.x - rx_channel * xdc_element_pitch.x;
    290 		float a_arg           = abs(FNumber * receive_x_delta / xdc_world_point.z);
    291 
    292 		if (a_arg < 0.5f) {
    293 			int rf_offset  = rx_channel * SampleCount * AcquisitionCount + Sparse * SampleCount;
    294 			rf_offset     -= int(InterpolationMode == InterpolationMode_Cubic);
    295 
    296 			float receive_index = sample_index(sqrt(receive_x_delta * receive_x_delta + z_delta_squared));
    297 			float apodization   = apodize(a_arg);
    298 			for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
    299 				int   tx_channel       = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit;
    300 				float transmit_x_delta = xdc_world_point.x - xdc_element_pitch.x * tx_channel;
    301 				float transmit_index   = sqrt(transmit_yz_squared + transmit_x_delta * transmit_x_delta) * SamplingFrequency / SpeedOfSound;
    302 
    303 				SAMPLE_TYPE value = apodization * sample_rf(rf_offset, receive_index + transmit_index);
    304 				result    += RESULT_STORE(value, length(value));
    305 				rf_offset += SampleCount;
    306 			}
    307 		}
    308 	}
    309 	return result;
    310 }
    311 
    312 void main()
    313 {
    314 	ivec3 out_voxel    = ivec3(gl_GlobalInvocationID);
    315 	vec3  image_points = vec3(imageSize(u_out_data_tex)) - 1.0f;
    316 	if (!all(lessThan(out_voxel, imageSize(u_out_data_tex))))
    317 		return;
    318 
    319 	vec3 point       = vec3(out_voxel) / max(vec3(1.0f), image_points);
    320 	vec3 world_point = (voxel_transform * vec4(point, 1)).xyz;
    321 
    322 	RESULT_TYPE sum;
    323 	switch (AcquisitionKind) {
    324 	case AcquisitionKind_FORCES:
    325 	case AcquisitionKind_UFORCES:
    326 	{
    327 		sum = FORCES(world_point);
    328 	}break;
    329 	case AcquisitionKind_HERCULES:
    330 	case AcquisitionKind_UHERCULES:
    331 	case AcquisitionKind_HERO_PA:
    332 	{
    333 		sum = HERCULES(world_point);
    334 	}break;
    335 	case AcquisitionKind_Flash:
    336 	case AcquisitionKind_RCA_TPW:
    337 	case AcquisitionKind_RCA_VLS:
    338 	{
    339 		sum = RCA(world_point);
    340 	}break;
    341 	}
    342 
    343 	#if Fast
    344 		sum += RESULT_TYPE_CAST(imageLoad(u_out_data_tex, out_voxel));
    345 	#endif
    346 
    347 	#if CoherencyWeighting
    348 		/* TODO(rnp): scale such that brightness remains ~constant */
    349 		float denominator = sum[RESULT_LAST_INDEX] + float(sum[RESULT_LAST_INDEX] == 0);
    350 		RESULT_TYPE_CAST(sum) *= RESULT_TYPE_CAST(sum) / denominator;
    351 	#endif
    352 
    353 	imageStore(u_out_data_tex, out_voxel, OUTPUT_TYPE_CAST(sum));
    354 }