libpromeki 1.0.0-alpha
PROfessional MEdia toolKIt
 
Loading...
Searching...
No Matches
matrix3x3.h
Go to the documentation of this file.
1
8#pragma once
9
10
11#include <promeki/config.h>
12#if PROMEKI_ENABLE_CORE
13#include <cmath>
14#include <promeki/platform.h>
15#if defined(PROMEKI_HAS_SSE2)
16#include <immintrin.h>
17#elif defined(PROMEKI_HAS_NEON)
18#include <arm_neon.h>
19#endif
20#include <promeki/namespace.h>
21
22PROMEKI_NAMESPACE_BEGIN
23
39class Matrix3x3 {
40 public:
42 static constexpr float IdentityMatrix[3][3] = {
43 {1.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f}, {0.0f, 0.0f, 1.0f}};
44
52 static Matrix3x3 scalingMatrix(float scale_x, float scale_y, float scale_z) {
53 Matrix3x3 result;
54 result.data[0][0] = scale_x;
55 result.data[1][1] = scale_y;
56 result.data[2][2] = scale_z;
57 return result;
58 }
59
66 static Matrix3x3 rotationMatrix(float angle, char axis) {
67 Matrix3x3 result;
68 float cos_angle = std::cos(angle);
69 float sin_angle = std::sin(angle);
70 switch (axis) {
71 case 'x':
72 result.data[0][0] = 1.0f;
73 result.data[1][1] = cos_angle;
74 result.data[1][2] = -sin_angle;
75 result.data[2][1] = sin_angle;
76 result.data[2][2] = cos_angle;
77 break;
78 case 'y':
79 result.data[0][0] = cos_angle;
80 result.data[0][2] = sin_angle;
81 result.data[1][1] = 1.0f;
82 result.data[2][0] = -sin_angle;
83 result.data[2][2] = cos_angle;
84 break;
85 case 'z':
86 result.data[0][0] = cos_angle;
87 result.data[0][1] = -sin_angle;
88 result.data[1][0] = sin_angle;
89 result.data[1][1] = cos_angle;
90 result.data[2][2] = 1.0f;
91 break;
92 default:
93 // Invalid axis, return identity matrix
94 result.data[0][0] = 1.0f;
95 result.data[1][1] = 1.0f;
96 result.data[2][2] = 1.0f;
97 break;
98 }
99 return result;
100 }
101
103 Matrix3x3() { zero(); }
104
109 Matrix3x3(float val[3][3]) { set(val); }
110
116 Matrix3x3 operator+(const Matrix3x3 &other) const {
117 Matrix3x3 result;
118 for (int i = 0; i < 3; ++i) {
119#if defined(PROMEKI_HAS_SSE2)
120 __m128 row = _mm_loadu_ps(data[i]);
121 __m128 other_row = _mm_loadu_ps(other.data[i]);
122 _mm_storeu_ps(result.data[i], _mm_add_ps(row, other_row));
123#elif defined(PROMEKI_HAS_NEON)
124 float32x4_t row = vld1q_f32(data[i]);
125 float32x4_t other_row = vld1q_f32(other.data[i]);
126 vst1q_f32(result.data[i], vaddq_f32(row, other_row));
127#else
128 for (int j = 0; j < 4; ++j) {
129 result.data[i][j] = data[i][j] + other.data[i][j];
130 }
131#endif
132 }
133 return result;
134 }
135
141 Matrix3x3 operator-(const Matrix3x3 &other) const {
142 Matrix3x3 result;
143 for (int i = 0; i < 3; ++i) {
144#if defined(PROMEKI_HAS_SSE2)
145 __m128 row = _mm_loadu_ps(data[i]);
146 __m128 other_row = _mm_loadu_ps(other.data[i]);
147 _mm_storeu_ps(result.data[i], _mm_sub_ps(row, other_row));
148#elif defined(PROMEKI_HAS_NEON)
149 float32x4_t row = vld1q_f32(data[i]);
150 float32x4_t other_row = vld1q_f32(other.data[i]);
151 vst1q_f32(result.data[i], vsubq_f32(row, other_row));
152#else
153 for (int j = 0; j < 4; ++j) {
154 result.data[i][j] = data[i][j] - other.data[i][j];
155 }
156#endif
157 }
158 return result;
159 }
160
166 Matrix3x3 operator*(const Matrix3x3 &other) const {
167 Matrix3x3 result;
168 for (int i = 0; i < 3; ++i) {
169 for (int j = 0; j < 3; ++j) {
170 float sum = 0.0f;
171 for (int k = 0; k < 3; ++k) {
172 sum += data[i][k] * other.data[k][j];
173 }
174 result.data[i][j] = sum;
175 }
176 }
177 return result;
178 }
179
186 float dot(int row1, int row2) const {
187 // Column 3 is always zero (class invariant), so a full
188 // 4-lane multiply contributes nothing extra to the sum.
189#if defined(PROMEKI_HAS_SSE3)
190 __m128 vec1 = _mm_loadu_ps(data[row1]);
191 __m128 vec2 = _mm_loadu_ps(data[row2]);
192 __m128 mul = _mm_mul_ps(vec1, vec2);
193 mul = _mm_hadd_ps(mul, mul);
194 mul = _mm_hadd_ps(mul, mul);
195 return _mm_cvtss_f32(mul);
196#elif defined(PROMEKI_HAS_NEON) && defined(PROMEKI_ARCH_AARCH64)
197 float32x4_t vec1 = vld1q_f32(data[row1]);
198 float32x4_t vec2 = vld1q_f32(data[row2]);
199 return vaddvq_f32(vmulq_f32(vec1, vec2));
200#else
201 return data[row1][0] * data[row2][0] +
202 data[row1][1] * data[row2][1] +
203 data[row1][2] * data[row2][2];
204#endif
205 }
206
208 void zero() {
209 for (int i = 0; i < 3; ++i) {
210 for (int j = 0; j < 4; ++j) {
211 data[i][j] = 0.0f;
212 }
213 }
214 return;
215 }
216
221 void set(float val[3][3]) {
222 for (int i = 0; i < 3; ++i) {
223 for (int j = 0; j < 3; ++j) {
224 data[i][j] = val[i][j];
225 }
226 data[i][3] = 0.0f;
227 }
228 return;
229 }
230
235 Matrix3x3 transpose() const {
236 Matrix3x3 result;
237 for (int i = 0; i < 3; ++i) {
238 for (int j = 0; j < 3; ++j) {
239 result.data[i][j] = data[j][i];
240 }
241 }
242 return result;
243 }
244
249 float determinant() const {
250 return data[0][0] * (data[1][1] * data[2][2] - data[1][2] * data[2][1]) -
251 data[0][1] * (data[1][0] * data[2][2] - data[1][2] * data[2][0]) +
252 data[0][2] * (data[1][0] * data[2][1] - data[1][1] * data[2][0]);
253 }
254
259 Matrix3x3 inverse() const {
260 Matrix3x3 result;
261 float det = determinant();
262 if (det == 0.0f) {
263 // Singular matrix, inverse does not exist
264 return result; // Return zero matrix
265 }
266 float inv_det = 1.0f / det;
267 result.data[0][0] = (data[1][1] * data[2][2] - data[1][2] * data[2][1]) * inv_det;
268 result.data[0][1] = (data[0][2] * data[2][1] - data[0][1] * data[2][2]) * inv_det;
269 result.data[0][2] = (data[0][1] * data[1][2] - data[0][2] * data[1][1]) * inv_det;
270 result.data[1][0] = (data[1][2] * data[2][0] - data[1][0] * data[2][2]) * inv_det;
271 result.data[1][1] = (data[0][0] * data[2][2] - data[0][2] * data[2][0]) * inv_det;
272 result.data[1][2] = (data[0][2] * data[1][0] - data[0][0] * data[1][2]) * inv_det;
273 result.data[2][0] = (data[1][0] * data[2][1] - data[1][1] * data[2][0]) * inv_det;
274 result.data[2][1] = (data[0][1] * data[2][0] - data[0][0] * data[2][1]) * inv_det;
275 result.data[2][2] = (data[0][0] * data[1][1] - data[0][1] * data[1][0]) * inv_det;
276 return result;
277 }
278
283 float trace() const { return data[0][0] + data[1][1] + data[2][2]; }
284
290 Matrix3x3 operator*(float scalar) const {
291 Matrix3x3 result;
292 for (int i = 0; i < 3; ++i) {
293 for (int j = 0; j < 3; ++j) {
294 result.data[i][j] = data[i][j] * scalar;
295 }
296 }
297 return result;
298 }
299
305 Matrix3x3 operator/(float scalar) const {
306 Matrix3x3 result;
307 if (scalar != 0.0f) {
308 float inv_scalar = 1.0f / scalar;
309 for (int i = 0; i < 3; ++i) {
310 for (int j = 0; j < 3; ++j) {
311 result.data[i][j] = data[i][j] * inv_scalar;
312 }
313 }
314 }
315 return result;
316 }
317
319 bool operator==(const Matrix3x3 &other) const {
320 for (int i = 0; i < 3; ++i) {
321 for (int j = 0; j < 3; ++j) {
322 if (data[i][j] != other.data[i][j]) {
323 return false;
324 }
325 }
326 }
327 return true;
328 }
329
331 bool operator!=(const Matrix3x3 &other) const { return !(*this == other); }
332
333
340 float get(int row, int col) const {
341 if (row >= 0 && row < 3 && col >= 0 && col < 3) {
342 return data[row][col];
343 }
344 return 0.0f; // Invalid row or column, return 0.0f
345 }
346
353 void set(int row, int col, float value) {
354 if (row >= 0 && row < 3 && col >= 0 && col < 3) {
355 data[row][col] = value;
356 }
357 }
358
364 Matrix3x3 elementMultiply(const Matrix3x3 &other) const {
365 Matrix3x3 result;
366 for (int i = 0; i < 3; ++i) {
367#if defined(PROMEKI_HAS_SSE2)
368 __m128 row = _mm_loadu_ps(data[i]);
369 __m128 other_row = _mm_loadu_ps(other.data[i]);
370 _mm_storeu_ps(result.data[i], _mm_mul_ps(row, other_row));
371#elif defined(PROMEKI_HAS_NEON)
372 float32x4_t row = vld1q_f32(data[i]);
373 float32x4_t other_row = vld1q_f32(other.data[i]);
374 vst1q_f32(result.data[i], vmulq_f32(row, other_row));
375#else
376 for (int j = 0; j < 3; ++j) {
377 result.data[i][j] = data[i][j] * other.data[i][j];
378 }
379#endif
380 }
381 return result;
382 }
383
392 Matrix3x3 elementDivide(const Matrix3x3 &other) const {
393 Matrix3x3 result;
394 for (int i = 0; i < 3; ++i) {
395#if defined(PROMEKI_HAS_SSE2)
396 __m128 row = _mm_loadu_ps(data[i]);
397 __m128 other_row = _mm_loadu_ps(other.data[i]);
398 // Mask out lanes where the divisor is zero so
399 // we return 0.0f for those rather than NaN/inf.
400 __m128 mask = _mm_cmpneq_ps(other_row, _mm_set1_ps(0.0f));
401 __m128 div = _mm_and_ps(_mm_div_ps(row, other_row), mask);
402 _mm_storeu_ps(result.data[i], div);
403#elif defined(PROMEKI_HAS_NEON) && defined(PROMEKI_ARCH_AARCH64)
404 float32x4_t row = vld1q_f32(data[i]);
405 float32x4_t other_row = vld1q_f32(other.data[i]);
406 // Mask out lanes where the divisor is zero
407 // before dividing, since on aarch64 a NaN-only
408 // result is still a real divide.
409 uint32x4_t zero_mask = vceqq_f32(other_row, vdupq_n_f32(0.0f));
410 float32x4_t div = vdivq_f32(row, other_row);
411 uint32x4_t bits = vbicq_u32(vreinterpretq_u32_f32(div), zero_mask);
412 vst1q_f32(result.data[i], vreinterpretq_f32_u32(bits));
413#else
414 for (int j = 0; j < 3; ++j) {
415 result.data[i][j] = other.data[i][j] != 0.0f
416 ? data[i][j] / other.data[i][j]
417 : 0.0f;
418 }
419#endif
420 }
421 return result;
422 }
423
428 void vectorTransform(float vector[3]) const {
429 float result[3];
430 result[0] = data[0][0] * vector[0] + data[0][1] * vector[1] + data[0][2] * vector[2];
431 result[1] = data[1][0] * vector[0] + data[1][1] * vector[1] + data[1][2] * vector[2];
432 result[2] = data[2][0] * vector[0] + data[2][1] * vector[1] + data[2][2] * vector[2];
433 vector[0] = result[0];
434 vector[1] = result[1];
435 vector[2] = result[2];
436 }
437
438 private:
439 // Padded to 4 columns so the 128-bit SIMD load/store paths
440 // never read or write past the row. Column 3 is always zero
441 // and the SIMD ops preserve that invariant.
442 float data[3][4];
443};
444
445
446PROMEKI_NAMESPACE_END
447
448#endif // PROMEKI_ENABLE_CORE