11#include <promeki/config.h>
12#if PROMEKI_ENABLE_CORE
15#if defined(PROMEKI_HAS_SSE2)
17#elif defined(PROMEKI_HAS_NEON)
22PROMEKI_NAMESPACE_BEGIN
42 static constexpr float IdentityMatrix[3][3] = {
43 {1.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f}, {0.0f, 0.0f, 1.0f}};
52 static Matrix3x3 scalingMatrix(
float scale_x,
float scale_y,
float scale_z) {
54 result.data[0][0] = scale_x;
55 result.data[1][1] = scale_y;
56 result.data[2][2] = scale_z;
66 static Matrix3x3 rotationMatrix(
float angle,
char axis) {
68 float cos_angle = std::cos(angle);
69 float sin_angle = std::sin(angle);
72 result.data[0][0] = 1.0f;
73 result.data[1][1] = cos_angle;
74 result.data[1][2] = -sin_angle;
75 result.data[2][1] = sin_angle;
76 result.data[2][2] = cos_angle;
79 result.data[0][0] = cos_angle;
80 result.data[0][2] = sin_angle;
81 result.data[1][1] = 1.0f;
82 result.data[2][0] = -sin_angle;
83 result.data[2][2] = cos_angle;
86 result.data[0][0] = cos_angle;
87 result.data[0][1] = -sin_angle;
88 result.data[1][0] = sin_angle;
89 result.data[1][1] = cos_angle;
90 result.data[2][2] = 1.0f;
94 result.data[0][0] = 1.0f;
95 result.data[1][1] = 1.0f;
96 result.data[2][2] = 1.0f;
103 Matrix3x3() { zero(); }
109 Matrix3x3(
float val[3][3]) { set(val); }
116 Matrix3x3 operator+(
const Matrix3x3 &other)
const {
118 for (
int i = 0; i < 3; ++i) {
119#if defined(PROMEKI_HAS_SSE2)
120 __m128 row = _mm_loadu_ps(data[i]);
121 __m128 other_row = _mm_loadu_ps(other.data[i]);
122 _mm_storeu_ps(result.data[i], _mm_add_ps(row, other_row));
123#elif defined(PROMEKI_HAS_NEON)
124 float32x4_t row = vld1q_f32(data[i]);
125 float32x4_t other_row = vld1q_f32(other.data[i]);
126 vst1q_f32(result.data[i], vaddq_f32(row, other_row));
128 for (
int j = 0; j < 4; ++j) {
129 result.data[i][j] = data[i][j] + other.data[i][j];
141 Matrix3x3 operator-(
const Matrix3x3 &other)
const {
143 for (
int i = 0; i < 3; ++i) {
144#if defined(PROMEKI_HAS_SSE2)
145 __m128 row = _mm_loadu_ps(data[i]);
146 __m128 other_row = _mm_loadu_ps(other.data[i]);
147 _mm_storeu_ps(result.data[i], _mm_sub_ps(row, other_row));
148#elif defined(PROMEKI_HAS_NEON)
149 float32x4_t row = vld1q_f32(data[i]);
150 float32x4_t other_row = vld1q_f32(other.data[i]);
151 vst1q_f32(result.data[i], vsubq_f32(row, other_row));
153 for (
int j = 0; j < 4; ++j) {
154 result.data[i][j] = data[i][j] - other.data[i][j];
166 Matrix3x3 operator*(
const Matrix3x3 &other)
const {
168 for (
int i = 0; i < 3; ++i) {
169 for (
int j = 0; j < 3; ++j) {
171 for (
int k = 0; k < 3; ++k) {
172 sum += data[i][k] * other.data[k][j];
174 result.data[i][j] = sum;
186 float dot(
int row1,
int row2)
const {
189#if defined(PROMEKI_HAS_SSE3)
190 __m128 vec1 = _mm_loadu_ps(data[row1]);
191 __m128 vec2 = _mm_loadu_ps(data[row2]);
192 __m128 mul = _mm_mul_ps(vec1, vec2);
193 mul = _mm_hadd_ps(mul, mul);
194 mul = _mm_hadd_ps(mul, mul);
195 return _mm_cvtss_f32(mul);
196#elif defined(PROMEKI_HAS_NEON) && defined(PROMEKI_ARCH_AARCH64)
197 float32x4_t vec1 = vld1q_f32(data[row1]);
198 float32x4_t vec2 = vld1q_f32(data[row2]);
199 return vaddvq_f32(vmulq_f32(vec1, vec2));
201 return data[row1][0] * data[row2][0] +
202 data[row1][1] * data[row2][1] +
203 data[row1][2] * data[row2][2];
209 for (
int i = 0; i < 3; ++i) {
210 for (
int j = 0; j < 4; ++j) {
221 void set(
float val[3][3]) {
222 for (
int i = 0; i < 3; ++i) {
223 for (
int j = 0; j < 3; ++j) {
224 data[i][j] = val[i][j];
235 Matrix3x3 transpose()
const {
237 for (
int i = 0; i < 3; ++i) {
238 for (
int j = 0; j < 3; ++j) {
239 result.data[i][j] = data[j][i];
249 float determinant()
const {
250 return data[0][0] * (data[1][1] * data[2][2] - data[1][2] * data[2][1]) -
251 data[0][1] * (data[1][0] * data[2][2] - data[1][2] * data[2][0]) +
252 data[0][2] * (data[1][0] * data[2][1] - data[1][1] * data[2][0]);
259 Matrix3x3 inverse()
const {
261 float det = determinant();
266 float inv_det = 1.0f / det;
267 result.data[0][0] = (data[1][1] * data[2][2] - data[1][2] * data[2][1]) * inv_det;
268 result.data[0][1] = (data[0][2] * data[2][1] - data[0][1] * data[2][2]) * inv_det;
269 result.data[0][2] = (data[0][1] * data[1][2] - data[0][2] * data[1][1]) * inv_det;
270 result.data[1][0] = (data[1][2] * data[2][0] - data[1][0] * data[2][2]) * inv_det;
271 result.data[1][1] = (data[0][0] * data[2][2] - data[0][2] * data[2][0]) * inv_det;
272 result.data[1][2] = (data[0][2] * data[1][0] - data[0][0] * data[1][2]) * inv_det;
273 result.data[2][0] = (data[1][0] * data[2][1] - data[1][1] * data[2][0]) * inv_det;
274 result.data[2][1] = (data[0][1] * data[2][0] - data[0][0] * data[2][1]) * inv_det;
275 result.data[2][2] = (data[0][0] * data[1][1] - data[0][1] * data[1][0]) * inv_det;
283 float trace()
const {
return data[0][0] + data[1][1] + data[2][2]; }
290 Matrix3x3 operator*(
float scalar)
const {
292 for (
int i = 0; i < 3; ++i) {
293 for (
int j = 0; j < 3; ++j) {
294 result.data[i][j] = data[i][j] * scalar;
305 Matrix3x3 operator/(
float scalar)
const {
307 if (scalar != 0.0f) {
308 float inv_scalar = 1.0f / scalar;
309 for (
int i = 0; i < 3; ++i) {
310 for (
int j = 0; j < 3; ++j) {
311 result.data[i][j] = data[i][j] * inv_scalar;
319 bool operator==(
const Matrix3x3 &other)
const {
320 for (
int i = 0; i < 3; ++i) {
321 for (
int j = 0; j < 3; ++j) {
322 if (data[i][j] != other.data[i][j]) {
331 bool operator!=(
const Matrix3x3 &other)
const {
return !(*
this == other); }
340 float get(
int row,
int col)
const {
341 if (row >= 0 && row < 3 && col >= 0 && col < 3) {
342 return data[row][col];
353 void set(
int row,
int col,
float value) {
354 if (row >= 0 && row < 3 && col >= 0 && col < 3) {
355 data[row][col] = value;
364 Matrix3x3 elementMultiply(
const Matrix3x3 &other)
const {
366 for (
int i = 0; i < 3; ++i) {
367#if defined(PROMEKI_HAS_SSE2)
368 __m128 row = _mm_loadu_ps(data[i]);
369 __m128 other_row = _mm_loadu_ps(other.data[i]);
370 _mm_storeu_ps(result.data[i], _mm_mul_ps(row, other_row));
371#elif defined(PROMEKI_HAS_NEON)
372 float32x4_t row = vld1q_f32(data[i]);
373 float32x4_t other_row = vld1q_f32(other.data[i]);
374 vst1q_f32(result.data[i], vmulq_f32(row, other_row));
376 for (
int j = 0; j < 3; ++j) {
377 result.data[i][j] = data[i][j] * other.data[i][j];
392 Matrix3x3 elementDivide(
const Matrix3x3 &other)
const {
394 for (
int i = 0; i < 3; ++i) {
395#if defined(PROMEKI_HAS_SSE2)
396 __m128 row = _mm_loadu_ps(data[i]);
397 __m128 other_row = _mm_loadu_ps(other.data[i]);
400 __m128 mask = _mm_cmpneq_ps(other_row, _mm_set1_ps(0.0f));
401 __m128 div = _mm_and_ps(_mm_div_ps(row, other_row), mask);
402 _mm_storeu_ps(result.data[i], div);
403#elif defined(PROMEKI_HAS_NEON) && defined(PROMEKI_ARCH_AARCH64)
404 float32x4_t row = vld1q_f32(data[i]);
405 float32x4_t other_row = vld1q_f32(other.data[i]);
409 uint32x4_t zero_mask = vceqq_f32(other_row, vdupq_n_f32(0.0f));
410 float32x4_t div = vdivq_f32(row, other_row);
411 uint32x4_t bits = vbicq_u32(vreinterpretq_u32_f32(div), zero_mask);
412 vst1q_f32(result.data[i], vreinterpretq_f32_u32(bits));
414 for (
int j = 0; j < 3; ++j) {
415 result.data[i][j] = other.data[i][j] != 0.0f
416 ? data[i][j] / other.data[i][j]
428 void vectorTransform(
float vector[3])
const {
430 result[0] = data[0][0] * vector[0] + data[0][1] * vector[1] + data[0][2] * vector[2];
431 result[1] = data[1][0] * vector[0] + data[1][1] * vector[1] + data[1][2] * vector[2];
432 result[2] = data[2][0] * vector[0] + data[2][1] * vector[1] + data[2][2] * vector[2];
433 vector[0] = result[0];
434 vector[1] = result[1];
435 vector[2] = result[2];