commit 10633fb7052c199286994a5580317e064b582c21
parent 54f81882da338cfe10b71aa924dd0df1d73741a2
Author: Egor Achkasov <eaachkasov@edu.hse.ru>
Date: Fri, 10 Jan 2025 11:55:09 +0100
Init x86 non-vector mmse (no nosqrt yet)
Diffstat:
| M | include/mmserv.h | | | 117 | +++++++++++++++++++++++++++++--------------------------------------------------- |
| M | main.c | | | 57 | ++++++++++++++++++++++++++++++++++++++++++++++++--------- |
| M | src/mmserv.c | | | 700 | ++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------- |
3 files changed, 538 insertions(+), 336 deletions(-)
diff --git a/include/mmserv.h b/include/mmserv.h
@@ -3,17 +3,16 @@
#include "define.h"
-#include <stdint.h> // for int16_t, int32_t
-#include <stddef.h> // for size_t
+#include <stddef.h> /* for size_t */
/*
* Typedefs
*/
typedef struct {
- data_t re;
- data_t im;
-} complex;
+ data_t* re;
+ data_t* im;
+} vcomplex;
/** Calculate MMSE estimation of x in y = H*x + n
* \param H matrix of channel coefficients. Shape [NUM_RX_ANT][NUM_TX_ANT][NUM_SC]
@@ -22,10 +21,10 @@ typedef struct {
* \param x_MMSE output MMSE estimation of x. Shape [NUM_TX_ANT][NUM_SC]
*/
void mmse(
- IN complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex y[NUM_RX_ANT][NUM_SC],
- IN complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex x_MMSE[NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *H,
+ IN vcomplex *y,
+ IN vcomplex *R,
+ OUT vcomplex *x_MMSE);
/** Calculate MMSE estimation of x in y = H*x + n. Uses a sqrt-free Cholesky decomposition.
* \param H matrix of channel coefficients. Shape [NUM_RX_ANT][NUM_TX_ANT][NUM_SC]
@@ -34,42 +33,10 @@ void mmse(
* \param x_MMSE output MMSE estimation of x. Shape [NUM_TX_ANT][NUM_SC]
*/
void mmse_nosqrt(
- IN complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex y[NUM_RX_ANT][NUM_SC],
- IN complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex x_MMSE[NUM_TX_ANT][NUM_SC]);
-
-/*
- * Complex operations
- */
-
-/** Create a complex number from real and imaginary parts */
-complex cmake(IN data_t re, IN data_t im);
-
-/** Complex multiplication */
-complex cmul(IN complex a, IN complex b);
-
-/** Complex absolute value squared */
-data_t cabs2(IN complex a);
-
-/** Complex addition */
-complex cadd(IN complex a, IN complex b);
-void cadd_acc(IN complex a, IN complex b);
-
-/** Complex subtraction */
-complex csub(IN complex a, IN complex b);
-
-/** Complex conjugate */
-complex cconj(IN complex a);
-
-/** Complex division */
-complex cdiv(IN complex a, IN complex b);
-
-/** Square root of a natural number */
-data_t sqrt(IN data_t x);
-
-/** Complex root */
-complex csqrt(IN complex a);
+ IN vcomplex *H,
+ IN vcomplex *y,
+ IN vcomplex *R,
+ OUT vcomplex *x_MMSE);
/*
* Complex matrix operations
@@ -80,16 +47,16 @@ complex csqrt(IN complex a);
* \param AH output matrix. Shape [NUM_TX_ANT][NUM_RX_ANT][NUM_SC]
*/
void cmat_hermitian_transpose_RxTx(
- IN complex A[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex AH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ OUT vcomplex *AH);
/** Calculate the Hermitian transpose of a matrix A
* \param A input matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
* \param AH output matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
*/
void cmat_hermitian_transpose_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex AH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ OUT vcomplex *AH);
/** Multiply two matrices A and B
* \param A input matrix. Shape [NUM_TX_ANT][NUM_RX_ANT][NUM_SC]
@@ -97,9 +64,9 @@ void cmat_hermitian_transpose_TxTx(
* \param result output matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
*/
void cmatmul_TxRx_RxTx(
- IN complex A[NUM_TX_ANT][NUM_RX_ANT][NUM_SC],
- IN complex B[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ IN vcomplex *B,
+ OUT vcomplex *result);
/** Multiply two matrices A and B
* \param A input matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
@@ -107,9 +74,9 @@ void cmatmul_TxRx_RxTx(
* \param result output matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
*/
void cmatmul_TxTx_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex B[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ IN vcomplex *B,
+ OUT vcomplex *result);
/** Add two matrices A and B
* \param A input matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
@@ -117,9 +84,9 @@ void cmatmul_TxTx_TxTx(
* \param result output matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
*/
void cmatadd_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex B[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ IN vcomplex *B,
+ OUT vcomplex *result);
/** Perfom Cholesky decomposition of a square matrix A
* such that A = L*L^H with Cholesky–Banachiewicz algorithm
@@ -127,8 +94,8 @@ void cmatadd_TxTx(
* \param L output lower triangular matrix L. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
*/
void ccholesky_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ OUT vcomplex *L);
/** Perfom Cholesky decomposition of a square matrix A
* such that A = L*D*L^H with Cholesky–Banachiewicz algorithm,
@@ -137,12 +104,12 @@ void ccholesky_TxTx(
* This function does not calculate use csqrt and sqrt functions.
* \param A input square matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
* \param L output lower triangular matrix L. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
- * \param D output diagonal matrix L. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
+ * \param D output diagonal of the diagonal matrix D. Shape [NUM_TX_ANT][NUM_SC]
*/
void ccholesky_nosqrt_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ OUT vcomplex *L,
+ OUT vcomplex *D);
/** Multiply a matrix A with a vector b
* \param A input matrix. Shape [NUM_TX_ANT][NUM_RX_ANT][NUM_SC]
@@ -150,9 +117,9 @@ void ccholesky_nosqrt_TxTx(
* \param result output vector. Shape [NUM_TX_ANT][NUM_SC]
*/
void cmatvecmul_TxRx(
- IN complex A[NUM_TX_ANT][NUM_RX_ANT][NUM_SC],
- IN complex b[NUM_RX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *A,
+ IN vcomplex *b,
+ OUT vcomplex *result);
/** Find z from L*z = b with a forward substitution
* \param L lower triangular matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
@@ -160,9 +127,9 @@ void cmatvecmul_TxRx(
* \param result output vector z. Shape [NUM_TX_ANT][NUM_SC]
*/
void cforwardsub_TxTx(
- IN complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex b[NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *L,
+ IN vcomplex *b,
+ OUT vcomplex *result);
/** Find x from U*x = b with a backward substitution
* \param U upper triangular matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC]
@@ -170,9 +137,9 @@ void cforwardsub_TxTx(
* \param result output vector x. Shape [NUM_TX_ANT][NUM_SC]
*/
void cbackwardsub_TxTx(
- IN complex U[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex b[NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *U,
+ IN vcomplex *b,
+ OUT vcomplex *result);
/** Calculate mean squared error between the original x and the estimated x
* \param x original x. Shape [NUM_TX_ANT][NUM_SC]
@@ -180,7 +147,7 @@ void cbackwardsub_TxTx(
* \return mean squared error
*/
acc_t mse(
- IN complex x[NUM_TX_ANT][NUM_SC],
- IN complex x_MMSE[NUM_TX_ANT][NUM_SC]);
+ IN vcomplex *x,
+ IN vcomplex *x_MMSE);
#endif /* __MMSERV_H */
diff --git a/main.c b/main.c
@@ -1,19 +1,58 @@
#include "include/mmserv.h"
-#include "../common/util.h"
-#include "printf.h"
+#include <stdio.h> /* for printf */
+
+/* Import a binary file */
+#define IMPORT_BIN(sect, file, sym) asm (\
+ ".section " #sect "\n" /* Change section */\
+ ".balign 4\n" /* Word alignment */\
+ ".global " #sym "\n" /* Export the object address */\
+ #sym ":\n" /* Define the object label */\
+ ".incbin \"" file "\"\n" /* Import the file */\
+ ".balign 4\n" /* Word alignment */\
+ ".section \".text\"\n") /* Restore section */
+
+/* Import the data */
+IMPORT_BIN(".rodata", "data/x_re.bin", x_re);
+IMPORT_BIN(".rodata", "data/x_im.bin", x_im);
+IMPORT_BIN(".rodata", "data/H_re.bin", H_re);
+IMPORT_BIN(".rodata", "data/H_im.bin", H_im);
+IMPORT_BIN(".rodata", "data/R_re.bin", R_re);
+IMPORT_BIN(".rodata", "data/R_im.bin", R_im);
+IMPORT_BIN(".rodata", "data/y_re.bin", y_re);
+IMPORT_BIN(".rodata", "data/y_im.bin", y_im);
/* Transmitted signal */
-extern complex x[NUM_TX_ANT][NUM_SC];
+extern data_t x_re[NUM_TX_ANT][NUM_SC];
+extern data_t x_im[NUM_TX_ANT][NUM_SC];
/* Channel */
-extern complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC];
+extern data_t H_re[NUM_RX_ANT][NUM_TX_ANT][NUM_SC];
+extern data_t H_im[NUM_RX_ANT][NUM_TX_ANT][NUM_SC];
/* Noise covariance matrix */
-extern complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+extern data_t R_re[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+extern data_t R_im[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
/* Received signal */
-extern complex y[NUM_RX_ANT][NUM_SC];
+extern data_t y_re[NUM_RX_ANT][NUM_SC];
+extern data_t y_im[NUM_RX_ANT][NUM_SC];
int main() {
- complex x_MMSE[NUM_TX_ANT][NUM_SC];
- mmse(H, y, R, x_MMSE);
- printf("MSE: %f\n", mse(x, x_MMSE));
+ /* Cast the data to vcomplex */
+ vcomplex x, H, R, y;
+ x.re = (data_t *)x_re;
+ x.im = (data_t *)x_im;
+ H.re = (data_t *)H_re;
+ H.im = (data_t *)H_im;
+ R.re = (data_t *)R_re;
+ R.im = (data_t *)R_im;
+ y.re = (data_t *)y_re;
+ y.im = (data_t *)y_im;
+
+ /* Calculate the MMSE approximation */
+ data_t x_MMSE_re[NUM_TX_ANT][NUM_SC];
+ data_t x_MMSE_im[NUM_TX_ANT][NUM_SC];
+ vcomplex x_MMSE;
+ x_MMSE.re = (data_t *)x_MMSE_re;
+ x_MMSE.im = (data_t *)x_MMSE_im;
+ mmse(&H, &y, &R, &x_MMSE);
+ printf("%f\n", mse(&x, &x_MMSE));
}
diff --git a/src/mmserv.c b/src/mmserv.c
@@ -1,13 +1,30 @@
#include "../include/mmserv.h"
-#include "../../common/rivec/vector_defines.h"
-#include "riscv_vector.h"
+#include <stddef.h> /* for size_t */
#ifdef DEBUG
-#include "../../common/runtime.h"
-#include "../../common/util.h"
-#include "printf.h"
-#include <stdio.h>
+#include <stdint.h> /* for uint64_t */
+#include <stdio.h> /* for printf */
+
+#ifdef _WIN32
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+static uint64_t start = 0, end = 0;
+void start_timer()
+{
+ start = __rdtsc();
+}
+void stop_timer()
+{
+ end = __rdtsc();
+}
+unsigned long long get_timer()
+{
+ return end - start;
+}
#define TIME(msg, func, ...) \
start_timer(); \
@@ -24,63 +41,20 @@
* Complex functions
*/
-complex cmake(IN data_t re, IN data_t im)
-{
- complex t;
- t.re = re;
- t.im = im;
- return t;
-}
-complex cmul(IN complex a, IN complex b)
-{
- complex t;
- t.re = a.re * b.re - a.im * b.im;
- t.im = a.im * b.re + a.re * b.im;
- return t;
-}
-data_t cabs2(IN complex a)
-{
- return a.re * a.re + a.im * a.im;
-}
-complex cadd(IN complex a, IN complex b)
-{
- complex t;
- t.re = a.re + b.re;
- t.im = a.im + b.im;
- return t;
-}
-void cadd_acc(IN complex a, IN complex b)
-{
- a.re += b.re;
- a.im += b.im;
-}
-complex csub(IN complex a, IN complex b)
-{
- complex t;
- t.re = a.re - b.re;
- t.im = a.im - b.im;
- return t;
-}
-complex cconj(IN complex a)
-{
- complex t;
- t.re = a.re;
- t.im = -a.im;
- return t;
-}
-complex cdiv(IN complex a, IN complex b)
+void cdiv(
+ IN data_t a_re, IN data_t a_im,
+ IN data_t b_re, IN data_t b_im,
+ OUT data_t *res_re, OUT data_t *res_im)
{
- complex t;
- t.im = b.re * b.re + b.im * b.im;
- t.re = (a.re * b.re + a.im * b.im) / t.im;
- t.im = (a.im * b.re - a.re * b.im) / t.im;
- return t;
+ *res_im = b_re * b_re + b_im * b_im;
+ *res_re = (a_re * b_re + a_im * b_im) / *res_im;
+ *res_im = (a_im * b_re - a_re * b_im) / *res_im;
}
data_t sqrt(IN data_t x)
{
data_t lo, hi, mid;
- data_t eps = 1e-6;
+ data_t eps = 1e-4;
if (x < eps)
return 0;
@@ -93,27 +67,33 @@ data_t sqrt(IN data_t x)
hi = x;
}
while (hi - lo > eps){
- mid = (lo + hi) / 2;
+ mid = (lo + hi) / 2.f;
if (mid * mid > x) hi = mid;
else lo = mid;
}
return mid;
}
-complex csqrt(IN complex a)
+void csqrt(
+ IN data_t re, IN data_t im,
+ OUT data_t *res_re, OUT data_t *res_im)
{
- if (a.im == 0){
- if (a.re < 0)
- return cmake(0, sqrt(-a.re));
- else
- return cmake(sqrt(a.re), 0);
+ if (im == 0){
+ if (re < 0) {
+ *res_re = 0;
+ *res_im = sqrt(-re);
+ return;
+ }
+ else {
+ *res_re = sqrt(re);
+ *res_im = 0;
+ return;
+ }
}
- data_t length = sqrt(cabs2(a));
- complex res;
- res.re = sqrt((length + a.re) / 2);
- res.im = sqrt((length - a.re) / 2);
- if (a.im < 0)
- res.re = -res.re;
- return res;
+ data_t length = sqrt(re * re + im * im);
+ *res_re = sqrt((length + re) / 2.f);
+ *res_im = sqrt((length - re) / 2.f);
+ if (im < 0.f)
+ *res_re = -*res_re;
}
/*
@@ -121,197 +101,357 @@ complex csqrt(IN complex a)
*/
void cmat_hermitian_transpose_RxTx(
- IN complex A[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex AH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC])
+ IN vcomplex *A,
+ OUT vcomplex *AH)
{
- uint32_t i, j, k;
+ size_t i, j, k, off_ijk = 0, off_jik;
for (i = 0; i < NUM_RX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- AH[i][j][k] = cconj(A[j][i][k]);
+ for (j = 0; j < NUM_TX_ANT; ++j) {
+ off_jik = j * NUM_RX_ANT * NUM_SC + i * NUM_SC;
+ for (k = 0; k < NUM_SC; ++k) {
+ AH->re[off_jik] = A->re[off_ijk];
+ AH->im[off_jik] = -A->im[off_ijk];
+ ++off_ijk;
+ ++off_jik;
+ }
+ }
}
void cmat_hermitian_transpose_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex AH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC])
+ IN vcomplex *A,
+ OUT vcomplex *AH)
{
- uint32_t i, j, k;
+ size_t i, j, k, off_ijk = 0, off_jik;
for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- AH[i][j][k] = cconj(A[j][i][k]);
+ for (j = 0; j < NUM_TX_ANT; ++j) {
+ off_jik = j * NUM_TX_ANT * NUM_SC + i * NUM_SC;
+ for (k = 0; k < NUM_SC; ++k) {
+ AH->re[off_jik] = A->re[off_ijk];
+ AH->im[off_jik] = -A->im[off_ijk];
+ ++off_ijk;
+ ++off_jik;
+ }
+ }
}
void cmatmul_TxRx_RxTx(
- IN complex A[NUM_TX_ANT][NUM_RX_ANT][NUM_SC],
- IN complex B[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC])
+ IN vcomplex *A,
+ IN vcomplex *B,
+ OUT vcomplex *result)
{
- uint32_t i, j, k, l;
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- result[i][j][k] = cmake(0, 0);
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_RX_ANT; ++k)
- for (l = 0; l < NUM_SC; ++l)
- result[i][j][l] = cadd(result[i][j][l], cmul(A[i][k][l], B[k][j][l]));
+ size_t i, j, k, l;
+ size_t off_ijk = 0, off_ilk, off_ljk;
+ data_t A_re, A_im, B_re, B_im;
+
+ for (i = 0; i < NUM_TX_ANT * NUM_TX_ANT * NUM_SC; ++i)
+ result->re[i] = result->im[i] = 0.f;
+
+ for (i = 0; i != NUM_TX_ANT; ++i) {
+ for (j = 0; j != NUM_RX_ANT; ++j) {
+ for (k = 0; k != NUM_SC; ++k) {
+ off_ilk = i * NUM_RX_ANT * NUM_SC + k;
+ off_ljk = j * NUM_SC + k;
+ for (l = 0; l != NUM_RX_ANT; ++l) {
+ A_re = A->re[off_ilk];
+ A_im = A->im[off_ilk];
+ B_re = B->re[off_ljk];
+ B_im = B->im[off_ljk];
+ result->re[off_ijk] += A_re * B_re - A_im * B_im;
+ result->im[off_ijk] += A_re * B_im + A_im * B_re;
+ off_ilk += NUM_SC;
+ off_ljk += NUM_TX_ANT * NUM_SC;
+ }
+ ++off_ijk;
+ }
+ }
+ }
}
void cmatmul_TxTx_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex B[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC])
+ IN vcomplex *A,
+ IN vcomplex *B,
+ OUT vcomplex *result)
{
- uint32_t i, j, k, l;
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- result[i][j][k] = cmake(0, 0);
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_TX_ANT; ++k)
- for (l = 0; l < NUM_SC; ++l)
- result[i][j][l] = cadd(result[i][j][l], cmul(A[i][k][l], B[k][j][l]));
+ size_t i, j, k, l;
+ size_t off_ijk = 0, off_ilk, off_ljk;
+ data_t A_re, A_im, B_re, B_im;
+
+ for (i = 0; i < NUM_TX_ANT * NUM_TX_ANT * NUM_SC; ++i)
+ result->re[i] = result->im[i] = 0.f;
+
+ for (i = 0; i != NUM_TX_ANT; ++i) {
+ for (j = 0; j != NUM_TX_ANT; ++j) {
+ for (k = 0; k != NUM_SC; ++k) {
+ off_ilk = i * NUM_TX_ANT * NUM_SC + k;
+ off_ljk = j * NUM_SC + k;
+ for (l = 0; l != NUM_TX_ANT; ++l) {
+ A_re = A->re[off_ilk];
+ A_im = A->im[off_ilk];
+ B_re = B->re[off_ljk];
+ B_im = B->im[off_ljk];
+ result->re[off_ijk] += A_re * B_re - A_im * B_im;
+ result->im[off_ijk] += A_re * B_im + A_im * B_re;
+ off_ilk += NUM_SC;
+ off_ljk += NUM_TX_ANT * NUM_SC;
+ }
+ ++off_ijk;
+ }
+ }
+ }
}
void cmatadd_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex B[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC])
+ IN vcomplex *A,
+ IN vcomplex *B,
+ OUT vcomplex *result)
{
- uint32_t i, j, k;
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- result[i][j][k] = cadd(A[i][j][k], B[i][j][k]);
+ size_t off_ijk = 0, off_result = 0;
+ for (;off_ijk != NUM_TX_ANT * NUM_TX_ANT * NUM_SC;) {
+ result->re[off_result] = A->re[off_ijk] + B->re[off_ijk];
+ result->im[off_result] = A->im[off_ijk] + B->im[off_ijk];
+ ++off_ijk;
+ ++off_result;
+ }
}
void ccholesky_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC])
+ IN vcomplex *A,
+ OUT vcomplex *L)
{
- uint32_t i, j, k, l;
- complex sum;
+ size_t i, j, k, l;
+ size_t off_ijk, off_ilk, off_jlk, off_jjk;
+ data_t sum_re, sum_im;
+ data_t t1_re, t1_im, t2_re, t2_im;
+
/* Init upper triangle with zeros */
for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = i+1; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- L[i][j][k] = cmake(0, 0);
+ for (j = i+1; j < NUM_TX_ANT; ++j) {
+ off_ijk = i * NUM_TX_ANT * NUM_SC + j * NUM_SC;
+ for (k = 0; k < NUM_SC; ++k) {
+ L->re[off_ijk] = L->im[off_ijk] = 0;
+ ++off_ijk;
+ }
+ }
/* Calculate diagonal and lower triangular entries */
for (i = 0; i < NUM_TX_ANT; ++i)
for (j = 0; j < i+1; ++j)
for (k = 0; k < NUM_SC; ++k){
- sum = A[i][j][k];
- for (l = 0; l < j; ++l)
- sum = csub(sum, cmul(L[i][l][k], cconj(L[j][l][k])));
+ off_ijk = i * NUM_TX_ANT * NUM_SC + j * NUM_SC + k;
+ sum_re = A->re[off_ijk];
+ sum_im = A->im[off_ijk];
+ for (l = 0; l < j; ++l) {
+ off_ilk = i * NUM_TX_ANT * NUM_SC + l * NUM_SC + k;
+ off_jlk = j * NUM_TX_ANT * NUM_SC + l * NUM_SC + k;
+ t1_re = L->re[off_ilk];
+ t1_im = L->im[off_ilk];
+ t2_re = L->re[off_jlk];
+ t2_im = L->im[off_jlk];
+ sum_re -= t1_re * t2_re + t1_im * t2_im;
+ sum_im -= t1_im * t2_re - t1_re * t2_im;
+ }
if (i == j)
- L[i][j][k] = csqrt(sum);
- else
- L[i][j][k] = cdiv(sum, L[j][j][k]);
+ csqrt(
+ sum_re, sum_im,
+ &L->re[off_ijk],
+ &L->im[off_ijk]);
+ else {
+ off_jjk = j * NUM_TX_ANT * NUM_SC + j * NUM_SC + k;
+ cdiv(
+ sum_re, sum_im,
+ L->re[off_jjk],
+ L->im[off_jjk],
+ &L->re[off_ijk],
+ &L->im[off_ijk]);
+ }
}
}
void ccholesky_nosqrt_TxTx(
- IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC])
+ IN vcomplex *A,
+ OUT vcomplex *L,
+ OUT vcomplex *D)
{
- uint32_t i, j, k, l;
- complex sum, t;
+ size_t i, j, k, l;
+ size_t off_iik, off_ijk, off_ilk, off_jlk;
+ size_t off_ik, off_jk;
+ data_t t1_re, t1_im, t2_re, t2_im;
+ data_t sum_re, sum_im;
+
/* Init zeros in upper triangles of L and D */
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = i+1; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- D[i][j][k] = L[i][j][k] = cmake(0, 0);
+ for (off_ijk = 0; off_ijk < NUM_TX_ANT * NUM_TX_ANT * NUM_SC; ++off_ijk)
+ L->re[off_ijk] = L->im[off_ijk] = D->re[off_ijk] = D->im[off_ijk] = 0;
/* Init zeros in lower triangle of D */
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < i; ++j)
- for (k = 0; k < NUM_SC; ++k)
- D[i][j][k] = cmake(0, 0);
/* Init ones in the diagonal of the result */
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (k = 0; k < NUM_SC; ++k)
- L[i][i][k] = cmake(1, 0);
+ for (off_ik = 0; off_ik < NUM_TX_ANT * NUM_SC; ++off_ik) {
+ L->im[off_ik] = D->re[off_ik] = D->im[off_ik] = 0;
+ L->re[off_ik] = 1;
+ }
/* Calculate the lower triangle of the result */
- for (i = 0; i < NUM_TX_ANT; ++i){
- /* L_ij = (A_ij - \sum_{l=0}^{j-1} L_il L_jl^* D_k) / D_j */
- for (j = 0; j < i; ++j){
- sum.re = 0;
- sum.im = 0;
- for (l = 0; l < j; ++l)
- for (k = 0; k < NUM_SC; ++k){
- /* TODO there is one sum for all scs what leads to contamination. Fix that */
- t = cmul(L[i][l][k], cconj(L[j][l][k]));
- t = cmul(t, D[l][l][k]);
- cadd_acc(sum, t);
+ for (k = 0; k != NUM_SC; ++k) {
+ for (i = 0; i != NUM_TX_ANT; ++i) {
+ off_iik = i * NUM_TX_ANT * NUM_SC + i * NUM_SC + k;
+ /* L_ij = (A_ij - \sum_{l=0}^{j-1} L_il L_jl^* D_k) / D_j */
+ for (j = 0; j != i; ++j) {
+ for (l = 0; l != j; ++l) {
+ off_ilk = i * NUM_TX_ANT * NUM_SC + l * NUM_SC + k;
+ off_jlk = j * NUM_TX_ANT * NUM_SC + l * NUM_SC + k;
+ t1_re = L->re[off_ilk];
+ t1_im = L->im[off_ilk];
+ t2_re = L->re[off_jlk];
+ t2_im = L->im[off_jlk];
+ sum_re += t1_re * t2_re + t1_im * t2_im;
+ sum_im += t1_im * t2_re - t1_re * t2_im;
}
- for (k = 0; k < NUM_SC; ++k){
- L[i][j][k] = csub(A[i][j][k], sum);
- L[i][j][k] = cdiv(L[i][j][k], D[j][j][k]);
+ off_ijk = i * NUM_TX_ANT * NUM_SC + j * NUM_SC + k;
+ off_jk = j * NUM_SC + k;
+ t1_re = A->re[off_ijk] - sum_re;
+ t1_im = A->im[off_ijk] - sum_im;
+ cdiv(
+ t1_re, t1_im,
+ D->re[off_jk], D->im[off_jk],
+ &L->re[off_ijk],
+ &L->im[off_ijk]);
}
- }
- /* D_i = A_ii - \sum_{j=0}^{i-1} L_ij L_ij^* D_j */
- sum.re = 0;
- sum.im = 0;
- for (j = 0; j < i; ++j)
- for (k = 0; k < NUM_SC; ++k){
- /* TODO there is one sum for all scs what leads to contamination. Fix that */
- t = cmul(L[i][j][k], cconj(L[i][j][k]));
- t = cmul(t, D[j][j][k]);
- cadd_acc(sum, t);
+ /* D_i = A_ii - \sum_{j=0}^{i-1} L_ij L_ij^* D_j */
+ sum_re = sum_im = 0;
+ off_ik = i * NUM_SC + k;
+ for (j = 0; j != i; ++j) {
+ off_ijk = i * NUM_TX_ANT * NUM_SC + j * NUM_SC + k;
+ off_jk = j * NUM_SC + k;
+ t1_re = L->re[off_ijk];
+ t1_im = L->im[off_ijk];
+ t2_re = t1_re * t1_re + t1_im * t1_im;
+ t2_im = t1_re * t1_im - t1_im * t1_re;
+ t1_re = D->re[off_jk];
+ t1_im = D->im[off_jk];
+ sum_re += t2_re * t1_re - t2_im * t1_im;
+ sum_im += t2_re * t1_im + t2_im * t1_re;
}
- for (k = 0; k < NUM_SC; ++k)
- D[i][i][k] = csub(A[i][i][k], sum);
+ D->re[off_ik] = A->re[off_iik] - sum_re;
+ D->im[off_ik] = A->im[off_iik] - sum_im;
+ }
}
}
void cmatvecmul_TxRx(
- IN complex A[NUM_TX_ANT][NUM_RX_ANT][NUM_SC],
- IN complex b[NUM_RX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_SC])
+ IN vcomplex *A,
+ IN vcomplex *b,
+ OUT vcomplex *result)
{
- uint32_t i, j, k;
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_SC; ++j)
- result[i][j] = cmake(0., 0.);
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_RX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k)
- result[i][k] = cadd(result[i][k], cmul(A[i][j][k], b[j][k]));
+ size_t i, j, k;
+ size_t off_ik, off_ijk = 0, off_jk;
+ size_t off_ik_bck;
+ data_t A_re, A_im, b_re, b_im;
+
+ for (i = 0; i < NUM_TX_ANT * NUM_SC; ++i)
+ result->re[i] = result->im[i] = 0.f;
+
+ for (i = 0; i < NUM_TX_ANT; ++i) {
+ off_jk = 0;
+ off_ik_bck = i * NUM_SC;
+ for (j = 0; j < NUM_RX_ANT; ++j) {
+ off_ik = off_ik_bck;
+ for (k = 0; k < NUM_SC; ++k) {
+ A_re = A->re[off_ijk];
+ A_im = A->im[off_ijk];
+ b_re = b->re[off_jk];
+ b_im = b->im[off_jk];
+ result->re[off_ik] += A_re * b_re - A_im * b_im;
+ result->im[off_ik] += A_re * b_im + A_im * b_re;
+ ++off_ik;
+ ++off_ijk;
+ ++off_jk;
+ }
+ }
+ }
}
void cforwardsub_TxTx(
- IN complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex b[NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_SC])
+ IN vcomplex *L,
+ IN vcomplex *b,
+ OUT vcomplex *result)
{
- uint32_t i, j, k;
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_SC; ++j){
- result[i][j] = b[i][j];
- for (k = 0; k < i; ++k)
- result[i][j] = csub(result[i][j], cmul(L[i][k][j], result[k][j]));
- result[i][j] = cdiv(result[i][j], L[i][i][j]);
+ size_t i, j, k;
+ size_t off_ij = 0, off_ikj, off_kj, off_iij;
+ size_t off_ikj_bck;
+ data_t L_re, L_im, result_re, result_im;
+
+ for (i = 0; i < NUM_TX_ANT; ++i) {
+ off_ij = i * NUM_SC;
+ off_ikj_bck = i * NUM_TX_ANT * NUM_SC;
+ off_iij = i * NUM_TX_ANT * NUM_SC + i * NUM_SC;
+ for (j = 0; j < NUM_SC; ++j) {
+ result->re[off_ij] = b->re[off_ij];
+ result->im[off_ij] = b->im[off_ij];
+
+ off_ikj = off_ikj_bck;
+ off_kj = j;
+ for (k = 0; k < i; ++k) {
+ L_re = L->re[off_ikj];
+ L_im = L->im[off_ikj];
+ result_re = result->re[off_kj];
+ result_im = result->im[off_kj];
+ result->re[off_ij] -= L_re * result_re - L_im * result_im;
+ result->im[off_ij] -= L_re * result_im + L_im * result_re;
+ off_kj += NUM_SC;
+ off_ikj += NUM_SC;
+ }
+
+ result_re = result->re[off_ij];
+ result_im = result->im[off_ij];
+ cdiv(
+ result_re, result_im,
+ L->re[off_iij], L->im[off_iij],
+ &result->re[off_ij],
+ &result->im[off_ij]);
+
+ ++off_ij;
+ ++off_ikj_bck;
+ ++off_iij;
}
+ }
}
void cbackwardsub_TxTx(
- IN complex U[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex b[NUM_TX_ANT][NUM_SC],
- OUT complex result[NUM_TX_ANT][NUM_SC])
+ IN vcomplex *U,
+ IN vcomplex *b,
+ OUT vcomplex *result)
{
- uint32_t i, j, k;
- for (i = NUM_TX_ANT; i != (uint32_t)-1; --i)
+ /* TODO optimize offsets */
+
+ size_t i, j, k;
+ size_t off_ij, off_ikj, off_kj, off_iij;
+ data_t U_re, U_im, result_re, result_im;
+
+ for (i = NUM_TX_ANT - 1; i != (size_t)-1; --i) {
for (j = 0; j < NUM_SC; ++j){
- result[i][j] = b[i][j];
- for (k = i+1; k < NUM_TX_ANT; ++k)
- result[i][j] = csub(result[i][j], cmul(U[i][k][j], result[k][j]));
- result[i][j] = cdiv(result[i][j], U[i][i][j]);
+ off_ij = i * NUM_SC + j;
+ off_iij = i * NUM_TX_ANT * NUM_SC + i * NUM_SC + j;
+ result->re[off_ij] = b->re[off_ij];
+ result->im[off_ij] = b->im[off_ij];
+
+ for (k = i+1; k < NUM_TX_ANT; ++k){
+ off_ikj = i * NUM_TX_ANT * NUM_SC + k * NUM_SC + j;
+ off_kj = k * NUM_SC + j;
+ U_re = U->re[off_ikj];
+ U_im = U->im[off_ikj];
+ result_re = result->re[off_kj];
+ result_im = result->im[off_kj];
+ result->re[off_ij] -= U_re * result_re - U_im * result_im;
+ result->im[off_ij] -= U_re * result_im + U_im * result_re;
+ }
+
+ result_re = result->re[off_ij];
+ result_im = result->im[off_ij];
+ cdiv(
+ result_re, result_im,
+ U->re[off_iij], U->im[off_iij],
+ &result->re[off_ij],
+ &result->im[off_ij]);
}
+ }
}
/*
@@ -319,124 +459,180 @@ void cbackwardsub_TxTx(
*/
void mmse(
- IN complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex y[NUM_RX_ANT][NUM_SC],
- IN complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex x_MMSE[NUM_TX_ANT][NUM_SC])
+ IN vcomplex *H,
+ IN vcomplex *y,
+ IN vcomplex *R,
+ OUT vcomplex *x_MMSE)
{
- complex HH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
- complex HH_H[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- complex HHy[NUM_TX_ANT][NUM_SC];
- complex z[NUM_TX_ANT][NUM_SC];
- complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ vcomplex HH, HH_H, L, HHy, z, LH;
+
+ data_t HH_re[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
+ data_t HH_im[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
+ data_t HH_H_re[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t HH_H_im[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t L_re[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t L_im[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t HHy_re[NUM_TX_ANT][NUM_SC];
+ data_t HHy_im[NUM_TX_ANT][NUM_SC];
+ data_t z_re[NUM_TX_ANT][NUM_SC];
+ data_t z_im[NUM_TX_ANT][NUM_SC];
+ data_t LH_re[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t LH_im[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+
+ HH.re = (data_t *)HH_re;
+ HH.im = (data_t *)HH_im;
+ HH_H.re = (data_t *)HH_H_re;
+ HH_H.im = (data_t *)HH_H_im;
+ L.re = (data_t *)L_re;
+ L.im = (data_t *)L_im;
+ HHy.re = (data_t *)HHy_re;
+ HHy.im = (data_t *)HHy_im;
+ z.re = (data_t *)z_re;
+ z.im = (data_t *)z_im;
+ LH.re = (data_t *)LH_re;
+ LH.im = (data_t *)LH_im;
/* H^H */
TIME(
"Hermitian transpose (RxTx): %ld\n",
cmat_hermitian_transpose_RxTx,
- H, HH);
+ H, &HH);
/* H^H*H */
TIME(
"Matmul (TxRx x RxTx): %ld\n",
cmatmul_TxRx_RxTx,
- HH, H, HH_H);
+ &HH, H, &HH_H);
/* H^H*H + R */
TIME(
"Matadd (TxTx + TxTx): %ld\n",
cmatadd_TxTx,
- HH_H, R, HH_H);
+ &HH_H, R, &HH_H);
/* L: (H^H*H + R) = L*L^H */
TIME(
"Cholesky (TxTx): %ld\n",
ccholesky_TxTx,
- HH_H, L);
+ &HH_H, &L);
/* z: L*z = H^H*y */
TIME(
"Matrix-vector multiplication (TxRx x Rx): %ld\n",
cmatvecmul_TxRx,
- HH, y, HHy);
+ &HH, y, &HHy);
TIME(
"Forward substitution (TxTx): %ld\n",
cforwardsub_TxTx,
- L, HHy, z);
+ &L, &HHy, &z);
/* x_MMSE: L^H*x_MMSE = z */
TIME(
"Hermitian transpose (TxTx): %ld\n",
cmat_hermitian_transpose_TxTx,
- L, LH);
+ &L, &LH);
TIME(
"Backward substitution (TxTx): %ld\n",
cbackwardsub_TxTx,
- LH, z, x_MMSE);
+ &LH, &z, x_MMSE);
}
void mmse_nosqrt(
- IN complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC],
- IN complex y[NUM_RX_ANT][NUM_SC],
- IN complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
- OUT complex x_MMSE[NUM_TX_ANT][NUM_SC])
+ IN vcomplex *H,
+ IN vcomplex *y,
+ IN vcomplex *R,
+ OUT vcomplex *x_MMSE)
{
- /* H^H */
- complex HH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
- complex HH_H[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- /* TODO D should be a vector to save memory */
- complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- complex HHy[NUM_TX_ANT][NUM_SC];
- complex z[NUM_TX_ANT][NUM_SC];
- complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ vcomplex HH, HH_H, L, D, HHy, z, LH;
+ data_t HH_re[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
+ data_t HH_im[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
+ data_t HH_H_re[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t HH_H_im[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t L_re[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t L_im[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t D_re[NUM_TX_ANT][NUM_SC];
+ data_t D_im[NUM_TX_ANT][NUM_SC];
+ data_t HHy_re[NUM_TX_ANT][NUM_SC];
+ data_t HHy_im[NUM_TX_ANT][NUM_SC];
+ data_t z_re[NUM_TX_ANT][NUM_SC];
+ data_t z_im[NUM_TX_ANT][NUM_SC];
+ data_t LH_re[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+ data_t LH_im[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+
+ HH.re = (data_t *)HH_re;
+ HH.im = (data_t *)HH_im;
+ HH_H.re = (data_t *)HH_H_re;
+ HH_H.im = (data_t *)HH_H_im;
+ L.re = (data_t *)L_re;
+ L.im = (data_t *)L_im;
+ D.re = (data_t *)D_re;
+ D.im = (data_t *)D_im;
+ HHy.re = (data_t *)HHy_re;
+ HHy.im = (data_t *)HHy_im;
+ z.re = (data_t *)z_re;
+ z.im = (data_t *)z_im;
+ LH.re = (data_t *)LH_re;
+ LH.im = (data_t *)LH_im;
+
+ /* H^H */
TIME(
"Hermitian transpose (RxTx): %ld\n",
cmat_hermitian_transpose_RxTx,
- H, HH);
+ H, &HH);
TIME(
"Matmul (TxRx x RxTx): %ld\n",
cmatmul_TxRx_RxTx,
- HH, H, HH_H);
+ &HH, H, &HH_H);
TIME(
"Matadd (TxTx + TxTx): %ld\n",
cmatadd_TxTx,
- HH_H, R, HH_H);
+ &HH_H, R, &HH_H);
/* L, D: (H^H*H + R) = L*D*L^H */
TIME(
"Cholesky nosqrt (TxTx): %ld\n",
ccholesky_nosqrt_TxTx,
- HH_H, L, D);
+ &HH_H, &L, &D);
/* z: L*z = H^H*y */
TIME(
"Matrix-vector multiplication (TxRx x Rx): %ld\n",
cmatvecmul_TxRx,
- HH, y, HHy);
+ &HH, y, &HHy);
TIME(
"Forward substitution (TxTx): %ld\n",
cforwardsub_TxTx,
- L, HHy, z);
+ &L, &HHy, &z);
/* x_MMSE: L^H*x_MMSE = D^-1*z */
start_timer();
- for (uint32_t i = 0; i != NUM_TX_ANT; ++i)
- for (uint32_t j = 0; j != NUM_SC; ++j)
- z[i][j] = cdiv(z[i][j], D[i][i][j]);
+ data_t z_t_re, z_t_im;
+ for (size_t off_ij = 0; off_ij < NUM_TX_ANT * NUM_SC; ++off_ij) {
+ z_t_re = z.re[off_ij];
+ z_t_im = z.im[off_ij];
+ cdiv(
+ z_t_re, z_t_im,
+ D.re[off_ij],
+ D.im[off_ij],
+ &z.re[off_ij],
+ &z.im[off_ij]);
+ }
stop_timer();
- printf("Division (TxTx): %ld\n", get_timer());
+ printf("Division (Tx): %ld\n", get_timer());
TIME(
"Hermitian transpose (TxTx): %ld\n",
cmat_hermitian_transpose_TxTx,
- L, LH);
+ &L, &LH);
TIME(
"Backward substitution (TxTx): %ld\n",
cbackwardsub_TxTx,
- LH, z, x_MMSE);
+ &LH, &z, x_MMSE);
}
acc_t mse(
- IN complex x[NUM_TX_ANT][NUM_SC],
- IN complex x_MMSE[NUM_TX_ANT][NUM_SC])
+ IN vcomplex *x,
+ IN vcomplex *x_MMSE)
{
- acc_t sum = 0;
- for (uint32_t i = 0; i != NUM_TX_ANT; ++i)
- for (uint32_t j = 0; j != NUM_SC; ++j)
- sum += cabs2(csub(x[i][j], x_MMSE[i][j]));
- return sum / (NUM_TX_ANT * NUM_SC);
+ acc_t sum = 0.;
+ size_t off_ij = 0;
+ data_t sub1, sub2;
+ for (; off_ij != NUM_TX_ANT * NUM_SC; ++off_ij) {
+ sub1 = x->re[off_ij] - x_MMSE->re[off_ij];
+ sub2 = x->im[off_ij] - x_MMSE->im[off_ij];
+ sum += (sub1 * sub1 + sub2 * sub2) / (data_t)(NUM_TX_ANT * NUM_SC);
+ }
+ return sum;
}