mmserv

Minimum Mean Square Error detection on RISC-V Vector Extention
git clone https://git.ea.contact/mmserv
Log | Files | Refs | README

commit bf6a94e5a9d53997aaf89d915327aba66411079c
parent 9362e7014154afd7772f908b36aee6cada2b8b7b
Author: Egor Achkasov <eaachkasov@edu.hse.ru>
Date:   Fri,  8 Nov 2024 00:01:47 +0100

Change data type to float

Diffstat:
Minc/define.h | 7+++++--
Minc/mmserv.h | 25++++++++++++++++---------
Mmain.c | 46++++++++++++++++++++++++----------------------
Mscripts/gen_data.py | 10+++++-----
Mscripts/mmse.py | 57+++++++++++++++++++++++++--------------------------------
Mscripts/util.py | 18+++++++++---------
Mscripts/viz_compare_x.py | 4++--
Msrc/mmserv.c | 170+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
8 files changed, 187 insertions(+), 150 deletions(-)

diff --git a/inc/define.h b/inc/define.h @@ -1,8 +1,8 @@ #ifndef __DEFINE_H #define __DEFINE_H -#define NUM_RX_ANT 12 -#define NUM_TX_ANT 12 +#define NUM_RX_ANT 4 +#define NUM_TX_ANT 4 #define NUM_SC 1 #define NOSQRT 1 @@ -10,4 +10,7 @@ #define IN #define OUT +typedef float data_t; +typedef float acc_t; + #endif diff --git a/inc/mmserv.h b/inc/mmserv.h @@ -11,12 +11,9 @@ * Typedefs */ -typedef int16_t data_t; -typedef int32_t acc_t; - typedef struct { - acc_t re; - acc_t im; + data_t re; + data_t im; } complex; /** Calculate MMSE estimation of x in y = H*x + n @@ -54,7 +51,7 @@ complex cmake(IN data_t re, IN data_t im); complex cmul(IN complex a, IN complex b); /** Complex absolute value squared */ -acc_t cabs2(IN complex a); +data_t cabs2(IN complex a); /** Complex addition */ complex cadd(IN complex a, IN complex b); @@ -70,7 +67,7 @@ complex cconj(IN complex a); complex cdiv(IN complex a, IN complex b); /** Square root of a natural number */ -data_t sqrt(IN acc_t x); +data_t sqrt(IN data_t x); /** Complex root */ complex csqrt(IN complex a); @@ -105,6 +102,16 @@ void cmatmul_TxRx_RxTx( IN complex B[NUM_RX_ANT][NUM_TX_ANT][NUM_SC], OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]); +/** Multiply two matrices A and B + * \param A input matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC] + * \param B input matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC] + * \param result output matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC] + */ +void cmatmul_TxTx_TxTx( + IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], + IN complex B[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], + OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]); + /** Add two matrices A and B * \param A input matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC] * \param B input matrix. Shape [NUM_TX_ANT][NUM_TX_ANT][NUM_SC] @@ -180,7 +187,7 @@ void cbackwardsub_TxTx( void load_data( IN const char *file, IN size_t size, - OUT uint16_t *out); + OUT data_t *out); /** Save data to a binary file * \param file path to the file @@ -188,7 +195,7 @@ void load_data( */ void save_data( IN const char* file, - IN uint16_t x_mmse[NUM_TX_ANT][NUM_SC][2]); + IN data_t x_mmse[NUM_TX_ANT][NUM_SC][2]); #endif /* __MMSERV_H */ diff --git a/main.c b/main.c @@ -1,52 +1,54 @@ #include "inc/mmserv.h" int main() { + uint32_t i, j, k; + /* Load the data */ - uint16_t x_raw[NUM_TX_ANT][NUM_SC][2]; /* Transmitted signal */ - uint16_t H_raw[NUM_RX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Channel */ - uint16_t R_raw[NUM_TX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Noise covariance matrix */ - uint16_t y_raw[NUM_RX_ANT][NUM_SC][2]; /* Received signal */ - load_data("data/x.bin", NUM_TX_ANT*NUM_SC*2, x_raw); - load_data("data/H.bin", NUM_RX_ANT*NUM_TX_ANT*NUM_SC*2, H_raw); - load_data("data/y.bin", NUM_RX_ANT*NUM_SC*2, y_raw); - load_data("data/R.bin", NUM_RX_ANT*NUM_SC*2, R_raw); + data_t x_raw[NUM_TX_ANT][NUM_SC][2]; /* Transmitted signal */ + data_t H_raw[NUM_RX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Channel */ + data_t R_raw[NUM_TX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Noise covariance matrix */ + data_t y_raw[NUM_RX_ANT][NUM_SC][2]; /* Received signal */ + load_data("data\\x.bin", NUM_TX_ANT*NUM_SC*2, x_raw); + load_data("data\\H.bin", NUM_RX_ANT*NUM_TX_ANT*NUM_SC*2, H_raw); + load_data("data\\R.bin", NUM_TX_ANT*NUM_RX_ANT*NUM_SC*2, R_raw); + load_data("data\\y.bin", NUM_RX_ANT*NUM_SC*2, y_raw); /* Cast the data into complex data structures */ complex x[NUM_TX_ANT][NUM_SC]; complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC]; complex y[NUM_RX_ANT][NUM_SC]; complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_SC; ++j){ + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_SC; ++j){ x[i][j].re = x_raw[i][j][0]; x[i][j].im = x_raw[i][j][1]; } - for (uint32_t i = 0; i < NUM_RX_ANT; ++i) - for (uint32_t j = 0; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k){ + for (i = 0; i < NUM_RX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k){ H[i][j][k].re = H_raw[i][j][k][0]; H[i][j][k].im = H_raw[i][j][k][1]; } - for (uint32_t i = 0; i < NUM_RX_ANT; ++i) - for (uint32_t j = 0; j < NUM_SC; ++j){ + for (i = 0; i < NUM_RX_ANT; ++i) + for (j = 0; j < NUM_SC; ++j){ y[i][j].re = y_raw[i][j][0]; y[i][j].im = y_raw[i][j][1]; } - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k){ + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k){ R[i][j][k].re = R_raw[i][j][k][0]; R[i][j][k].im = R_raw[i][j][k][1]; } /* Calculate the MMSE approximation */ complex x_MMSE[NUM_TX_ANT][NUM_SC]; - mmse_nosqrt(H, y, R, x_MMSE); + mmse(H, y, R, x_MMSE); /* Interleave the result */ - uint16_t res[NUM_TX_ANT][NUM_SC][2]; - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_SC; ++j){ + data_t res[NUM_TX_ANT][NUM_SC][2]; + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_SC; ++j){ res[i][j][0] = x_MMSE[i][j].re; res[i][j][1] = x_MMSE[i][j].im; } diff --git a/scripts/gen_data.py b/scripts/gen_data.py @@ -20,7 +20,7 @@ WRITE_DATA_BIN = "--bin" in argv WRITE_DATA_S = "--s" in argv NUM_RX_ANT, NUM_TX_ANT, NUM_SC = read_defines() -NOISE_STD_DEVIATION = np.sqrt(.5) / 10 # noise standard deviation +NOISE_STD_DEVIATION = np.sqrt(.5) / 100 # noise standard deviation # Transmitter signal x = random((NUM_TX_ANT, NUM_SC)) \ @@ -58,10 +58,10 @@ class Section: sections = [ - Section("x", "data/x.txt", "3", 16, x.size * 2), - Section("H", "data/H.txt", "3", 16, H.size * 2), - Section("R", "data/R.txt", "3", 16, R.size * 2), - Section("y", "data/y.txt", "3", 16, y.size * 2), + Section("x", "data/x.txt", "3", 32, x.size * 2), + Section("H", "data/H.txt", "3", 32, H.size * 2), + Section("R", "data/R.txt", "3", 32, R.size * 2), + Section("y", "data/y.txt", "3", 32, y.size * 2), ] # Create "data" directory if it does not exist diff --git a/scripts/mmse.py b/scripts/mmse.py @@ -8,38 +8,31 @@ from os import path, makedirs NUM_RX_ANT, NUM_TX_ANT, NUM_SC = read_defines() # Read the data from the files -x = deinterleave(np.fromfile("data/x.bin", dtype=np.int16), (NUM_TX_ANT, NUM_SC)) -H = deinterleave(np.fromfile("data/H.bin", dtype=np.int16), (NUM_RX_ANT, NUM_TX_ANT, NUM_SC)) -R = deinterleave(np.fromfile("data/R.bin", dtype=np.int16), (NUM_TX_ANT, NUM_TX_ANT, NUM_SC)) -y = deinterleave(np.fromfile("data/y.bin", dtype=np.int16), (NUM_RX_ANT, NUM_SC)) - -# Move the SC dimension to the beginning -x = np.moveaxis(x, -1, 0) -H = np.moveaxis(H, -1, 0) -R = np.moveaxis(R, -1, 0) -y = np.moveaxis(y, -1, 0) - -HH = H.conj().transpose(0, 2, 1) -L = np.linalg.cholesky(HH @ H + R) -HHy = np.einsum("...ij,...j->...i", HH, y) - -z = np.empty((NUM_SC, NUM_TX_ANT), dtype=np.complex64) -for i in range(NUM_TX_ANT): - z[:, i] = (HHy[:, i] - np.sum([L[:, i, j]*z[:, j] for j in range(i)], 0)) / L[:, i, i] - -LH = L.conj().transpose(0, 2, 1) -x_mmse = np.empty((NUM_SC, NUM_TX_ANT), dtype=np.complex64) -for i in range(NUM_TX_ANT-1, -1, -1): - x_mmse[:, i] = (z[:, i] - np.sum([LH[:, i, j]*x_mmse[:, j] for j in range(i+1, NUM_TX_ANT)], 0)) / LH[:, i, i] - -# Move the SC dimension back to the end -x_mmse = np.moveaxis(x_mmse, 0, -1) -x = np.moveaxis(x, 0, -1) -H = np.moveaxis(H, 0, -1) -R = np.moveaxis(R, 0, -1) -y = np.moveaxis(y, 0, -1) - -# Cast the complex data to int16 +x = deinterleave(np.fromfile("data/x.bin", dtype=np.float32), (NUM_TX_ANT, NUM_SC)) +H = deinterleave(np.fromfile("data/H.bin", dtype=np.float32), (NUM_RX_ANT, NUM_TX_ANT, NUM_SC)) +R = deinterleave(np.fromfile("data/R.bin", dtype=np.float32), (NUM_TX_ANT, NUM_TX_ANT, NUM_SC)) +y = deinterleave(np.fromfile("data/y.bin", dtype=np.float32), (NUM_RX_ANT, NUM_SC)) + +x_mmse = np.empty((NUM_TX_ANT, NUM_SC), np.complex64) + +for sc in range(NUM_SC): + HH = H[..., sc].conj().T + L = np.linalg.cholesky(HH @ H[..., sc] + R[..., sc]) + HHy = np.einsum("ij,j->i", HH, y[:, sc]) + print("\nHHy:\n", HHy) + print("\nL:\n", L) + + z = np.empty((NUM_TX_ANT,), dtype=np.complex64) + for i in range(NUM_TX_ANT): + z[i] = (HHy[i] - np.sum([L[i, j]*z[j] for j in range(i)])) / L[i, i] + print("\nz:\n", z) + + LH = L.conj().T + for i in range(NUM_TX_ANT-1, -1, -1): + x_mmse[i, sc] = (z[i] - np.sum([LH[i, j]*x_mmse[j, sc] for j in range(i+1, NUM_TX_ANT)], 0)) / LH[i, i] + print("\nx_mmse:\n", x_mmse) + +# Output the data x_mmse_data = interleave(x_mmse) out_dir = path.join(path.dirname(__file__), "..", "out") if not path.exists(out_dir): diff --git a/scripts/util.py b/scripts/util.py @@ -29,31 +29,31 @@ def read_defines(): def interleave(data: np.ndarray) -> np.ndarray: - """Cast a np.complex64 array to np.int16 array. + """Cast a np.complex64 array to np.float32 array. Args: data (np.ndarray): The complex data to be casted. Dtype: np.complex64 Returns: - np.ndarray: The interleaved data. Dtype: np.int16 + np.ndarray: The interleaved data. Dtype: np.float32 """ - res = np.empty(2 * data.size, dtype=np.int16) - res[0::2] = data.flatten().real.astype(np.float16).view(np.int16) - res[1::2] = data.flatten().imag.astype(np.float16).view(np.int16) + res = np.empty(2 * data.size, dtype=np.float32) + res[0::2] = data.flatten().real.astype(np.float32) + res[1::2] = data.flatten().imag.astype(np.float32) return res def deinterleave(data: np.ndarray, shape: tuple) -> np.ndarray: - """Cast a np.int16 array to np.complex64 array. + """Cast a np.float32 array to np.complex64 array. Args: - data (np.ndarray): The interleaved data to be casted. Dtype: np.int16 + data (np.ndarray): The interleaved data to be casted. Dtype: np.float32 shape (tuple): The shape of the deinterleaved data. Returns: np.ndarray: The deinterleaved data. Dtype: np.complex64 """ res = np.empty(shape, np.complex64) - res.real = data[0::2].view(np.float16).astype(np.float32).reshape(shape) - res.imag = data[1::2].view(np.float16).astype(np.float32).reshape(shape) + res.real = data[0::2].reshape(shape) + res.imag = data[1::2].reshape(shape) return res diff --git a/scripts/viz_compare_x.py b/scripts/viz_compare_x.py @@ -10,9 +10,9 @@ from matplotlib import pyplot as plt NUM_RX_ANT, NUM_TX_ANT, NUM_SC = read_defines() with open("out/x_mmse.bin", "rb") as f: - x_mmse = deinterleave(np.fromfile(f, dtype=np.int16), (NUM_TX_ANT, NUM_SC)) + x_mmse = deinterleave(np.fromfile(f, dtype=np.float32), (NUM_TX_ANT, NUM_SC)) with open("data/x.bin", "rb") as f: - x = deinterleave(np.fromfile(f, dtype=np.int16), (NUM_TX_ANT, NUM_SC)) + x = deinterleave(np.fromfile(f, dtype=np.float32), (NUM_TX_ANT, NUM_SC)) for tx in range(NUM_TX_ANT): plt.scatter(x_mmse[tx].real, diff --git a/src/mmserv.c b/src/mmserv.c @@ -20,7 +20,7 @@ complex cmul(IN complex a, IN complex b) t.im = a.im * b.re + a.re * b.im; return t; } -acc_t cabs2(IN complex a) +data_t cabs2(IN complex a) { return a.re * a.re + a.im * a.im; } @@ -59,10 +59,10 @@ complex cdiv(IN complex a, IN complex b) return t; } -data_t sqrt(IN acc_t x) +data_t sqrt(IN data_t x) { if (x < 2) return x; - acc_t lo = 1, hi = x, mid; + data_t lo = 1, hi = x, mid; while (100 * lo * lo < x) lo *= 10; while (hi * hi / 100 > x) hi /= 10; for (int i = 0; i < 100; i++) { @@ -91,9 +91,10 @@ void cmat_hermitian_transpose_RxTx( IN complex A[NUM_RX_ANT][NUM_TX_ANT][NUM_SC], OUT complex AH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC]) { - for (uint32_t i = 0; i < NUM_RX_ANT; ++i) - for (uint32_t j = 0; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k) + uint32_t i, j, k; + for (i = 0; i < NUM_RX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) AH[i][j][k] = cconj(A[j][i][k]); } @@ -101,9 +102,10 @@ void cmat_hermitian_transpose_TxTx( IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], OUT complex AH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]) { - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k) + uint32_t i, j, k; + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) AH[i][j][k] = cconj(A[j][i][k]); } @@ -112,10 +114,32 @@ void cmatmul_TxRx_RxTx( IN complex B[NUM_RX_ANT][NUM_TX_ANT][NUM_SC], OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]) { - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_RX_ANT; ++k) - for (uint32_t l = 0; l < NUM_SC; ++l) + uint32_t i, j, k, l; + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) + result[i][j][k] = cmake(0, 0); + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_RX_ANT; ++k) + for (l = 0; l < NUM_SC; ++l) + result[i][j][l] = cadd(result[i][j][l], cmul(A[i][k][l], B[k][j][l])); +} + +void cmatmul_TxTx_TxTx( + IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], + IN complex B[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], + OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]) +{ + uint32_t i, j, k, l; + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) + result[i][j][k] = cmake(0, 0); + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_TX_ANT; ++k) + for (l = 0; l < NUM_SC; ++l) result[i][j][l] = cadd(result[i][j][l], cmul(A[i][k][l], B[k][j][l])); } @@ -124,9 +148,10 @@ void cmatadd_TxTx( IN complex B[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], OUT complex result[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]) { - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k) + uint32_t i, j, k; + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) result[i][j][k] = cadd(A[i][j][k], B[i][j][k]); } @@ -134,32 +159,25 @@ void ccholesky_TxTx( IN complex A[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], OUT complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]) { + uint32_t i, j, k, l; + complex sum; /* Init upper triangle with zeros */ - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = i+1; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k) + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = i+1; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) L[i][j][k] = cmake(0, 0); /* Calculate diagonal and lower triangular entries */ - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < i+1; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k){ - complex sum = {0, 0}; - for (uint32_t l = 0; l < j; ++l) + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < i+1; ++j) + for (k = 0; k < NUM_SC; ++k){ + sum.re = 0; + sum.im = 0; + for (l = 0; l < j; ++l) cadd_acc(sum, cmul(L[i][l][k], cconj(L[j][l][k]))); if (i == j) L[i][j][k] = csqrt(csub(A[i][i][k], sum)); else L[i][j][k] = cdiv(csub(A[i][j][k], sum), L[j][j][k]); - - // TODO delete - #if 0 - for (uint32_t i = 0; i < NUM_TX_ANT; ++i){ - for (uint32_t j = 0; j < NUM_TX_ANT; ++j) - printf("(%6i, %6i) ", L[i][j][0].re, L[i][j][0].im); - printf("\n"); - } - printf("\n"); - #endif } } @@ -168,47 +186,53 @@ void ccholesky_nosqrt_TxTx( OUT complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], OUT complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]) { + uint32_t i, j, k, l; + complex sum, t; /* Init zeros in upper triangles of L and D */ - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = i+1; j < NUM_TX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k) + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = i+1; j < NUM_TX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) D[i][j][k] = L[i][j][k] = cmake(0, 0); /* Init zeros in lower triangle of D */ - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < i; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k) + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < i; ++j) + for (k = 0; k < NUM_SC; ++k) D[i][j][k] = cmake(0, 0); /* Init ones in the diagonal of the result */ - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t k = 0; k < NUM_SC; ++k) + for (i = 0; i < NUM_TX_ANT; ++i) + for (k = 0; k < NUM_SC; ++k) L[i][i][k] = cmake(1, 0); /* Calculate the lower triangle of the result */ - for (uint32_t i = 0; i < NUM_TX_ANT; ++i){ + for (i = 0; i < NUM_TX_ANT; ++i){ /* L_ij = (A_ij - \sum_{l=0}^{j-1} L_il L_jl^* D_k) / D_j */ - for (uint32_t j = 0; j < i; ++j){ - complex sum = {0, 0}; - for (uint32_t l = 0; l < j; ++l) - for (uint32_t k = 0; k < NUM_SC; ++k){ + for (j = 0; j < i; ++j){ + sum.re = 0; + sum.im = 0; + for (l = 0; l < j; ++l) + for (k = 0; k < NUM_SC; ++k){ + /* TODO there is one sum for all scs what leads to contamination. Fix that */ complex t = cmul(L[i][l][k], cconj(L[j][l][k])); t = cmul(t, D[l][l][k]); cadd_acc(sum, t); } - for (uint32_t k = 0; k < NUM_SC; ++k){ + for (k = 0; k < NUM_SC; ++k){ L[i][j][k] = csub(A[i][j][k], sum); L[i][j][k] = cdiv(L[i][j][k], D[j][j][k]); } } /* D_i = A_ii - \sum_{j=0}^{i-1} L_ij L_ij^* D_j */ - complex sum = {0, 0}; - for (uint32_t j = 0; j < i; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k){ - complex t = cmul(L[i][j][k], cconj(L[i][j][k])); + sum.re = 0; + sum.im = 0; + for (j = 0; j < i; ++j) + for (k = 0; k < NUM_SC; ++k){ + /* TODO there is one sum for all scs what leads to contamination. Fix that */ + t = cmul(L[i][j][k], cconj(L[i][j][k])); t = cmul(t, D[j][j][k]); cadd_acc(sum, t); } - for (uint32_t k = 0; k < NUM_SC; ++k) + for (k = 0; k < NUM_SC; ++k) D[i][i][k] = csub(A[i][i][k], sum); } } @@ -218,9 +242,13 @@ void cmatvecmul_TxRx( IN complex b[NUM_RX_ANT][NUM_SC], OUT complex result[NUM_TX_ANT][NUM_SC]) { - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_RX_ANT; ++j) - for (uint32_t k = 0; k < NUM_SC; ++k) + uint32_t i, j, k; + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_SC; ++j) + result[i][j] = cmake(0., 0.); + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_RX_ANT; ++j) + for (k = 0; k < NUM_SC; ++k) result[i][k] = cadd(result[i][k], cmul(A[i][j][k], b[j][k])); } @@ -229,10 +257,11 @@ void cforwardsub_TxTx( IN complex b[NUM_TX_ANT][NUM_SC], OUT complex result[NUM_TX_ANT][NUM_SC]) { - for (uint32_t i = 0; i < NUM_TX_ANT; ++i) - for (uint32_t j = 0; j < NUM_SC; ++j){ + uint32_t i, j, k; + for (i = 0; i < NUM_TX_ANT; ++i) + for (j = 0; j < NUM_SC; ++j){ result[i][j] = b[i][j]; - for (uint32_t k = 0; k < i; ++k) + for (k = 0; k < i; ++k) result[i][j] = csub(result[i][j], cmul(L[i][k][j], result[k][j])); result[i][j] = cdiv(result[i][j], L[i][i][j]); } @@ -243,10 +272,11 @@ void cbackwardsub_TxTx( IN complex b[NUM_TX_ANT][NUM_SC], OUT complex result[NUM_TX_ANT][NUM_SC]) { - for (uint32_t i = NUM_TX_ANT; i > 0; --i) - for (uint32_t j = 0; j < NUM_SC; ++j){ + uint32_t i, j, k; + for (i = NUM_TX_ANT; i != (uint32_t)-1; --i) + for (j = 0; j < NUM_SC; ++j){ result[i][j] = b[i][j]; - for (uint32_t k = i+1; k < NUM_TX_ANT; ++k) + for (k = i+1; k < NUM_TX_ANT; ++k) result[i][j] = csub(result[i][j], cmul(U[i][k][j], result[k][j])); result[i][j] = cdiv(result[i][j], U[i][i][j]); } @@ -259,7 +289,7 @@ void cbackwardsub_TxTx( void load_data( IN const char* file, IN size_t size, - OUT uint16_t *out) + OUT data_t *out) { FILE *fp = fopen(file, "rb"); @@ -267,7 +297,7 @@ void load_data( fprintf(stderr, "Error opening file %s.\n", file); exit(8); } - if ((fread(out, sizeof(uint16_t), size, fp)) != size) { + if ((fread(out, sizeof(data_t), size, fp)) != size) { fprintf(stderr, "Error reading file %s.\n", file); exit(8); } @@ -277,7 +307,7 @@ void load_data( void save_data( IN const char* file, - IN uint16_t x_mmse[NUM_TX_ANT][NUM_SC][2]) + IN data_t x_mmse[NUM_TX_ANT][NUM_SC][2]) { FILE *fp = fopen(file, "wb"); @@ -285,7 +315,7 @@ void save_data( fprintf(stderr, "Error opening file %s.\n", file); exit(8); } - if ((fwrite(x_mmse, sizeof(uint16_t), NUM_TX_ANT*NUM_SC*2, fp)) != NUM_TX_ANT*NUM_SC*2) { + if ((fwrite(x_mmse, sizeof(data_t), NUM_TX_ANT*NUM_SC*2, fp)) != NUM_TX_ANT*NUM_SC*2) { fprintf(stderr, "Error writing file %s.\n", file); exit(8); } @@ -341,7 +371,7 @@ void mmse_nosqrt( cmatadd_TxTx(HH_H, R, HH_H); /* L, D: (H^H*H + R) = L*D*L^H */ complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; + complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; /* TODO D should be a vector to save memory*/ ccholesky_nosqrt_TxTx(HH_H, L, D); /* z: L*z = H^H*y */ complex HHy[NUM_TX_ANT][NUM_SC]; @@ -349,8 +379,10 @@ void mmse_nosqrt( complex z[NUM_TX_ANT][NUM_SC]; cforwardsub_TxTx(L, HHy, z); /* x_MMSE: L^H*x_MMSE = D^-1*z */ + for (uint32_t i = 0; i != NUM_TX_ANT; ++i) + for (uint32_t j = 0; j != NUM_SC; ++j) + z[i][j] = cdiv(z[i][j], D[i][i][j]); complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; cmat_hermitian_transpose_TxTx(L, LH); - cbackwardsub_TxTx(LH, z, x_MMSE); - cmatvecmul_TxRx(D, x_MMSE, x_MMSE); + cbackwardsub_TxTx(L, z, x_MMSE); }