mmserv

Minimum Mean Square Error detection on RISC-V Vector Extention
git clone https://git.ea.contact/mmserv
Log | Files | Refs | README

commit 0a85f271d5d79b211d5c8a6c38491485cd95c271
parent 504337abf84361c83f30fef1dac4ee1f2384aed1
Author: Egor Achkasov <eaachkasov@edu.hse.ru>
Date:   Fri, 20 Dec 2024 16:19:22 +0100

Merge ara branch into master

Diffstat:
Minclude/define.h | 2++
Minclude/mmserv.h | 4++--
Mmain.c | 58++++++++++------------------------------------------------
Mscript/gen_data.py | 8++++----
Msrc/mmserv.c | 161++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
5 files changed, 149 insertions(+), 84 deletions(-)

diff --git a/include/define.h b/include/define.h @@ -7,6 +7,8 @@ #define NOSQRT 1 +#define DEBUG + #define IN #define OUT diff --git a/include/mmserv.h b/include/mmserv.h @@ -11,8 +11,8 @@ */ typedef struct { - data_t re; - data_t im; + data_t re; + data_t im; } complex; /** Calculate MMSE estimation of x in y = H*x + n diff --git a/main.c b/main.c @@ -5,7 +5,7 @@ void load_data( IN char* filepath, IN size_t size, - OUT data_t* buff) + OUT void* buff) { FILE* f = fopen(filepath, "r"); if (!f) { @@ -23,66 +23,28 @@ int main() { uint32_t i, j, k; /* Load the data */ - data_t x_raw[NUM_TX_ANT][NUM_SC][2]; /* Transmitted signal */ - data_t H_raw[NUM_RX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Channel */ - data_t R_raw[NUM_TX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Noise covariance matrix */ - data_t y_raw[NUM_RX_ANT][NUM_SC][2]; /* Received signal */ + complex x[NUM_TX_ANT][NUM_SC]; /* Transmitted signal */ + complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC]; /* Channel */ + complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; /* Noise covariance matrix */ + complex y[NUM_RX_ANT][NUM_SC]; /* Received signal */ - load_data("data/x.bin", NUM_TX_ANT * NUM_SC * 2, x_raw); - load_data("data/H.bin", NUM_RX_ANT * NUM_TX_ANT * NUM_SC * 2, H_raw); - load_data("data/R.bin", NUM_TX_ANT * NUM_TX_ANT * NUM_SC * 2, R_raw); - load_data("data/y.bin", NUM_RX_ANT * NUM_SC * 2, y_raw); - - /* Cast the data into complex data structures */ - complex x[NUM_TX_ANT][NUM_SC]; - complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC]; - complex y[NUM_RX_ANT][NUM_SC]; - complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - for (i = 0; i < NUM_TX_ANT; ++i) - for (j = 0; j < NUM_SC; ++j){ - x[i][j].re = x_raw[i][j][0]; - x[i][j].im = x_raw[i][j][1]; - } - for (i = 0; i < NUM_RX_ANT; ++i) - for (j = 0; j < NUM_TX_ANT; ++j) - for (k = 0; k < NUM_SC; ++k){ - H[i][j][k].re = H_raw[i][j][k][0]; - H[i][j][k].im = H_raw[i][j][k][1]; - } - for (i = 0; i < NUM_RX_ANT; ++i) - for (j = 0; j < NUM_SC; ++j){ - y[i][j].re = y_raw[i][j][0]; - y[i][j].im = y_raw[i][j][1]; - } - for (i = 0; i < NUM_TX_ANT; ++i) - for (j = 0; j < NUM_TX_ANT; ++j) - for (k = 0; k < NUM_SC; ++k){ - R[i][j][k].re = R_raw[i][j][k][0]; - R[i][j][k].im = R_raw[i][j][k][1]; - } + load_data("data/x.bin", NUM_TX_ANT * NUM_SC * 2, x); + load_data("data/H.bin", NUM_RX_ANT * NUM_TX_ANT * NUM_SC * 2, H); + load_data("data/R.bin", NUM_TX_ANT * NUM_TX_ANT * NUM_SC * 2, R); + load_data("data/y.bin", NUM_RX_ANT * NUM_SC * 2, y); /* Calculate the MMSE approximation */ complex x_MMSE[NUM_TX_ANT][NUM_SC]; mmse(H, y, R, x_MMSE); - - /* Print MSE */ printf("%f\n", mse(x, x_MMSE)); - /* Interleave the result */ - data_t res[NUM_TX_ANT][NUM_SC][2]; - for (i = 0; i < NUM_TX_ANT; ++i) - for (j = 0; j < NUM_SC; ++j){ - res[i][j][0] = x_MMSE[i][j].re; - res[i][j][1] = x_MMSE[i][j].im; - } - /* Save the result */ FILE* f = fopen("out/x_mmse.bin", "w"); if (!f) { fprintf(stderr, "Error: could not open file out/x_MMSE.bin\n"); exit(8); } - if ((fwrite(res, sizeof(data_t), NUM_TX_ANT * NUM_SC * 2, f)) != NUM_TX_ANT * NUM_SC * 2) { + if ((fwrite(x_MMSE, sizeof(data_t), NUM_TX_ANT * NUM_SC * 2, f)) != NUM_TX_ANT * NUM_SC * 2) { fprintf(stderr, "Error: could not write file out/x_MMSE.bin\n"); exit(8); } diff --git a/script/gen_data.py b/script/gen_data.py @@ -58,10 +58,10 @@ class Section: sections = [ - Section("x_raw", "data/x.txt", "3", 32, x.size * 2), - Section("H_raw", "data/H.txt", "3", 32, H.size * 2), - Section("R_raw", "data/R.txt", "3", 32, R.size * 2), - Section("y_raw", "data/y.txt", "3", 32, y.size * 2), + Section("x", "data/x.txt", "3", 32, x.size * 2), + Section("H", "data/H.txt", "3", 32, H.size * 2), + Section("R", "data/R.txt", "3", 32, R.size * 2), + Section("y", "data/y.txt", "3", 32, y.size * 2), ] # Create "data" directory if it does not exist diff --git a/src/mmserv.c b/src/mmserv.c @@ -1,5 +1,43 @@ #include "../include/mmserv.h" +#include "../../common/rivec/vector_defines.h" +#include "riscv_vector.h" + +/* Profiling */ +#ifdef DEBUG +#include <stdio.h> + +#ifdef _WIN32 +#include <intrin.h> +#else +#include <x86intrin.h> +#endif + +static uint64_t start, stop; +void start_timer() +{ + start = __rdtsc(); +} +void stop_timer() +{ + stop = __rdtsc(); +} +uint64_t get_timer() +{ + return stop - start; +} + +#define TIME(msg, func, ...) \ + start_timer(); \ + func(__VA_ARGS__); \ + stop_timer(); \ + printf(msg, get_timer()); + +#else +#define TIME(msg, func, ...) func(__VA_ARGS__); + +#endif + /* * Complex functions */ @@ -179,6 +217,7 @@ void ccholesky_TxTx( for (j = i+1; j < NUM_TX_ANT; ++j) for (k = 0; k < NUM_SC; ++k) L[i][j][k] = cmake(0, 0); + /* Calculate diagonal and lower triangular entries */ for (i = 0; i < NUM_TX_ANT; ++i) for (j = 0; j < i+1; ++j) @@ -215,7 +254,6 @@ void ccholesky_nosqrt_TxTx( for (k = 0; k < NUM_SC; ++k) L[i][i][k] = cmake(1, 0); - /* Calculate the lower triangle of the result */ for (i = 0; i < NUM_TX_ANT; ++i){ /* L_ij = (A_ij - \sum_{l=0}^{j-1} L_il L_jl^* D_k) / D_j */ @@ -225,7 +263,7 @@ void ccholesky_nosqrt_TxTx( for (l = 0; l < j; ++l) for (k = 0; k < NUM_SC; ++k){ /* TODO there is one sum for all scs what leads to contamination. Fix that */ - complex t = cmul(L[i][l][k], cconj(L[j][l][k])); + t = cmul(L[i][l][k], cconj(L[j][l][k])); t = cmul(t, D[l][l][k]); cadd_acc(sum, t); } @@ -304,26 +342,51 @@ void mmse( IN complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC], OUT complex x_MMSE[NUM_TX_ANT][NUM_SC]) { - /* H^H */ complex HH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC]; - cmat_hermitian_transpose_RxTx(H, HH); - /* H^H*H */ complex HH_H[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - cmatmul_TxRx_RxTx(HH, H, HH_H); - /* H^H*H + R */ - cmatadd_TxTx(HH_H, R, HH_H); - /* L: (H^H*H + R) = L*L^H */ complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - ccholesky_TxTx(HH_H, L); - /* z: L*z = H^H*y */ complex HHy[NUM_TX_ANT][NUM_SC]; - cmatvecmul_TxRx(HH, y, HHy); complex z[NUM_TX_ANT][NUM_SC]; - cforwardsub_TxTx(L, HHy, z); - /* x_MMSE: L^H*x_MMSE = z */ complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - cmat_hermitian_transpose_TxTx(L, LH); - cbackwardsub_TxTx(LH, z, x_MMSE); + + /* H^H */ + TIME( + "Hermitian transpose (RxTx): %ld\n", + cmat_hermitian_transpose_RxTx, + H, HH); + /* H^H*H */ + TIME( + "Matmul (TxRx x RxTx): %ld\n", + cmatmul_TxRx_RxTx, + HH, H, HH_H); + /* H^H*H + R */ + TIME( + "Matadd (TxTx + TxTx): %ld\n", + cmatadd_TxTx, + HH_H, R, HH_H); + /* L: (H^H*H + R) = L*L^H */ + TIME( + "Cholesky (TxTx): %ld\n", + ccholesky_TxTx, + HH_H, L); + /* z: L*z = H^H*y */ + TIME( + "Matrix-vector multiplication (TxRx x Rx): %ld\n", + cmatvecmul_TxRx, + HH, y, HHy); + TIME( + "Forward substitution (TxTx): %ld\n", + cforwardsub_TxTx, + L, HHy, z); + /* x_MMSE: L^H*x_MMSE = z */ + TIME( + "Hermitian transpose (TxTx): %ld\n", + cmat_hermitian_transpose_TxTx, + L, LH); + TIME( + "Backward substitution (TxTx): %ld\n", + cbackwardsub_TxTx, + LH, z, x_MMSE); } void mmse_nosqrt( @@ -334,28 +397,66 @@ void mmse_nosqrt( { /* H^H */ complex HH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC]; - cmat_hermitian_transpose_RxTx(H, HH); - /* H^H*H */ complex HH_H[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - cmatmul_TxRx_RxTx(HH, H, HH_H); - /* H^H*H + R */ - cmatadd_TxTx(HH_H, R, HH_H); - /* L, D: (H^H*H + R) = L*D*L^H */ complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; /* TODO D should be a vector to save memory*/ - ccholesky_nosqrt_TxTx(HH_H, L, D); - /* z: L*z = H^H*y */ + /* TODO D should be a vector to save memory */ + complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; complex HHy[NUM_TX_ANT][NUM_SC]; - cmatvecmul_TxRx(HH, y, HHy); complex z[NUM_TX_ANT][NUM_SC]; - cforwardsub_TxTx(L, HHy, z); + complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; + + TIME( + "Hermitian transpose (RxTx): %ld\n", + cmat_hermitian_transpose_RxTx, + H, HH); + TIME( + "Matmul (TxRx x RxTx): %ld\n", + cmatmul_TxRx_RxTx, + HH, H, HH_H); + TIME( + "Matadd (TxTx + TxTx): %ld\n", + cmatadd_TxTx, + HH_H, R, HH_H); + /* L, D: (H^H*H + R) = L*D*L^H */ + TIME( + "Cholesky nosqrt (TxTx): %ld\n", + ccholesky_nosqrt_TxTx, + HH_H, L, D); + /* z: L*z = H^H*y */ + TIME( + "Matrix-vector multiplication (TxRx x Rx): %ld\n", + cmatvecmul_TxRx, + HH, y, HHy); + TIME( + "Forward substitution (TxTx): %ld\n", + cforwardsub_TxTx, + L, HHy, z); /* x_MMSE: L^H*x_MMSE = D^-1*z */ + start_timer(); for (uint32_t i = 0; i != NUM_TX_ANT; ++i) for (uint32_t j = 0; j != NUM_SC; ++j) z[i][j] = cdiv(z[i][j], D[i][i][j]); - complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; - cmat_hermitian_transpose_TxTx(L, LH); - cbackwardsub_TxTx(L, z, x_MMSE); + stop_timer(); + printf("Division (TxTx): %ld\n", get_timer()); + TIME( + "Hermitian transpose (TxTx): %ld\n", + cmat_hermitian_transpose_TxTx, + L, LH); + TIME( + "Backward substitution (TxTx): %ld\n", + cbackwardsub_TxTx, + LH, z, x_MMSE); +} + +acc_t mse( + IN complex x[NUM_TX_ANT][NUM_SC], + IN complex x_MMSE[NUM_TX_ANT][NUM_SC]) +{ + acc_t sum = 0; + for (uint32_t i = 0; i != NUM_TX_ANT; ++i) + for (uint32_t j = 0; j != NUM_SC; ++j) + sum += cabs2(csub(x[i][j], x_MMSE[i][j])); + return sum / (NUM_TX_ANT * NUM_SC); } acc_t mse(