commit 0a85f271d5d79b211d5c8a6c38491485cd95c271
parent 504337abf84361c83f30fef1dac4ee1f2384aed1
Author: Egor Achkasov <eaachkasov@edu.hse.ru>
Date: Fri, 20 Dec 2024 16:19:22 +0100
Merge ara branch into master
Diffstat:
5 files changed, 149 insertions(+), 84 deletions(-)
diff --git a/include/define.h b/include/define.h
@@ -7,6 +7,8 @@
#define NOSQRT 1
+#define DEBUG
+
#define IN
#define OUT
diff --git a/include/mmserv.h b/include/mmserv.h
@@ -11,8 +11,8 @@
*/
typedef struct {
- data_t re;
- data_t im;
+ data_t re;
+ data_t im;
} complex;
/** Calculate MMSE estimation of x in y = H*x + n
diff --git a/main.c b/main.c
@@ -5,7 +5,7 @@
void load_data(
IN char* filepath,
IN size_t size,
- OUT data_t* buff)
+ OUT void* buff)
{
FILE* f = fopen(filepath, "r");
if (!f) {
@@ -23,66 +23,28 @@ int main() {
uint32_t i, j, k;
/* Load the data */
- data_t x_raw[NUM_TX_ANT][NUM_SC][2]; /* Transmitted signal */
- data_t H_raw[NUM_RX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Channel */
- data_t R_raw[NUM_TX_ANT][NUM_TX_ANT][NUM_SC][2]; /* Noise covariance matrix */
- data_t y_raw[NUM_RX_ANT][NUM_SC][2]; /* Received signal */
+ complex x[NUM_TX_ANT][NUM_SC]; /* Transmitted signal */
+ complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC]; /* Channel */
+ complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; /* Noise covariance matrix */
+ complex y[NUM_RX_ANT][NUM_SC]; /* Received signal */
- load_data("data/x.bin", NUM_TX_ANT * NUM_SC * 2, x_raw);
- load_data("data/H.bin", NUM_RX_ANT * NUM_TX_ANT * NUM_SC * 2, H_raw);
- load_data("data/R.bin", NUM_TX_ANT * NUM_TX_ANT * NUM_SC * 2, R_raw);
- load_data("data/y.bin", NUM_RX_ANT * NUM_SC * 2, y_raw);
-
- /* Cast the data into complex data structures */
- complex x[NUM_TX_ANT][NUM_SC];
- complex H[NUM_RX_ANT][NUM_TX_ANT][NUM_SC];
- complex y[NUM_RX_ANT][NUM_SC];
- complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_SC; ++j){
- x[i][j].re = x_raw[i][j][0];
- x[i][j].im = x_raw[i][j][1];
- }
- for (i = 0; i < NUM_RX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k){
- H[i][j][k].re = H_raw[i][j][k][0];
- H[i][j][k].im = H_raw[i][j][k][1];
- }
- for (i = 0; i < NUM_RX_ANT; ++i)
- for (j = 0; j < NUM_SC; ++j){
- y[i][j].re = y_raw[i][j][0];
- y[i][j].im = y_raw[i][j][1];
- }
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_TX_ANT; ++j)
- for (k = 0; k < NUM_SC; ++k){
- R[i][j][k].re = R_raw[i][j][k][0];
- R[i][j][k].im = R_raw[i][j][k][1];
- }
+ load_data("data/x.bin", NUM_TX_ANT * NUM_SC * 2, x);
+ load_data("data/H.bin", NUM_RX_ANT * NUM_TX_ANT * NUM_SC * 2, H);
+ load_data("data/R.bin", NUM_TX_ANT * NUM_TX_ANT * NUM_SC * 2, R);
+ load_data("data/y.bin", NUM_RX_ANT * NUM_SC * 2, y);
/* Calculate the MMSE approximation */
complex x_MMSE[NUM_TX_ANT][NUM_SC];
mmse(H, y, R, x_MMSE);
-
- /* Print MSE */
printf("%f\n", mse(x, x_MMSE));
- /* Interleave the result */
- data_t res[NUM_TX_ANT][NUM_SC][2];
- for (i = 0; i < NUM_TX_ANT; ++i)
- for (j = 0; j < NUM_SC; ++j){
- res[i][j][0] = x_MMSE[i][j].re;
- res[i][j][1] = x_MMSE[i][j].im;
- }
-
/* Save the result */
FILE* f = fopen("out/x_mmse.bin", "w");
if (!f) {
fprintf(stderr, "Error: could not open file out/x_MMSE.bin\n");
exit(8);
}
- if ((fwrite(res, sizeof(data_t), NUM_TX_ANT * NUM_SC * 2, f)) != NUM_TX_ANT * NUM_SC * 2) {
+ if ((fwrite(x_MMSE, sizeof(data_t), NUM_TX_ANT * NUM_SC * 2, f)) != NUM_TX_ANT * NUM_SC * 2) {
fprintf(stderr, "Error: could not write file out/x_MMSE.bin\n");
exit(8);
}
diff --git a/script/gen_data.py b/script/gen_data.py
@@ -58,10 +58,10 @@ class Section:
sections = [
- Section("x_raw", "data/x.txt", "3", 32, x.size * 2),
- Section("H_raw", "data/H.txt", "3", 32, H.size * 2),
- Section("R_raw", "data/R.txt", "3", 32, R.size * 2),
- Section("y_raw", "data/y.txt", "3", 32, y.size * 2),
+ Section("x", "data/x.txt", "3", 32, x.size * 2),
+ Section("H", "data/H.txt", "3", 32, H.size * 2),
+ Section("R", "data/R.txt", "3", 32, R.size * 2),
+ Section("y", "data/y.txt", "3", 32, y.size * 2),
]
# Create "data" directory if it does not exist
diff --git a/src/mmserv.c b/src/mmserv.c
@@ -1,5 +1,43 @@
#include "../include/mmserv.h"
+#include "../../common/rivec/vector_defines.h"
+#include "riscv_vector.h"
+
+/* Profiling */
+#ifdef DEBUG
+#include <stdio.h>
+
+#ifdef _WIN32
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+static uint64_t start, stop;
+void start_timer()
+{
+ start = __rdtsc();
+}
+void stop_timer()
+{
+ stop = __rdtsc();
+}
+uint64_t get_timer()
+{
+ return stop - start;
+}
+
+#define TIME(msg, func, ...) \
+ start_timer(); \
+ func(__VA_ARGS__); \
+ stop_timer(); \
+ printf(msg, get_timer());
+
+#else
+#define TIME(msg, func, ...) func(__VA_ARGS__);
+
+#endif
+
/*
* Complex functions
*/
@@ -179,6 +217,7 @@ void ccholesky_TxTx(
for (j = i+1; j < NUM_TX_ANT; ++j)
for (k = 0; k < NUM_SC; ++k)
L[i][j][k] = cmake(0, 0);
+
/* Calculate diagonal and lower triangular entries */
for (i = 0; i < NUM_TX_ANT; ++i)
for (j = 0; j < i+1; ++j)
@@ -215,7 +254,6 @@ void ccholesky_nosqrt_TxTx(
for (k = 0; k < NUM_SC; ++k)
L[i][i][k] = cmake(1, 0);
-
/* Calculate the lower triangle of the result */
for (i = 0; i < NUM_TX_ANT; ++i){
/* L_ij = (A_ij - \sum_{l=0}^{j-1} L_il L_jl^* D_k) / D_j */
@@ -225,7 +263,7 @@ void ccholesky_nosqrt_TxTx(
for (l = 0; l < j; ++l)
for (k = 0; k < NUM_SC; ++k){
/* TODO there is one sum for all scs what leads to contamination. Fix that */
- complex t = cmul(L[i][l][k], cconj(L[j][l][k]));
+ t = cmul(L[i][l][k], cconj(L[j][l][k]));
t = cmul(t, D[l][l][k]);
cadd_acc(sum, t);
}
@@ -304,26 +342,51 @@ void mmse(
IN complex R[NUM_TX_ANT][NUM_TX_ANT][NUM_SC],
OUT complex x_MMSE[NUM_TX_ANT][NUM_SC])
{
- /* H^H */
complex HH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
- cmat_hermitian_transpose_RxTx(H, HH);
- /* H^H*H */
complex HH_H[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- cmatmul_TxRx_RxTx(HH, H, HH_H);
- /* H^H*H + R */
- cmatadd_TxTx(HH_H, R, HH_H);
- /* L: (H^H*H + R) = L*L^H */
complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- ccholesky_TxTx(HH_H, L);
- /* z: L*z = H^H*y */
complex HHy[NUM_TX_ANT][NUM_SC];
- cmatvecmul_TxRx(HH, y, HHy);
complex z[NUM_TX_ANT][NUM_SC];
- cforwardsub_TxTx(L, HHy, z);
- /* x_MMSE: L^H*x_MMSE = z */
complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- cmat_hermitian_transpose_TxTx(L, LH);
- cbackwardsub_TxTx(LH, z, x_MMSE);
+
+ /* H^H */
+ TIME(
+ "Hermitian transpose (RxTx): %ld\n",
+ cmat_hermitian_transpose_RxTx,
+ H, HH);
+ /* H^H*H */
+ TIME(
+ "Matmul (TxRx x RxTx): %ld\n",
+ cmatmul_TxRx_RxTx,
+ HH, H, HH_H);
+ /* H^H*H + R */
+ TIME(
+ "Matadd (TxTx + TxTx): %ld\n",
+ cmatadd_TxTx,
+ HH_H, R, HH_H);
+ /* L: (H^H*H + R) = L*L^H */
+ TIME(
+ "Cholesky (TxTx): %ld\n",
+ ccholesky_TxTx,
+ HH_H, L);
+ /* z: L*z = H^H*y */
+ TIME(
+ "Matrix-vector multiplication (TxRx x Rx): %ld\n",
+ cmatvecmul_TxRx,
+ HH, y, HHy);
+ TIME(
+ "Forward substitution (TxTx): %ld\n",
+ cforwardsub_TxTx,
+ L, HHy, z);
+ /* x_MMSE: L^H*x_MMSE = z */
+ TIME(
+ "Hermitian transpose (TxTx): %ld\n",
+ cmat_hermitian_transpose_TxTx,
+ L, LH);
+ TIME(
+ "Backward substitution (TxTx): %ld\n",
+ cbackwardsub_TxTx,
+ LH, z, x_MMSE);
}
void mmse_nosqrt(
@@ -334,28 +397,66 @@ void mmse_nosqrt(
{
/* H^H */
complex HH[NUM_TX_ANT][NUM_RX_ANT][NUM_SC];
- cmat_hermitian_transpose_RxTx(H, HH);
- /* H^H*H */
complex HH_H[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- cmatmul_TxRx_RxTx(HH, H, HH_H);
- /* H^H*H + R */
- cmatadd_TxTx(HH_H, R, HH_H);
- /* L, D: (H^H*H + R) = L*D*L^H */
complex L[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC]; /* TODO D should be a vector to save memory*/
- ccholesky_nosqrt_TxTx(HH_H, L, D);
- /* z: L*z = H^H*y */
+ /* TODO D should be a vector to save memory */
+ complex D[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
complex HHy[NUM_TX_ANT][NUM_SC];
- cmatvecmul_TxRx(HH, y, HHy);
complex z[NUM_TX_ANT][NUM_SC];
- cforwardsub_TxTx(L, HHy, z);
+ complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
+
+ TIME(
+ "Hermitian transpose (RxTx): %ld\n",
+ cmat_hermitian_transpose_RxTx,
+ H, HH);
+ TIME(
+ "Matmul (TxRx x RxTx): %ld\n",
+ cmatmul_TxRx_RxTx,
+ HH, H, HH_H);
+ TIME(
+ "Matadd (TxTx + TxTx): %ld\n",
+ cmatadd_TxTx,
+ HH_H, R, HH_H);
+ /* L, D: (H^H*H + R) = L*D*L^H */
+ TIME(
+ "Cholesky nosqrt (TxTx): %ld\n",
+ ccholesky_nosqrt_TxTx,
+ HH_H, L, D);
+ /* z: L*z = H^H*y */
+ TIME(
+ "Matrix-vector multiplication (TxRx x Rx): %ld\n",
+ cmatvecmul_TxRx,
+ HH, y, HHy);
+ TIME(
+ "Forward substitution (TxTx): %ld\n",
+ cforwardsub_TxTx,
+ L, HHy, z);
/* x_MMSE: L^H*x_MMSE = D^-1*z */
+ start_timer();
for (uint32_t i = 0; i != NUM_TX_ANT; ++i)
for (uint32_t j = 0; j != NUM_SC; ++j)
z[i][j] = cdiv(z[i][j], D[i][i][j]);
- complex LH[NUM_TX_ANT][NUM_TX_ANT][NUM_SC];
- cmat_hermitian_transpose_TxTx(L, LH);
- cbackwardsub_TxTx(L, z, x_MMSE);
+ stop_timer();
+ printf("Division (TxTx): %ld\n", get_timer());
+ TIME(
+ "Hermitian transpose (TxTx): %ld\n",
+ cmat_hermitian_transpose_TxTx,
+ L, LH);
+ TIME(
+ "Backward substitution (TxTx): %ld\n",
+ cbackwardsub_TxTx,
+ LH, z, x_MMSE);
+}
+
+acc_t mse(
+ IN complex x[NUM_TX_ANT][NUM_SC],
+ IN complex x_MMSE[NUM_TX_ANT][NUM_SC])
+{
+ acc_t sum = 0;
+ for (uint32_t i = 0; i != NUM_TX_ANT; ++i)
+ for (uint32_t j = 0; j != NUM_SC; ++j)
+ sum += cabs2(csub(x[i][j], x_MMSE[i][j]));
+ return sum / (NUM_TX_ANT * NUM_SC);
}
acc_t mse(