mmserv

Minimum Mean Square Error detection on RISC-V Vector Extention
git clone https://git.ea.contact/mmserv
Log | Files | Refs | README

cforwardsub.c (3807B)


      1 #include "../include/common.h"
      2 
      3 #include <stddef.h>
      4 
      5 /** Complex forward substitution L*z = HHy
      6  * 
      7  * z_t = (HHy_t - \sum_{tt=0}^{t-1} L_{t tt} z_tt) / L_{t t} (for LL / float solution)
      8  * z_t = (HHy_t - \sum_{tt=0}^{t-1} L_{t tt} z_tt) (for LDL / fixed solution)
      9  * 
     10  * \global g_L lower triangular matrix. Shape [NUM_TX][NUM_TX][NUM_SC]
     11  * \global g_HHy vector. Shape [NUM_TX][NUM_SC]
     12  * \global g_z output vector. Shape [NUM_TX][NUM_SC]
     13  */
     14 void cforwardsub()
     15 {
     16   size_t t, tt, s;
     17   size_t off_L, off_HHy, off_z;
     18 
     19 #if defined(ARCH_x86) || defined(ARCH_rv)
     20   acc_t sum_re, sum_im;
     21 
     22   for (t = 0; t < NUM_TX; ++t) {
     23     for (s = 0; s < NUM_SC; ++s) {
     24       sum_re = sum_im = 0;
     25       for (tt = 0; tt < t; ++tt) {
     26         off_L = t * NUM_TX * NUM_SC + tt * NUM_SC + s;
     27         off_z = tt * NUM_SC + s;
     28         sum_re += (acc_t)g_L.re[off_L] * (acc_t)g_z.re[off_z]
     29                 - (acc_t)g_L.im[off_L] * (acc_t)g_z.im[off_z];
     30         sum_im += (acc_t)g_L.re[off_L] * (acc_t)g_z.im[off_z]
     31                 + (acc_t)g_L.im[off_L] * (acc_t)g_z.re[off_z];
     32       }
     33       off_HHy = t * NUM_SC + s;
     34       off_z = t * NUM_SC + s;
     35 #if defined(DATA_TYPE_float)
     36       off_L = t * NUM_TX * NUM_SC + t * NUM_SC + s;
     37       g_z.re[off_z] = (g_HHy.re[off_HHy] - (data_t)sum_re) / g_L.re[off_L];
     38       g_z.im[off_z] = (g_HHy.im[off_HHy] - (data_t)sum_im) / g_L.re[off_L];
     39 #elif defined(DATA_TYPE_fixed)
     40       g_z.re[off_z] = g_HHy.re[off_HHy] - (data_t)(sum_re >> FP_Q);
     41       g_z.im[off_z] = g_HHy.im[off_HHy] - (data_t)(sum_im >> FP_Q);
     42 #else
     43 #error "Unknown data type"
     44 #endif
     45     }
     46   }
     47 #elif defined(ARCH_rvv)
     48   size_t sz, vl;
     49 
     50   for (t = 0; t < NUM_TX; ++t) {
     51     sz = NUM_SC;
     52     s = 0;
     53 
     54     while (sz > 0) {
     55       off_HHy = t * NUM_SC + s;
     56 
     57       /* Initialize z as HHy */
     58       /* v0 - z real part */
     59       /* v4 - z imaginary part */
     60       __asm__ volatile (
     61         "vsetvli %0, %1, e32, m4, ta, ma\n"
     62         "vle32.v v0, (%2)\n"
     63         "vle32.v v4, (%3)\n"
     64         : "=r"(vl)
     65         : "r"(sz),
     66           "r"(&g_HHy.re[off_HHy]), "r"(&g_HHy.im[off_HHy])
     67       );
     68 
     69       for (tt = 0; tt != t; ++tt) {
     70         off_L = t * NUM_TX * NUM_SC + tt * NUM_SC + s;
     71         off_z = tt * NUM_SC + s;
     72 
     73         /* HHy_t - sum L_{t tt} * z_{tt} */
     74         __asm__ volatile (
     75           "vle32.v v8, (%0)\n"
     76           "vle32.v v12, (%1)\n"
     77           "vle32.v v16, (%2)\n"
     78           "vle32.v v20, (%3)\n"
     79 #if defined(DATA_TYPE_float)
     80           /* real part */
     81           "vfnmsac.vv v0, v8, v16\n"
     82           "vfmacc.vv v0, v12, v20\n"
     83           /* imaginary part */
     84           "vfnmsac.vv v4, v12, v16\n"
     85           "vfnmsac.vv v4, v8, v20\n"
     86 #elif defined(DATA_TYPE_fixed)
     87           /* real part */
     88           "vsmul.vv v24, v8, v16\n"
     89           "vssub.vv v0, v0, v24\n"
     90           "vsmul.vv v24, v12, v20\n"
     91           "vsadd.vv v0, v0, v24\n"
     92           /* imaginary part */
     93           "vsmul.vv v24, v12, v16\n"
     94           "vssub.vv v4, v4, v24\n"
     95           "vsmul.vv v24, v8, v20\n"
     96           "vssub.vv v4, v4, v24\n"
     97 #else
     98 #error "Unknown data type"
     99 #endif
    100           :
    101           : "r"(&g_L.re[off_L]), "r"(&g_L.im[off_L]),
    102             "r"(&g_z.re[off_z]), "r"(&g_z.im[off_z])
    103         );
    104       }
    105 
    106 #if defined(DATA_TYPE_float)
    107       /* Divide by L_ii */
    108       /* L_ii imaginary part is zero, so we ignore it */
    109       off_L = t * NUM_TX * NUM_SC + t * NUM_SC + s;
    110       __asm__ volatile (
    111         "vle32.v v8, (%0)\n"
    112         "vfdiv.vv v0, v0, v8\n"
    113         "vfdiv.vv v4, v4, v8\n"
    114         :
    115         : "r"(&g_L.re[off_L])
    116       );
    117 #endif
    118 
    119       /* Store result */
    120       off_z = t * NUM_SC + s;
    121       __asm__ volatile (
    122         "vse32.v v0, (%0)\n"
    123         "vse32.v v4, (%1)\n"
    124         :
    125         : "r"(&g_z.re[off_z]), "r"(&g_z.im[off_z])
    126       );
    127 
    128       s += vl;
    129       sz -= vl;
    130     }
    131   }
    132 
    133 #else
    134 #error "Unknown architecture"
    135 #endif
    136 }