zita-convolver: always scope branches and loops

This commit is contained in:
Robin Gareus 2021-03-29 15:34:44 +02:00
parent eebe4e467b
commit a270dc76d6
Signed by: rgareus
GPG Key ID: A090BCE02CF57F04
2 changed files with 150 additions and 78 deletions

View File

@ -37,8 +37,9 @@ static float*
calloc_real (uint32_t k)
{
// Allocate a buffer of k floats with fftwf_alloc_real, zero-fill it and
// return it. Throws Converror (Converror::MEM_ALLOC) when the allocation
// returns a null pointer.
// NOTE(review): the null check appears twice because this listing interleaves
// the pre-change (unbraced) and post-change (braced) lines of the commit.
float* p = fftwf_alloc_real (k);
if (!p)
if (!p) {
throw (Converror (Converror::MEM_ALLOC));
}
// Clear all k elements before handing the buffer out.
memset (p, 0, k * sizeof (float));
return p;
}
@ -47,8 +48,9 @@ static fftwf_complex*
calloc_complex (uint32_t k)
{
// Allocate a buffer of k fftwf_complex values with fftwf_alloc_complex,
// zero-fill it and return it. Throws Converror (Converror::MEM_ALLOC) when
// the allocation returns a null pointer.
// NOTE(review): the null check appears twice because this listing interleaves
// the pre-change (unbraced) and post-change (braced) lines of the commit.
fftwf_complex* p = fftwf_alloc_complex (k);
if (!p)
if (!p) {
throw (Converror (Converror::MEM_ALLOC));
}
// Clear all k complex elements before handing the buffer out.
memset (p, 0, k * sizeof (fftwf_complex));
return p;
}
@ -85,8 +87,9 @@ Convproc::set_options (uint32_t options)
// Store skipcnt in _skipcnt, but only when _quantum equals both _minpart and
// _maxpart; in every other configuration the call leaves _skipcnt untouched
// and reports nothing to the caller.
// NOTE(review): the condition appears twice because this listing interleaves
// the pre-change (unbraced) and post-change (braced) lines of the commit.
void
Convproc::set_skipcnt (uint32_t skipcnt)
{
if ((_quantum == _minpart) && (_quantum == _maxpart))
if ((_quantum == _minpart) && (_quantum == _maxpart)) {
_skipcnt = skipcnt;
}
}
int
@ -102,8 +105,9 @@ Convproc::configure (uint32_t ninp,
int prio, step, d, r, s;
float cfft, cmac;
if (_state != ST_IDLE)
if (_state != ST_IDLE) {
return Converror::BAD_STATE;
}
if ( (ninp < 1) || (ninp > MAXINP)
|| (nout < 1) || (nout > MAXOUT)
|| (quantum & (quantum - 1))
@ -115,25 +119,30 @@ Convproc::configure (uint32_t ninp,
|| (minpart > MAXDIVIS * quantum)
|| (maxpart & (maxpart - 1))
|| (maxpart > MAXPART)
|| (maxpart < minpart))
|| (maxpart < minpart)) {
return Converror::BAD_PARAM;
}
nmin = (ninp < nout) ? ninp : nout;
if (density <= 0.0f)
if (density <= 0.0f) {
density = 1.0f / nmin;
if (density > 1.0f)
}
if (density > 1.0f) {
density = 1.0f;
cfft = _fft_cost * (ninp + nout);
cmac = _mac_cost * ninp * nout * density;
step = (cfft < 4 * cmac) ? 1 : 2;
}
cfft = _fft_cost * (ninp + nout);
cmac = _mac_cost * ninp * nout * density;
step = (cfft < 4 * cmac) ? 1 : 2;
if (step == 2) {
r = maxpart / minpart;
s = (r & 0xAAAA) ? 1 : 2;
} else
} else {
s = 1;
nmin = (s == 1) ? 2 : 6;
if (minpart == quantum)
}
nmin = (s == 1) ? 2 : 6;
if (minpart == quantum) {
nmin++;
}
prio = 0;
size = quantum;
while (size < minpart) {
@ -148,8 +157,9 @@ Convproc::configure (uint32_t ninp,
r = 1 << s;
d = npar - nmin;
d = d - (d + r - 1) / r;
if (cfft < d * cmac)
if (cfft < d * cmac) {
npar = nmin;
}
}
_convlev[pind] = new Convlevel ();
_convlev[pind]->configure (prio, offs, npar, size, _options);
@ -171,10 +181,12 @@ Convproc::configure (uint32_t ninp,
_latecnt = 0;
_inpsize = 2 * size;
for (i = 0; i < ninp; i++)
for (i = 0; i < ninp; i++) {
_inpbuff[i] = new float[_inpsize];
for (i = 0; i < nout; i++)
}
for (i = 0; i < nout; i++) {
_outbuff[i] = new float[_minpart];
}
} catch (...) {
cleanup ();
return Converror::MEM_ALLOC;
@ -194,10 +206,13 @@ Convproc::impdata_create (uint32_t inp,
{
uint32_t j;
if (_state != ST_STOP)
if (_state != ST_STOP) {
return Converror::BAD_STATE;
if ((inp >= _ninp) || (out >= _nout))
}
if ((inp >= _ninp) || (out >= _nout)) {
return Converror::BAD_PARAM;
}
try {
for (j = 0; j < _nlevels; j++) {
_convlev[j]->impdata_write (inp, out, step, data, ind0, ind1, true);
@ -214,10 +229,12 @@ Convproc::impdata_clear (uint32_t inp, uint32_t out)
{
// Body of Convproc::impdata_clear (inp, out): calls impdata_clear (inp, out)
// on each of the _nlevels entries of _convlev.
// Returns Converror::BAD_STATE when _state < ST_STOP, otherwise 0.
// NOTE(review): this listing interleaves the pre-change (unbraced) and
// post-change (braced) lines of the commit, so the `if` and `for` headers
// each appear twice.
uint32_t k;
if (_state < ST_STOP)
if (_state < ST_STOP) {
return Converror::BAD_STATE;
for (k = 0; k < _nlevels; k++)
}
for (k = 0; k < _nlevels; k++) {
_convlev[k]->impdata_clear (inp, out);
}
return 0;
}
@ -231,10 +248,12 @@ Convproc::impdata_update (uint32_t inp,
{
uint32_t j;
if (_state < ST_STOP)
if (_state < ST_STOP) {
return Converror::BAD_STATE;
if ((inp >= _ninp) || (out >= _nout))
}
if ((inp >= _ninp) || (out >= _nout)) {
return Converror::BAD_PARAM;
}
for (j = 0; j < _nlevels; j++) {
_convlev[j]->impdata_write (inp, out, step, data, ind0, ind1, false);
}
@ -249,14 +268,18 @@ Convproc::impdata_link (uint32_t inp1,
{
uint32_t j;
if ((inp1 >= _ninp) || (out1 >= _nout))
if ((inp1 >= _ninp) || (out1 >= _nout)) {
return Converror::BAD_PARAM;
if ((inp2 >= _ninp) || (out2 >= _nout))
}
if ((inp2 >= _ninp) || (out2 >= _nout)) {
return Converror::BAD_PARAM;
if ((inp1 == inp2) && (out1 == out2))
}
if ((inp1 == inp2) && (out1 == out2)) {
return Converror::BAD_PARAM;
if (_state != ST_STOP)
}
if (_state != ST_STOP) {
return Converror::BAD_STATE;
}
try {
for (j = 0; j < _nlevels; j++) {
_convlev[j]->impdata_link (inp1, out1, inp2, out2);
@ -273,14 +296,18 @@ Convproc::reset (void)
{
// Body of Convproc::reset: zero-fills every input buffer (_inpsize floats
// each) and every output buffer (_minpart floats each), then calls reset on
// each of the _nlevels entries of _convlev with the buffer sizes and arrays.
// Returns Converror::BAD_STATE when _state == ST_IDLE, otherwise 0.
// NOTE(review): this listing interleaves the pre-change (unbraced) and
// post-change (braced) lines of the commit, so the `if` and `for` headers
// each appear twice.
uint32_t k;
if (_state == ST_IDLE)
if (_state == ST_IDLE) {
return Converror::BAD_STATE;
for (k = 0; k < _ninp; k++)
}
for (k = 0; k < _ninp; k++) {
memset (_inpbuff[k], 0, _inpsize * sizeof (float));
for (k = 0; k < _nout; k++)
}
for (k = 0; k < _nout; k++) {
memset (_outbuff[k], 0, _minpart * sizeof (float));
for (k = 0; k < _nlevels; k++)
}
for (k = 0; k < _nlevels; k++) {
_convlev[k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
}
return 0;
}
@ -289,8 +316,9 @@ Convproc::start_process (int abspri, int policy)
{
uint32_t k;
if (_state != ST_STOP)
if (_state != ST_STOP) {
return Converror::BAD_STATE;
}
_latecnt = 0;
_inpoffs = 0;
_outoffs = 0;
@ -309,30 +337,37 @@ Convproc::process (bool sync)
uint32_t k;
int f = 0;
if (_state != ST_PROC)
if (_state != ST_PROC) {
return 0;
}
_inpoffs += _quantum;
if (_inpoffs == _inpsize)
if (_inpoffs == _inpsize) {
_inpoffs = 0;
}
_outoffs += _quantum;
if (_outoffs == _minpart) {
_outoffs = 0;
for (k = 0; k < _nout; k++)
for (k = 0; k < _nout; k++) {
memset (_outbuff[k], 0, _minpart * sizeof (float));
for (k = 0; k < _nlevels; k++)
}
for (k = 0; k < _nlevels; k++) {
f |= _convlev[k]->readout (sync, _skipcnt);
if (_skipcnt < _minpart)
}
if (_skipcnt < _minpart) {
_skipcnt = 0;
else
} else {
_skipcnt -= _minpart;
}
if (f) {
if (++_latecnt >= 5) {
if (~_options & OPT_LATE_CONTIN)
if (~_options & OPT_LATE_CONTIN) {
stop_process ();
}
f |= FL_LOAD;
}
} else
} else {
_latecnt = 0;
}
}
return f;
}
@ -342,10 +377,12 @@ Convproc::stop_process (void)
{
// Body of Convproc::stop_process: calls stop () on each of the _nlevels
// entries of _convlev and then sets _state to ST_WAIT.
// Returns Converror::BAD_STATE unless _state == ST_PROC, otherwise 0.
// The state is left at ST_WAIT here, not ST_STOP.
// NOTE(review): this listing interleaves the pre-change (unbraced) and
// post-change (braced) lines of the commit, so the `if` and `for` headers
// each appear twice.
uint32_t k;
if (_state != ST_PROC)
if (_state != ST_PROC) {
return Converror::BAD_STATE;
for (k = 0; k < _nlevels; k++)
}
for (k = 0; k < _nlevels; k++) {
_convlev[k]->stop ();
}
_state = ST_WAIT;
return 0;
}
@ -393,8 +430,9 @@ Convproc::check_stop (void)
{
uint32_t k;
for (k = 0; (k < _nlevels) && (_convlev[k]->_stat == Convlevel::ST_IDLE); k++)
for (k = 0; (k < _nlevels) && (_convlev[k]->_stat == Convlevel::ST_IDLE); k++) {
;
}
if (k == _nlevels) {
_state = ST_STOP;
return true;
@ -407,8 +445,9 @@ Convproc::print (FILE* F)
{
// Body of Convproc::print (F): calls print (F) on each of the _nlevels
// entries of _convlev, in index order.
// NOTE(review): the `for` header appears twice because this listing
// interleaves the pre-change (unbraced) and post-change (braced) lines.
uint32_t k;
for (k = 0; k < _nlevels; k++)
for (k = 0; k < _nlevels; k++) {
_convlev[k]->print (F);
}
}
#ifdef ENABLE_VECTOR_MODE
@ -458,8 +497,9 @@ Convlevel::configure (int prio,
_freq_data = calloc_complex (_parsize + 1);
_plan_r2c = fftwf_plan_dft_r2c_1d (2 * _parsize, _time_data, _freq_data, fftwopt);
_plan_c2r = fftwf_plan_dft_c2r_1d (2 * _parsize, _freq_data, _time_data, fftwopt);
if (_plan_r2c && _plan_c2r)
if (_plan_r2c && _plan_c2r) {
return;
}
throw (Converror (Converror::MEM_ALLOC));
}
@ -481,19 +521,23 @@ Convlevel::impdata_write (uint32_t inp,
n = i1 - i0;
i0 = _offs - i0;
i1 = i0 + _npar * _parsize;
if ((i0 >= n) || (i1 <= 0))
if ((i0 >= n) || (i1 <= 0)) {
return;
}
if (create) {
M = findmacnode (inp, out, true);
if (M == 0 || M->_link)
if (M == 0 || M->_link) {
return;
if (M->_fftb == 0)
}
if (M->_fftb == 0) {
M->alloc_fftb (_npar);
}
} else {
M = findmacnode (inp, out, false);
if (M == 0 || M->_link || M->_fftb == 0)
if (M == 0 || M->_link || M->_fftb == 0) {
return;
}
}
norm = 0.5f / _parsize;
@ -508,12 +552,14 @@ Convlevel::impdata_write (uint32_t inp,
memset (_prep_data, 0, 2 * _parsize * sizeof (float));
j0 = (i0 < 0) ? 0 : i0;
j1 = (i1 > n) ? n : i1;
for (j = j0; j < j1; j++)
for (j = j0; j < j1; j++) {
_prep_data[j - i0] = norm * data[j * step];
}
fftwf_execute_dft_r2c (_plan_r2c, _prep_data, _freq_data);
#ifdef ENABLE_VECTOR_MODE
if (_options & OPT_VECTOR_MODE)
if (_options & OPT_VECTOR_MODE) {
fftswap (_freq_data);
}
#endif
for (j = 0; j <= (int)_parsize; j++) {
fftb[j][0] += _freq_data[j][0];
@ -532,8 +578,9 @@ Convlevel::impdata_clear (uint32_t inp, uint32_t out)
Macnode* M;
M = findmacnode (inp, out, false);
if (M == 0 || M->_link || M->_fftb == 0)
if (M == 0 || M->_link || M->_fftb == 0) {
return;
}
for (i = 0; i < _npar; i++) {
if (M->_fftb[i]) {
memset (M->_fftb[i], 0, (_parsize + 1) * sizeof (fftwf_complex));
@ -551,8 +598,9 @@ Convlevel::impdata_link (uint32_t inp1,
Macnode* M2;
M1 = findmacnode (inp1, out1, false);
if (!M1)
if (!M1) {
return;
}
M2 = findmacnode (inp2, out2, true);
M2->free_fftb ();
M2->_link = M1;
@ -610,10 +658,12 @@ Convlevel::start (int abspri, int policy)
min = sched_get_priority_min (policy);
max = sched_get_priority_max (policy);
abspri += _prio;
if (abspri > max)
if (abspri > max) {
abspri = max;
if (abspri < min)
abspri = min;
}
if (abspri < min) {
abspri = min;
}
parm.sched_priority = abspri;
pthread_attr_init (&attr);
pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
@ -731,15 +781,18 @@ Convlevel::process (bool skip)
for (X = _inp_list; X; X = X->_next) {
inpd = _inpbuff[X->_inp];
if (n1)
if (n1) {
memcpy (_time_data, inpd + i1, n1 * sizeof (float));
if (n2)
}
if (n2) {
memcpy (_time_data + n1, inpd, n2 * sizeof (float));
}
memset (_time_data + _parsize, 0, _parsize * sizeof (float));
fftwf_execute_dft_r2c (_plan_r2c, _time_data, X->_ffta[_ptind]);
#ifdef ENABLE_VECTOR_MODE
if (_options & OPT_VECTOR_MODE)
if (_options & OPT_VECTOR_MODE) {
fftswap (X->_ffta[_ptind]);
}
#endif
}
@ -781,28 +834,32 @@ Convlevel::process (bool skip)
}
}
}
if (i == 0)
if (i == 0) {
i = _npar;
}
i--;
}
}
#ifdef ENABLE_VECTOR_MODE
if (_options & OPT_VECTOR_MODE)
if (_options & OPT_VECTOR_MODE) {
fftswap (_freq_data);
}
#endif
fftwf_execute_dft_c2r (_plan_c2r, _freq_data, _time_data);
outd = Y->_buff[opi1];
for (k = 0; k < _parsize; k++)
for (k = 0; k < _parsize; k++) {
outd[k] += _time_data[k];
}
outd = Y->_buff[opi2];
memcpy (outd, _time_data + _parsize, _parsize * sizeof (float));
}
}
_ptind++;
if (_ptind == _npar)
if (_ptind == _npar) {
_ptind = 0;
}
}
int
@ -817,28 +874,32 @@ Convlevel::readout (bool sync, uint32_t skipcnt)
_outoffs = 0;
if (_stat == ST_PROC) {
while (_wait) {
if (sync)
if (sync) {
_done.wait ();
else if (_done.trywait ())
} else if (_done.trywait ()) {
break;
}
_wait--;
}
if (++_opind == 3)
if (++_opind == 3) {
_opind = 0;
}
_trig.post ();
_wait++;
} else {
process (skipcnt >= 2 * _parsize);
if (++_opind == 3)
if (++_opind == 3) {
_opind = 0;
}
}
}
for (Y = _out_list; Y; Y = Y->_next) {
p = Y->_buff[_opind] + _outoffs;
q = _outbuff[Y->_out];
for (i = 0; i < _outsize; i++)
for (i = 0; i < _outsize; i++) {
q[i] += p[i];
}
}
return (_wait > 1) ? _bits : 0;
@ -857,32 +918,38 @@ Convlevel::findmacnode (uint32_t inp, uint32_t out, bool create)
Outnode* Y;
Macnode* M;
for (X = _inp_list; X && (X->_inp != inp); X = X->_next)
for (X = _inp_list; X && (X->_inp != inp); X = X->_next) {
;
}
if (!X) {
if (!create)
if (!create) {
return 0;
}
X = new Inpnode (inp);
X->_next = _inp_list;
_inp_list = X;
X->alloc_ffta (_npar, _parsize);
}
for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next)
for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next) {
;
}
if (!Y) {
if (!create)
if (!create) {
return 0;
}
Y = new Outnode (out, _parsize);
Y->_next = _out_list;
_out_list = Y;
}
for (M = Y->_list; M && (M->_inpn != X); M = M->_next)
for (M = Y->_list; M && (M->_inpn != X); M = M->_next) {
;
}
if (!M) {
if (!create)
if (!create) {
return 0;
}
M = new Macnode (X);
M->_next = Y->_list;
Y->_list = M;
@ -939,8 +1006,9 @@ Inpnode::alloc_ffta (uint16_t npar, int32_t size)
void
Inpnode::free_ffta (void)
{
if (!_ffta)
if (!_ffta) {
return;
}
for (uint16_t i = 0; i < _npar; i++) {
fftwf_free (_ffta[i]);
}
@ -976,8 +1044,9 @@ Macnode::alloc_fftb (uint16_t npar)
void
Macnode::free_fftb (void)
{
if (!_fftb)
if (!_fftb) {
return;
}
for (uint16_t i = 0; i < _npar; i++) {
fftwf_free (_fftb[i]);
}

View File

@ -116,8 +116,9 @@ public:
{
pthread_mutex_lock (&_mutex);
_count++;
if (_count == 1)
if (_count == 1) {
pthread_cond_signal (&_cond);
}
pthread_mutex_unlock (&_mutex);
return 0;
}
@ -125,8 +126,9 @@ public:
int wait (void)
{
pthread_mutex_lock (&_mutex);
while (_count < 1)
while (_count < 1) {
pthread_cond_wait (&_cond, &_mutex);
}
_count--;
pthread_mutex_unlock (&_mutex);
return 0;
@ -134,8 +136,9 @@ public:
int trywait (void)
{
if (pthread_mutex_trylock (&_mutex))
if (pthread_mutex_trylock (&_mutex)) {
return -1;
}
if (_count < 1) {
pthread_mutex_unlock (&_mutex);
return -1;