paulstretch_cpp

PaulStretch
Log | Files | Refs | LICENSE

kiss_fft.c (11972B)


      1 /*
      2 Copyright (c) 2003-2004, Mark Borgerding
      3 
      4 All rights reserved.
      5 
      6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
      9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
     10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
     11 
     12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     13 */
     14 
     15 
     16 #include "_kiss_fft_guts.h"
     17 /* The guts header contains all the multiplication and addition macros that are defined for
     18  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
     19  */
     20 
     21 static kiss_fft_cpx *scratchbuf=NULL;
     22 static size_t nscratchbuf=0;
     23 static kiss_fft_cpx *tmpbuf=NULL;
     24 static size_t ntmpbuf=0;
     25 
     26 #define CHECKBUF(buf,nbuf,n) \
     27     do { \
     28         if ( nbuf < (size_t)(n) ) {\
     29             free(buf); \
     30             buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
     31             nbuf = (size_t)(n); \
     32         } \
     33    }while(0)
     34 
     35 
     36 static void kf_bfly2(
     37         kiss_fft_cpx * Fout,
     38         const size_t fstride,
     39         const kiss_fft_cfg st,
     40         int m
     41         )
     42 {
     43     kiss_fft_cpx * Fout2;
     44     kiss_fft_cpx * tw1 = st->twiddles;
     45     kiss_fft_cpx t;
     46     Fout2 = Fout + m;
     47     do{
     48         C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
     49 
     50         C_MUL (t,  *Fout2 , *tw1);
     51         tw1 += fstride;
     52         C_SUB( *Fout2 ,  *Fout , t );
     53         C_ADDTO( *Fout ,  t );
     54         ++Fout2;
     55         ++Fout;
     56     }while (--m);
     57 }
     58 
     59 static void kf_bfly4(
     60         kiss_fft_cpx * Fout,
     61         const size_t fstride,
     62         const kiss_fft_cfg st,
     63         const size_t m
     64         )
     65 {
     66     kiss_fft_cpx *tw1,*tw2,*tw3;
     67     kiss_fft_cpx scratch[6];
     68     size_t k=m;
     69     const size_t m2=2*m;
     70     const size_t m3=3*m;
     71 
     72     tw3 = tw2 = tw1 = st->twiddles;
     73 
     74     do {
     75         C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
     76 
     77         C_MUL(scratch[0],Fout[m] , *tw1 );
     78         C_MUL(scratch[1],Fout[m2] , *tw2 );
     79         C_MUL(scratch[2],Fout[m3] , *tw3 );
     80 
     81         C_SUB( scratch[5] , *Fout, scratch[1] );
     82         C_ADDTO(*Fout, scratch[1]);
     83         C_ADD( scratch[3] , scratch[0] , scratch[2] );
     84         C_SUB( scratch[4] , scratch[0] , scratch[2] );
     85         C_SUB( Fout[m2], *Fout, scratch[3] );
     86         tw1 += fstride;
     87         tw2 += fstride*2;
     88         tw3 += fstride*3;
     89         C_ADDTO( *Fout , scratch[3] );
     90 
     91         if(st->inverse) {
     92             Fout[m].r = scratch[5].r - scratch[4].i;
     93             Fout[m].i = scratch[5].i + scratch[4].r;
     94             Fout[m3].r = scratch[5].r + scratch[4].i;
     95             Fout[m3].i = scratch[5].i - scratch[4].r;
     96         }else{
     97             Fout[m].r = scratch[5].r + scratch[4].i;
     98             Fout[m].i = scratch[5].i - scratch[4].r;
     99             Fout[m3].r = scratch[5].r - scratch[4].i;
    100             Fout[m3].i = scratch[5].i + scratch[4].r;
    101         }
    102         ++Fout;
    103     }while(--k);
    104 }
    105 
    106 static void kf_bfly3(
    107          kiss_fft_cpx * Fout,
    108          const size_t fstride,
    109          const kiss_fft_cfg st,
    110          size_t m
    111          )
    112 {
    113      size_t k=m;
    114      const size_t m2 = 2*m;
    115      kiss_fft_cpx *tw1,*tw2;
    116      kiss_fft_cpx scratch[5];
    117      kiss_fft_cpx epi3;
    118      epi3 = st->twiddles[fstride*m];
    119 
    120      tw1=tw2=st->twiddles;
    121 
    122      do{
    123          C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
    124 
    125          C_MUL(scratch[1],Fout[m] , *tw1);
    126          C_MUL(scratch[2],Fout[m2] , *tw2);
    127 
    128          C_ADD(scratch[3],scratch[1],scratch[2]);
    129          C_SUB(scratch[0],scratch[1],scratch[2]);
    130          tw1 += fstride;
    131          tw2 += fstride*2;
    132 
    133          Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
    134          Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
    135 
    136          C_MULBYSCALAR( scratch[0] , epi3.i );
    137 
    138          C_ADDTO(*Fout,scratch[3]);
    139 
    140          Fout[m2].r = Fout[m].r + scratch[0].i;
    141          Fout[m2].i = Fout[m].i - scratch[0].r;
    142 
    143          Fout[m].r -= scratch[0].i;
    144          Fout[m].i += scratch[0].r;
    145 
    146          ++Fout;
    147      }while(--k);
    148 }
    149 
    150 static void kf_bfly5(
    151         kiss_fft_cpx * Fout,
    152         const size_t fstride,
    153         const kiss_fft_cfg st,
    154         int m
    155         )
    156 {
    157     kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
    158     int u;
    159     kiss_fft_cpx scratch[13];
    160     kiss_fft_cpx * twiddles = st->twiddles;
    161     kiss_fft_cpx *tw;
    162     kiss_fft_cpx ya,yb;
    163     ya = twiddles[fstride*m];
    164     yb = twiddles[fstride*2*m];
    165 
    166     Fout0=Fout;
    167     Fout1=Fout0+m;
    168     Fout2=Fout0+2*m;
    169     Fout3=Fout0+3*m;
    170     Fout4=Fout0+4*m;
    171 
    172     tw=st->twiddles;
    173     for ( u=0; u<m; ++u ) {
    174         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
    175         scratch[0] = *Fout0;
    176 
    177         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
    178         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
    179         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
    180         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
    181 
    182         C_ADD( scratch[7],scratch[1],scratch[4]);
    183         C_SUB( scratch[10],scratch[1],scratch[4]);
    184         C_ADD( scratch[8],scratch[2],scratch[3]);
    185         C_SUB( scratch[9],scratch[2],scratch[3]);
    186 
    187         Fout0->r += scratch[7].r + scratch[8].r;
    188         Fout0->i += scratch[7].i + scratch[8].i;
    189 
    190         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
    191         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
    192 
    193         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
    194         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
    195 
    196         C_SUB(*Fout1,scratch[5],scratch[6]);
    197         C_ADD(*Fout4,scratch[5],scratch[6]);
    198 
    199         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
    200         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
    201         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
    202         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
    203 
    204         C_ADD(*Fout2,scratch[11],scratch[12]);
    205         C_SUB(*Fout3,scratch[11],scratch[12]);
    206 
    207         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
    208     }
    209 }
    210 
    211 /* perform the butterfly for one stage of a mixed radix FFT */
    212 static void kf_bfly_generic(
    213         kiss_fft_cpx * Fout,
    214         const size_t fstride,
    215         const kiss_fft_cfg st,
    216         int m,
    217         int p
    218         )
    219 {
    220     int u,k,q1,q;
    221     kiss_fft_cpx * twiddles = st->twiddles;
    222     kiss_fft_cpx t;
    223     int Norig = st->nfft;
    224 
    225     CHECKBUF(scratchbuf,nscratchbuf,p);
    226 
    227     for ( u=0; u<m; ++u ) {
    228         k=u;
    229         for ( q1=0 ; q1<p ; ++q1 ) {
    230             scratchbuf[q1] = Fout[ k  ];
    231             C_FIXDIV(scratchbuf[q1],p);
    232             k += m;
    233         }
    234 
    235         k=u;
    236         for ( q1=0 ; q1<p ; ++q1 ) {
    237             int twidx=0;
    238             Fout[ k ] = scratchbuf[0];
    239             for (q=1;q<p;++q ) {
    240                 twidx += fstride * k;
    241                 if (twidx>=Norig) twidx-=Norig;
    242                 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
    243                 C_ADDTO( Fout[ k ] ,t);
    244             }
    245             k += m;
    246         }
    247     }
    248 }
    249 
    250 static
    251 void kf_work(
    252         kiss_fft_cpx * Fout,
    253         const kiss_fft_cpx * f,
    254         const size_t fstride,
    255         int in_stride,
    256         int * factors,
    257         const kiss_fft_cfg st
    258         )
    259 {
    260     kiss_fft_cpx * Fout_beg=Fout;
    261     const int p=*factors++; /* the radix  */
    262     const int m=*factors++; /* stage's fft length/p */
    263     const kiss_fft_cpx * Fout_end = Fout + p*m;
    264 
    265     if (m==1) {
    266         do{
    267             *Fout = *f;
    268             f += fstride*in_stride;
    269         }while(++Fout != Fout_end );
    270     }else{
    271         do{
    272             kf_work( Fout , f, fstride*p, in_stride, factors,st);
    273             f += fstride*in_stride;
    274         }while( (Fout += m) != Fout_end );
    275     }
    276 
    277     Fout=Fout_beg;
    278 
    279     switch (p) {
    280         case 2: kf_bfly2(Fout,fstride,st,m); break;
    281         case 3: kf_bfly3(Fout,fstride,st,m); break; 
    282         case 4: kf_bfly4(Fout,fstride,st,m); break;
    283         case 5: kf_bfly5(Fout,fstride,st,m); break; 
    284         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
    285     }
    286 }
    287 
    288 /*  facbuf is populated by p1,m1,p2,m2, ...
    289     where 
    290     p[i] * m[i] = m[i-1]
    291     m0 = n                  */
    292 static 
    293 void kf_factor(int n,int * facbuf)
    294 {
    295     int p=4;
    296     double floor_sqrt;
    297     floor_sqrt = floor( sqrt((double)n) );
    298 
    299     /*factor out powers of 4, powers of 2, then any remaining primes */
    300     do {
    301         while (n % p) {
    302             switch (p) {
    303                 case 4: p = 2; break;
    304                 case 2: p = 3; break;
    305                 default: p += 2; break;
    306             }
    307             if (p > floor_sqrt)
    308                 p = n;          /* no more factors, skip to end */
    309         }
    310         n /= p;
    311         *facbuf++ = p;
    312         *facbuf++ = n;
    313     } while (n > 1);
    314 }
    315 
    316 /*
    317  *
    318  * User-callable function to allocate all necessary storage space for the fft.
    319  *
    320  * The return value is a contiguous block of memory, allocated with malloc.  As such,
    321  * It can be freed with free(), rather than a kiss_fft-specific function.
    322  * */
    323 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
    324 {
    325     kiss_fft_cfg st=NULL;
    326     size_t memneeded = sizeof(struct kiss_fft_state)
    327         + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
    328 
    329     if ( lenmem==NULL ) {
    330         st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
    331     }else{
    332         if (mem != NULL && *lenmem >= memneeded)
    333             st = (kiss_fft_cfg)mem;
    334         *lenmem = memneeded;
    335     }
    336     if (st) {
    337         int i;
    338         st->nfft=nfft;
    339         st->inverse = inverse_fft;
    340 
    341         for (i=0;i<nfft;++i) {
    342             const double pi=3.141592653589793238462643383279502884197169399375105820974944;
    343             double phase = -2*pi*i / nfft;
    344             if (st->inverse)
    345                 phase *= -1;
    346             kf_cexp(st->twiddles+i, phase );
    347         }
    348 
    349         kf_factor(nfft,st->factors);
    350     }
    351     return st;
    352 }
    353 
    354 
    355 
    356     
    357 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
    358 {
    359     if (fin == fout) {
    360         CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
    361         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
    362         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
    363     }else{
    364         kf_work( fout, fin, 1,in_stride, st->factors,st );
    365     }
    366 }
    367 
    368 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
    369 {
    370     kiss_fft_stride(cfg,fin,fout,1);
    371 }
    372 
    373 
    374 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the 
    375    buffers from CHECKBUF
    376  */ 
    377 void kiss_fft_cleanup(void)
    378 {
    379     free(scratchbuf);
    380     scratchbuf = NULL;
    381     nscratchbuf=0;
    382     free(tmpbuf);
    383     tmpbuf=NULL;
    384     ntmpbuf=0;
    385 }
    386 
    387 int kiss_fft_next_fast_size(int n)
    388 {
    389     while(1) {
    390         int m=n;
    391         while ( (m%2) == 0 ) m/=2;
    392         while ( (m%3) == 0 ) m/=3;
    393         while ( (m%5) == 0 ) m/=5;
    394         if (m<=1)
    395             break; /* n is completely factorable by twos, threes, and fives */
    396         n++;
    397     }
    398     return n;
    399 }