diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..2ae80c5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,82 @@ +name: impulse CI +on: + push: + paths: + - 'tests/**' + - '**' + - 'impulse.nimble' + - '.github/workflows/ci.yml' + pull_request: + paths: + - 'tests/**' + - '**' + - 'impulse.nimble' + - '.github/workflows/ci.yml' + +jobs: + build: + strategy: + fail-fast: false + matrix: + branch: [version-1-6, devel] + target: [linux, macos, windows] + include: + - target: linux + builder: ubuntu-latest + - target: macos + builder: macos-latest + - target: windows + builder: windows-latest + name: '${{ matrix.target }} (${{ matrix.branch }})' + runs-on: ${{ matrix.builder }} + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + path: impulse + + - name: Setup Nim + uses: alaviss/setup-nim@0.1.1 + with: + path: nim + version: ${{ matrix.branch }} + + - name: Setup nimble & deps + shell: bash + run: | + cd impulse + nimble refresh -y + nimble install -y + + - name: Run tests + shell: bash + run: | + cd impulse + nimble -y test + + - name: Build docs + if: > + github.event_name == 'push' && github.ref == 'refs/heads/master' && + matrix.target == 'linux' && matrix.branch == 'devel' + shell: bash + run: | + cd impulse + branch=${{ github.ref }} + branch=${branch##*/} + nimble doc --project --outdir:docs \ + '--git.url:https://github.com/${{ github.repository }}' \ + '--git.commit:${{ github.sha }}' \ + "--git.devel:$branch" \ + impulse.nim + # Ignore failures for older Nim + cp docs/{the,}index.html || true + + - name: Publish docs + if: > + github.event_name == 'push' && github.ref == 'refs/heads/master' && + matrix.target == 'linux' && matrix.branch == 'devel' + uses: crazy-max/ghaction-github-pages@v1 + with: + build_dir: impulse/docs + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index 8f6fcd2..f8dddff 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,76 @@ Impulse will be a collection of signal processing primitives for Nim. ## FFT Currently this library only consists of an FFT module, which wraps -[PocketFFT](https://gitlab.mpcdf.mpg.de/mtr/pocketfft) in form of a -header-only C++ version, https://github.com/mreineck/pocketfft +[PocketFFT](https://gitlab.mpcdf.mpg.de/mtr/pocketfft). -To use it, please import the submodule directly. For example: +For the C++ backend an optimized version of PocketFFT is used, in the +form of a header-only version, + +https://github.com/mreineck/pocketfft + +For the C backend we use the PocketFFT directly (the C library linked +before). + +Note that the API differs slightly between the two. See the two +examples below. + +### C example + +The `pocketfft` submodule can to be imported manually using +`import impulse/fft/pocketfft` or one can simply import the `fft` +submodule as shown below. + +```nim +import impulse/fft + +template isClose(a1, a2, eps: untyped): untyped = + for i in 0 ..< a1.len: + doAssert abs(a1[i] - a2[i]) < eps, "Is: " & $a1[i] & " | " & $a2[i] + +block Array: + let dIn = [1.0, 2.0, 1.0, -1.0, 1.5] + let dOut = fft(dIn, forward = true) # forward or backwards? + echo dIn + echo dOut + isClose(dIn, fft(dOut, forward = false), eps = 1e-10) + # [1.0, 2.0, 1.0, -1.0, 1.5] + # @[4.5, 2.081559480312316, -1.651098762732523, -1.831559480312316, 1.608220406444071] +block Seq: + let dIn = @[1.0, 2.0, 1.0, -1.0, 1.5] + let dOut = fft(dIn, forward = true) # forward or backwards? + echo dIn + echo dOut + isClose(dIn, fft(dOut, forward = false), eps = 1e-10) + # @[1.0, 2.0, 1.0, -1.0, 1.5] + # @[4.5, 2.081559480312316, -1.651098762732523, -1.831559480312316, 1.608220406444071] +block Tensor: + let dIn = @[1.0, 2.0, 1.0, -1.0, 1.5].toTensor + let dOut = fft(dIn, forward = true) # forward or backwards? + echo dIn + echo dOut + isClose(dIn, fft(dOut, forward = false), eps = 1e-10) + # Tensor[system.float] of shape "[5]" on backend "Cpu" + # 1 2 1 -1 1.5 + # Tensor[system.float] of shape "[5]" on backend "Cpu" + # 4.5 2.08156 -1.6511 -1.83156 1.60822 +import std / complex +block Complex: + let dIn = [complex(1.0), complex(2.0), complex(1.0), complex(-1.0), complex(1.5)] + let dOut = fft(dIn, forward = true) # forward or backwards? + echo dIn + echo dOut + isClose(dIn, fft(dOut, forward = false), eps = 1e-10) + # [(1.0, 0.0), (2.0, 0.0), (1.0, 0.0), (-1.0, 0.0), (1.5, 0.0)] + # @[(4.5, 0.0), (2.081559480312316, -1.651098762732523), (-1.831559480312316, 1.608220406444071), (-1.831559480312316, -1.608220406444071), (2.081559480312316, 1.651098762732523)] +``` + + +### C++ example + +When compiling on the C++ backend, the API is a bit different: ```nim -import impulse/fft/pocketfft +import impulse/fft import std / complex let dIn = @[1.0, 2.0, 1.0, -1.0, 1.5] diff --git a/impulse.nimble b/impulse.nimble index 78afcaa..f9d160b 100644 --- a/impulse.nimble +++ b/impulse.nimble @@ -9,3 +9,8 @@ license = "MIT" # Dependencies requires "nim >= 1.6.0" +requires "arraymancer >= 0.7.28" + +task test, "Run standard tests": + exec "nim c -r tests/test_fft.nim" + exec "nim c -r tests/test_fft2.nim" diff --git a/impulse/fft.nim b/impulse/fft.nim index 1636957..2c6b9a8 100644 --- a/impulse/fft.nim +++ b/impulse/fft.nim @@ -4,3 +4,6 @@ # * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. + +import fft/pocketfft +export pocketfft diff --git a/impulse/fft/README.md b/impulse/fft/README.md index 42a3707..070fe94 100644 --- a/impulse/fft/README.md +++ b/impulse/fft/README.md @@ -8,6 +8,10 @@ and in particular uses dynamic memory allocation We currently use PocketFFT. +For the C++ backend we use a header only version of it. For the C +backend we use the regular C PocketFFT implementation (which we build +alongside the Nim program). + Commit 49b813232507470a047727712acda105b84c7815 has an initial pure Nim implementation that follows the PocketFFT algorithms which in turn follow diff --git a/impulse/fft/c_pocketfft/LICENSE.md b/impulse/fft/c_pocketfft/LICENSE.md new file mode 100644 index 0000000..1b5163d --- /dev/null +++ b/impulse/fft/c_pocketfft/LICENSE.md @@ -0,0 +1,25 @@ +Copyright (C) 2010-2019 Max-Planck-Society +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. +* Neither the name of the copyright holder nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/impulse/fft/c_pocketfft/README.md b/impulse/fft/c_pocketfft/README.md new file mode 100644 index 0000000..71179a6 --- /dev/null +++ b/impulse/fft/c_pocketfft/README.md @@ -0,0 +1,55 @@ +PocketFFT +--------- + +This is a heavily modified implementation of FFTPack [1,2], with the following +advantages: + +- strictly C99 compliant +- more accurate twiddle factor computation +- very fast plan generation +- worst case complexity for transform sizes with large prime factors is + `N*log(N)`, because Bluestein's algorithm [3] is used for these cases. + +License +------- + +3-clause BSD (see LICENSE.md) + + +Some code details +----------------- + +Twiddle factor computation: + +- making use of symmetries to reduce number of sin/cos evaluations +- all angles are reduced to the range `[0; pi/4]` for higher accuracy +- an adapted implementation of `sincospi()` is used, which actually computes + `sin(x)` and `(cos(x)-1)`. +- if `n` sin/cos pairs are required, the adjusted `sincospi()` is only called + `2*sqrt(n)` times; the remaining values are obtained by evaluating the + angle addition theorems in a numerically accurate way. + +Parallel invocation: + +- Plans only contain read-only data; all temporary arrays are allocated and + deallocated during an individual FFT execution. This means that a single plan + can be used in several threads at the same time. + +Efficient codelets are available for the factors: + +- 2, 3, 4, 5, 7, 11 for complex-valued FFTs +- 2, 3, 4, 5 for real-valued FFTs + +Larger prime factors are handled by somewhat less efficient, generic routines. + +For lengths with very large prime factors, Bluestein's algorithm is used, and +instead of an FFT of length `n`, a convolution of length `n2 >= 2*n-1` +is performed, where `n2` is chosen to be highly composite. + + +[1] Swarztrauber, P. 1982, Vectorizing the Fast Fourier Transforms + (New York: Academic Press), 51 + +[2] https://www.netlib.org/fftpack/ + +[3] https://en.wikipedia.org/wiki/Chirp_Z-transform diff --git a/impulse/fft/c_pocketfft/ffttest.c b/impulse/fft/c_pocketfft/ffttest.c new file mode 100644 index 0000000..27ffa72 --- /dev/null +++ b/impulse/fft/c_pocketfft/ffttest.c @@ -0,0 +1,101 @@ +/* + * This file is part of pocketfft. + * Licensed under a 3-clause BSD style license - see LICENSE.md + */ + +/* + * Test codes for pocketfft. + * + * Copyright (C) 2004-2018 Max-Planck-Society + * \author Martin Reinecke + */ + +#include +#include +#include +#include +#include "pocketfft_src.h" + +#define maxlen 10 //8192 + +static void fill_random (double *data, size_t length) + { + for (size_t m=0; mepsilon) + { + printf("problem at real length %i: %e\n",length,err); + ret = 1; + } + errsum+=err; + } + printf("errsum: %e\n",errsum); + return ret; + } + +static int test_complex(void) + { + double data[2*maxlen], odata[2*maxlen]; + fill_random (odata, 2*maxlen); + const double epsilon=2e-15; + int ret = 0; + double errsum=0; + for (int length=1; length<=maxlen; ++length) + { + memcpy (data,odata,2*length*sizeof(double)); + cfft_plan plan = make_cfft_plan (length); + cfft_forward(plan, data, 1.); + cfft_backward(plan, data, 1./length); + destroy_cfft_plan (plan); + double err = errcalc (data, odata, 2*length); + if (err>epsilon) + { + printf("problem at complex length %i: %e\n",length,err); + ret = 1; + } + errsum+=err; + } + printf("errsum: %e\n",errsum); + return ret; + } + +int main(void) + { + int ret = 0; + ret = test_real(); + ret += test_complex(); + return ret; + } diff --git a/impulse/fft/c_pocketfft/pocketfft.c b/impulse/fft/c_pocketfft/pocketfft.c new file mode 100644 index 0000000..de1af3e --- /dev/null +++ b/impulse/fft/c_pocketfft/pocketfft.c @@ -0,0 +1,2190 @@ +/* + * This file is part of pocketfft. + * Licensed under a 3-clause BSD style license - see LICENSE.md + */ + +/* + * Main implementation file. + * + * Copyright (C) 2004-2018 Max-Planck-Society + * \author Martin Reinecke + */ + +#include +#include + +#include "pocketfft.h" + +#define RALLOC(type,num) \ + ((type *)malloc((num)*sizeof(type))) +#define DEALLOC(ptr) \ + do { free(ptr); (ptr)=NULL; } while(0) + +#define SWAP(a,b,type) \ + do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0) + +#ifdef __GNUC__ +#define NOINLINE __attribute__((noinline)) +#define WARN_UNUSED_RESULT __attribute__ ((warn_unused_result)) +#else +#define NOINLINE +#define WARN_UNUSED_RESULT +#endif + +// adapted from https://stackoverflow.com/questions/42792939/ +// CAUTION: this function only works for arguments in the range [-0.25; 0.25]! +static void my_sincosm1pi (double a, double *restrict res) + { + double s = a * a; + /* Approximate cos(pi*x)-1 for x in [-0.25,0.25] */ + double r = -1.0369917389758117e-4; + r = fma (r, s, 1.9294935641298806e-3); + r = fma (r, s, -2.5806887942825395e-2); + r = fma (r, s, 2.3533063028328211e-1); + r = fma (r, s, -1.3352627688538006e+0); + r = fma (r, s, 4.0587121264167623e+0); + r = fma (r, s, -4.9348022005446790e+0); + double c = r*s; + /* Approximate sin(pi*x) for x in [-0.25,0.25] */ + r = 4.6151442520157035e-4; + r = fma (r, s, -7.3700183130883555e-3); + r = fma (r, s, 8.2145868949323936e-2); + r = fma (r, s, -5.9926452893214921e-1); + r = fma (r, s, 2.5501640398732688e+0); + r = fma (r, s, -5.1677127800499516e+0); + s = s * a; + r = r * s; + s = fma (a, 3.1415926535897931e+0, r); + res[0] = c; + res[1] = s; + } + +NOINLINE static void calc_first_octant(size_t den, double * restrict res) + { + size_t n = (den+4)>>3; + if (n==0) return; + res[0]=1.; res[1]=0.; + if (n==1) return; + size_t l1=(size_t)sqrt(n); + for (size_t i=1; in) end = n-start; + for (size_t i=1; i>2; + size_t i=0, idx1=0, idx2=2*ndone-2; + for (; i+1>1; + double * p = res+n-1; + calc_first_octant(n<<2, p); + int i4=0, in=n, i=0; + for (; i4<=in-i4; ++i, i4+=4) // octant 0 + { + res[2*i] = p[2*i4]; res[2*i+1] = p[2*i4+1]; + } + for (; i4-in <= 0; ++i, i4+=4) // octant 1 + { + int xm = in-i4; + res[2*i] = p[2*xm+1]; res[2*i+1] = p[2*xm]; + } + for (; i4<=3*in-i4; ++i, i4+=4) // octant 2 + { + int xm = i4-in; + res[2*i] = -p[2*xm+1]; res[2*i+1] = p[2*xm]; + } + for (; i>2; + if ((n&7)==0) + res[quart] = res[quart+1] = hsqt2; + for (size_t i=2, j=2*quart-2; i>1; + if ((n&3)==0) + for (size_t i=0; i>1))<<1)==n) + { res=2; n=tmp; } + + size_t limit=(size_t)sqrt(n+0.01); + for (size_t x=3; x<=limit; x+=2) + while (((tmp=(n/x))*x)==n) + { + res=x; + n=tmp; + limit=(size_t)sqrt(n+0.01); + } + if (n>1) res=n; + + return res; + } + +NOINLINE static double cost_guess (size_t n) + { + const double lfp=1.1; // penalty for non-hardcoded larger factors + size_t ni=n; + double result=0.; + size_t tmp; + while (((tmp=(n>>1))<<1)==n) + { result+=2; n=tmp; } + + size_t limit=(size_t)sqrt(n+0.01); + for (size_t x=3; x<=limit; x+=2) + while ((tmp=(n/x))*x==n) + { + result+= (x<=5) ? x : lfp*x; // penalize larger prime factors + n=tmp; + limit=(size_t)sqrt(n+0.01); + } + if (n>1) result+=(n<=5) ? n : lfp*n; + + return result*ni; + } + +/* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ +NOINLINE static size_t good_size(size_t n) + { + if (n<=6) return n; + + size_t bestfac=2*n; + for (size_t f2=1; f2=n) bestfac=f235711; + return bestfac; + } + +typedef struct cmplx { + double r,i; +} cmplx; + +#define NFCT 25 +typedef struct cfftp_fctdata + { + size_t fct; + cmplx *tw, *tws; + } cfftp_fctdata; + +typedef struct cfftp_plan_i + { + size_t length, nfct; + cmplx *mem; + cfftp_fctdata fct[NFCT]; + } cfftp_plan_i; +typedef struct cfftp_plan_i * cfftp_plan; + +#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; } +#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; } +#define SCALEC(a,b) { a.r*=b; a.i*=b; } +#define ROT90(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; } +#define ROTM90(a) { double tmp_=-a.r; a.r=a.i; a.i=tmp_; } +#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] +#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] +#define WA(x,i) wa[(i)-1+(x)*(ido-1)] +/* a = b*c */ +#define A_EQ_B_MUL_C(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; } +/* a = conj(b)*c*/ +#define A_EQ_CB_MUL_C(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; } + +#define PMSIGNC(a,b,c,d) { a.r=c.r+sign*d.r; a.i=c.i+sign*d.i; b.r=c.r-sign*d.r; b.i=c.i-sign*d.i; } +/* a = b*c */ +#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-sign*b.i*c.i; a.i=b.r*c.i+sign*b.i*c.r; } +/* a *= b */ +#define MULPMSIGNCEQ(a,b) { double xtmp=a.r; a.r=b.r*a.r-sign*b.i*a.i; a.i=b.r*a.i+sign*b.i*xtmp; } + +NOINLINE static void pass2b (size_t ido, size_t l1, const cmplx * restrict cc, + cmplx * restrict ch, const cmplx * restrict wa) + { + const size_t cdim=2; + + if (ido==1) + for (size_t k=0; kip) iwal-=ip; + cmplx xwal=wal[iwal]; + iwal+=l; if (iwal>ip) iwal-=ip; + cmplx xwal2=wal[iwal]; + for (size_t ik=0; ikip) iwal-=ip; + cmplx xwal=wal[iwal]; + for (size_t ik=0; iklength==1) return 0; + size_t len=plan->length; + size_t l1=1, nf=plan->nfct; + cmplx *ch = RALLOC(cmplx, len); + if (!ch) return -1; + cmplx *p1=c, *p2=ch; + + for(size_t k1=0; k1fct[k1].fct; + size_t l2=ip*l1; + size_t ido = len/l2; + if (ip==4) + sign>0 ? pass4b (ido, l1, p1, p2, plan->fct[k1].tw) + : pass4f (ido, l1, p1, p2, plan->fct[k1].tw); + else if(ip==2) + sign>0 ? pass2b (ido, l1, p1, p2, plan->fct[k1].tw) + : pass2f (ido, l1, p1, p2, plan->fct[k1].tw); + else if(ip==3) + sign>0 ? pass3b (ido, l1, p1, p2, plan->fct[k1].tw) + : pass3f (ido, l1, p1, p2, plan->fct[k1].tw); + else if(ip==5) + sign>0 ? pass5b (ido, l1, p1, p2, plan->fct[k1].tw) + : pass5f (ido, l1, p1, p2, plan->fct[k1].tw); + else if(ip==7) pass7 (ido, l1, p1, p2, plan->fct[k1].tw, sign); + else if(ip==11) pass11(ido, l1, p1, p2, plan->fct[k1].tw, sign); + else + { + if (passg(ido, ip, l1, p1, p2, plan->fct[k1].tw, plan->fct[k1].tws, sign)) + { DEALLOC(ch); return -1; } + SWAP(p1,p2,cmplx *); + } + SWAP(p1,p2,cmplx *); + l1=l2; + } + if (p1!=c) + { + if (fct!=1.) + for (size_t i=0; ilength; + size_t nfct=0; + while ((length%4)==0) + { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } + if ((length%2)==0) + { + length>>=1; + // factor 2 should be at the front of the factor list + if (nfct>=NFCT) return -1; + plan->fct[nfct++].fct=2; + SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); + } + size_t maxl=(size_t)(sqrt((double)length))+1; + for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; + plan->fct[nfct++].fct=divisor; + length/=divisor; + } + maxl=(size_t)(sqrt((double)length))+1; + } + if (length>1) plan->fct[nfct++].fct=length; + plan->nfct=nfct; + return 0; + } + +NOINLINE static size_t cfftp_twsize (cfftp_plan plan) + { + size_t twsize=0, l1=1; + for (size_t k=0; knfct; ++k) + { + size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); + twsize+=(ip-1)*(ido-1); + if (ip>11) + twsize+=ip; + l1*=ip; + } + return twsize; + } + +NOINLINE WARN_UNUSED_RESULT static int cfftp_comp_twiddle (cfftp_plan plan) + { + size_t length=plan->length; + double *twid = RALLOC(double, 2*length); + if (!twid) return -1; + sincos_2pibyn(length, twid); + size_t l1=1; + size_t memofs=0; + for (size_t k=0; knfct; ++k) + { + size_t ip=plan->fct[k].fct, ido= length/(l1*ip); + plan->fct[k].tw=plan->mem+memofs; + memofs+=(ip-1)*(ido-1); + for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+i-1].r = twid[2*j*l1*i]; + plan->fct[k].tw[(j-1)*(ido-1)+i-1].i = twid[2*j*l1*i+1]; + } + if (ip>11) + { + plan->fct[k].tws=plan->mem+memofs; + memofs+=ip; + for (size_t j=0; jfct[k].tws[j].r = twid[2*j*l1*ido]; + plan->fct[k].tws[j].i = twid[2*j*l1*ido+1]; + } + } + l1*=ip; + } + DEALLOC(twid); + return 0; + } + +static cfftp_plan make_cfftp_plan (size_t length) + { + if (length==0) return NULL; + cfftp_plan plan = RALLOC(cfftp_plan_i,1); + if (!plan) return NULL; + plan->length=length; + plan->nfct=0; + for (size_t i=0; ifct[i]=(cfftp_fctdata){0,0,0}; + plan->mem=0; + if (length==1) return plan; + if (cfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } + size_t tws=cfftp_twsize(plan); + plan->mem=RALLOC(cmplx,tws); + if (!plan->mem) { DEALLOC(plan); return NULL; } + if (cfftp_comp_twiddle(plan)!=0) + { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } + return plan; + } + +static void destroy_cfftp_plan (cfftp_plan plan) + { + DEALLOC(plan->mem); + DEALLOC(plan); + } + +typedef struct rfftp_fctdata + { + size_t fct; + double *tw, *tws; + } rfftp_fctdata; + +typedef struct rfftp_plan_i + { + size_t length, nfct; + double *mem; + rfftp_fctdata fct[NFCT]; + } rfftp_plan_i; +typedef struct rfftp_plan_i * rfftp_plan; + +#define WA(x,i) wa[(i)+(x)*(ido-1)] +#define PM(a,b,c,d) { a=c+d; b=c-d; } +/* (a+ib) = conj(c+id) * (e+if) */ +#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; } + +#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))] +#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))] + +NOINLINE static void radf2 (size_t ido, size_t l1, const double * restrict cc, + double * restrict ch, const double * restrict wa) + { + const size_t cdim=2; + + for (size_t k=0; k1) + { + for (size_t j=1, jc=ip-1; j=ip) iang-=ip; + double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; + iang+=l; if (iang>=ip) iang-=ip; + double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; + iang+=l; if (iang>=ip) iang-=ip; + double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; + iang+=l; if (iang>=ip) iang-=ip; + double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; + for (size_t ik=0; ik=ip) iang-=ip; + double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; + iang+=l; if (iang>=ip) iang-=ip; + double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; + for (size_t ik=0; ik=ip) iang-=ip; + double ar=csarr[2*iang], ai=csarr[2*iang+1]; + for (size_t ik=0; ikip) iang-=ip; + double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; + iang+=l; if(iang>ip) iang-=ip; + double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; + iang+=l; if(iang>ip) iang-=ip; + double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; + iang+=l; if(iang>ip) iang-=ip; + double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; + for (size_t ik=0; ikip) iang-=ip; + double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; + iang+=l; if(iang>ip) iang-=ip; + double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; + for (size_t ik=0; ikip) iang-=ip; + double war=csarr[2*iang], wai=csarr[2*iang+1]; + for (size_t ik=0; iklength==1) return 0; + size_t n=plan->length; + size_t l1=n, nf=plan->nfct; + double *ch = RALLOC(double, n); + if (!ch) return -1; + double *p1=c, *p2=ch; + + for(size_t k1=0; k1fct[k].fct; + size_t ido=n / l1; + l1 /= ip; + if(ip==4) + radf4(ido, l1, p1, p2, plan->fct[k].tw); + else if(ip==2) + radf2(ido, l1, p1, p2, plan->fct[k].tw); + else if(ip==3) + radf3(ido, l1, p1, p2, plan->fct[k].tw); + else if(ip==5) + radf5(ido, l1, p1, p2, plan->fct[k].tw); + else + { + radfg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); + SWAP (p1,p2,double *); + } + SWAP (p1,p2,double *); + } + copy_and_norm(c,p1,n,fct); + DEALLOC(ch); + return 0; + } + +WARN_UNUSED_RESULT +static int rfftp_backward(rfftp_plan plan, double c[], double fct) + { + if (plan->length==1) return 0; + size_t n=plan->length; + size_t l1=1, nf=plan->nfct; + double *ch = RALLOC(double, n); + if (!ch) return -1; + double *p1=c, *p2=ch; + + for(size_t k=0; kfct[k].fct, + ido= n/(ip*l1); + if(ip==4) + radb4(ido, l1, p1, p2, plan->fct[k].tw); + else if(ip==2) + radb2(ido, l1, p1, p2, plan->fct[k].tw); + else if(ip==3) + radb3(ido, l1, p1, p2, plan->fct[k].tw); + else if(ip==5) + radb5(ido, l1, p1, p2, plan->fct[k].tw); + else + radbg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); + SWAP (p1,p2,double *); + l1*=ip; + } + copy_and_norm(c,p1,n,fct); + DEALLOC(ch); + return 0; + } + +WARN_UNUSED_RESULT +static int rfftp_factorize (rfftp_plan plan) + { + size_t length=plan->length; + size_t nfct=0; + while ((length%4)==0) + { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } + if ((length%2)==0) + { + length>>=1; + // factor 2 should be at the front of the factor list + if (nfct>=NFCT) return -1; + plan->fct[nfct++].fct=2; + SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); + } + size_t maxl=(size_t)(sqrt((double)length))+1; + for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; + plan->fct[nfct++].fct=divisor; + length/=divisor; + } + maxl=(size_t)(sqrt((double)length))+1; + } + if (length>1) plan->fct[nfct++].fct=length; + plan->nfct=nfct; + return 0; + } + +static size_t rfftp_twsize(rfftp_plan plan) + { + size_t twsize=0, l1=1; + for (size_t k=0; knfct; ++k) + { + size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); + twsize+=(ip-1)*(ido-1); + if (ip>5) twsize+=2*ip; + l1*=ip; + } + return twsize; + return 0; + } + +WARN_UNUSED_RESULT NOINLINE static int rfftp_comp_twiddle (rfftp_plan plan) + { + size_t length=plan->length; + double *twid = RALLOC(double, 2*length); + if (!twid) return -1; + sincos_2pibyn_half(length, twid); + size_t l1=1; + double *ptr=plan->mem; + for (size_t k=0; knfct; ++k) + { + size_t ip=plan->fct[k].fct, ido=length/(l1*ip); + if (knfct-1) // last factor doesn't need twiddles + { + plan->fct[k].tw=ptr; ptr+=(ip-1)*(ido-1); + for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+2*i-2] = twid[2*j*l1*i]; + plan->fct[k].tw[(j-1)*(ido-1)+2*i-1] = twid[2*j*l1*i+1]; + } + } + if (ip>5) // special factors required by *g functions + { + plan->fct[k].tws=ptr; ptr+=2*ip; + plan->fct[k].tws[0] = 1.; + plan->fct[k].tws[1] = 0.; + for (size_t i=1; i<=(ip>>1); ++i) + { + plan->fct[k].tws[2*i ] = twid[2*i*(length/ip)]; + plan->fct[k].tws[2*i+1] = twid[2*i*(length/ip)+1]; + plan->fct[k].tws[2*(ip-i) ] = twid[2*i*(length/ip)]; + plan->fct[k].tws[2*(ip-i)+1] = -twid[2*i*(length/ip)+1]; + } + } + l1*=ip; + } + DEALLOC(twid); + return 0; + } + +NOINLINE static rfftp_plan make_rfftp_plan (size_t length) + { + if (length==0) return NULL; + rfftp_plan plan = RALLOC(rfftp_plan_i,1); + if (!plan) return NULL; + plan->length=length; + plan->nfct=0; + plan->mem=NULL; + for (size_t i=0; ifct[i]=(rfftp_fctdata){0,0,0}; + if (length==1) return plan; + if (rfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } + size_t tws=rfftp_twsize(plan); + plan->mem=RALLOC(double,tws); + if (!plan->mem) { DEALLOC(plan); return NULL; } + if (rfftp_comp_twiddle(plan)!=0) + { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } + return plan; + } + +NOINLINE static void destroy_rfftp_plan (rfftp_plan plan) + { + DEALLOC(plan->mem); + DEALLOC(plan); + } + +typedef struct fftblue_plan_i + { + size_t n, n2; + cfftp_plan plan; + double *mem; + double *bk, *bkf; + } fftblue_plan_i; +typedef struct fftblue_plan_i * fftblue_plan; + +NOINLINE static fftblue_plan make_fftblue_plan (size_t length) + { + fftblue_plan plan = RALLOC(fftblue_plan_i,1); + if (!plan) return NULL; + plan->n = length; + plan->n2 = good_size(plan->n*2-1); + plan->mem = RALLOC(double, 2*plan->n+2*plan->n2); + if (!plan->mem) { DEALLOC(plan); return NULL; } + plan->bk = plan->mem; + plan->bkf = plan->bk+2*plan->n; + +/* initialize b_k */ + double *tmp = RALLOC(double,4*plan->n); + if (!tmp) { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } + sincos_2pibyn(2*plan->n,tmp); + plan->bk[0] = 1; + plan->bk[1] = 0; + + size_t coeff=0; + for (size_t m=1; mn; ++m) + { + coeff+=2*m-1; + if (coeff>=2*plan->n) coeff-=2*plan->n; + plan->bk[2*m ] = tmp[2*coeff ]; + plan->bk[2*m+1] = tmp[2*coeff+1]; + } + + /* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */ + double xn2 = 1./plan->n2; + plan->bkf[0] = plan->bk[0]*xn2; + plan->bkf[1] = plan->bk[1]*xn2; + for (size_t m=2; m<2*plan->n; m+=2) + { + plan->bkf[m] = plan->bkf[2*plan->n2-m] = plan->bk[m] *xn2; + plan->bkf[m+1] = plan->bkf[2*plan->n2-m+1] = plan->bk[m+1] *xn2; + } + for (size_t m=2*plan->n;m<=(2*plan->n2-2*plan->n+1);++m) + plan->bkf[m]=0.; + plan->plan=make_cfftp_plan(plan->n2); + if (!plan->plan) + { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } + if (cfftp_forward(plan->plan,plan->bkf,1.)!=0) + { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } + DEALLOC(tmp); + + return plan; + } + +NOINLINE static void destroy_fftblue_plan (fftblue_plan plan) + { + DEALLOC(plan->mem); + destroy_cfftp_plan(plan->plan); + DEALLOC(plan); + } + +NOINLINE WARN_UNUSED_RESULT +static int fftblue_fft(fftblue_plan plan, double c[], int isign, double fct) + { + size_t n=plan->n; + size_t n2=plan->n2; + double *bk = plan->bk; + double *bkf = plan->bkf; + double *akf = RALLOC(double, 2*n2); + if (!akf) return -1; + +/* initialize a_k and FFT it */ + if (isign>0) + for (size_t m=0; m<2*n; m+=2) + { + akf[m] = c[m]*bk[m] - c[m+1]*bk[m+1]; + akf[m+1] = c[m]*bk[m+1] + c[m+1]*bk[m]; + } + else + for (size_t m=0; m<2*n; m+=2) + { + akf[m] = c[m]*bk[m] + c[m+1]*bk[m+1]; + akf[m+1] =-c[m]*bk[m+1] + c[m+1]*bk[m]; + } + for (size_t m=2*n; m<2*n2; ++m) + akf[m]=0; + + if (cfftp_forward (plan->plan,akf,fct)!=0) + { DEALLOC(akf); return -1; } + +/* do the convolution */ + if (isign>0) + for (size_t m=0; m<2*n2; m+=2) + { + double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; + akf[m ] = akf[m]*bkf[m] + akf[m+1]*bkf[m+1]; + akf[m+1] = im; + } + else + for (size_t m=0; m<2*n2; m+=2) + { + double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; + akf[m ] = akf[m]*bkf[m] - akf[m+1]*bkf[m+1]; + akf[m+1] = im; + } + +/* inverse FFT */ + if (cfftp_backward (plan->plan,akf,1.)!=0) + { DEALLOC(akf); return -1; } + +/* multiply by b_k */ + if (isign>0) + for (size_t m=0; m<2*n; m+=2) + { + c[m] = bk[m] *akf[m] - bk[m+1]*akf[m+1]; + c[m+1] = bk[m+1]*akf[m] + bk[m] *akf[m+1]; + } + else + for (size_t m=0; m<2*n; m+=2) + { + c[m] = bk[m] *akf[m] + bk[m+1]*akf[m+1]; + c[m+1] =-bk[m+1]*akf[m] + bk[m] *akf[m+1]; + } + DEALLOC(akf); + return 0; + } + +WARN_UNUSED_RESULT +static int cfftblue_backward(fftblue_plan plan, double c[], double fct) + { return fftblue_fft(plan,c,1,fct); } + +WARN_UNUSED_RESULT +static int cfftblue_forward(fftblue_plan plan, double c[], double fct) + { return fftblue_fft(plan,c,-1,fct); } + +WARN_UNUSED_RESULT +static int rfftblue_backward(fftblue_plan plan, double c[], double fct) + { + size_t n=plan->n; + double *tmp = RALLOC(double,2*n); + if (!tmp) return -1; + tmp[0]=c[0]; + tmp[1]=0.; + memcpy (tmp+2,c+1, (n-1)*sizeof(double)); + if ((n&1)==0) tmp[n+1]=0.; + for (size_t m=2; mn; + double *tmp = RALLOC(double,2*n); + if (!tmp) return -1; + for (size_t m=0; mblueplan=0; + plan->packplan=0; + if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) + { + plan->packplan=make_cfftp_plan(length); + if (!plan->packplan) { DEALLOC(plan); return NULL; } + return plan; + } + double comp1 = cost_guess(length); + double comp2 = 2*cost_guess(good_size(2*length-1)); + comp2*=1.5; /* fudge factor that appears to give good overall performance */ + if (comp2blueplan=make_fftblue_plan(length); + if (!plan->blueplan) { DEALLOC(plan); return NULL; } + } + else + { + plan->packplan=make_cfftp_plan(length); + if (!plan->packplan) { DEALLOC(plan); return NULL; } + } + return plan; + } + +void destroy_cfft_plan (cfft_plan plan) + { + if (plan->blueplan) + destroy_fftblue_plan(plan->blueplan); + if (plan->packplan) + destroy_cfftp_plan(plan->packplan); + DEALLOC(plan); + } + +WARN_UNUSED_RESULT int cfft_backward(cfft_plan plan, double c[], double fct) + { + if (plan->packplan) + return cfftp_backward(plan->packplan,c,fct); + // if (plan->blueplan) + return cfftblue_backward(plan->blueplan,c,fct); + } + +WARN_UNUSED_RESULT int cfft_forward(cfft_plan plan, double c[], double fct) + { + if (plan->packplan) + return cfftp_forward(plan->packplan,c,fct); + // if (plan->blueplan) + return cfftblue_forward(plan->blueplan,c,fct); + } + +typedef struct rfft_plan_i + { + rfftp_plan packplan; + fftblue_plan blueplan; + } rfft_plan_i; + +rfft_plan make_rfft_plan (size_t length) + { + if (length==0) return NULL; + rfft_plan plan = RALLOC(rfft_plan_i,1); + if (!plan) return NULL; + plan->blueplan=0; + plan->packplan=0; + if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) + { + plan->packplan=make_rfftp_plan(length); + if (!plan->packplan) { DEALLOC(plan); return NULL; } + return plan; + } + double comp1 = 0.5*cost_guess(length); + double comp2 = 2*cost_guess(good_size(2*length-1)); + comp2*=1.5; /* fudge factor that appears to give good overall performance */ + if (comp2blueplan=make_fftblue_plan(length); + if (!plan->blueplan) { DEALLOC(plan); return NULL; } + } + else + { + plan->packplan=make_rfftp_plan(length); + if (!plan->packplan) { DEALLOC(plan); return NULL; } + } + return plan; + } + +void destroy_rfft_plan (rfft_plan plan) + { + if (plan->blueplan) + destroy_fftblue_plan(plan->blueplan); + if (plan->packplan) + destroy_rfftp_plan(plan->packplan); + DEALLOC(plan); + } + +size_t rfft_length(rfft_plan plan) + { + if (plan->packplan) return plan->packplan->length; + return plan->blueplan->n; + } + +size_t cfft_length(cfft_plan plan) + { + if (plan->packplan) return plan->packplan->length; + return plan->blueplan->n; + } + +WARN_UNUSED_RESULT int rfft_backward(rfft_plan plan, double c[], double fct) + { + if (plan->packplan) + return rfftp_backward(plan->packplan,c,fct); + else // if (plan->blueplan) + return rfftblue_backward(plan->blueplan,c,fct); + } + +WARN_UNUSED_RESULT int rfft_forward(rfft_plan plan, double c[], double fct) + { + if (plan->packplan) + return rfftp_forward(plan->packplan,c,fct); + else // if (plan->blueplan) + return rfftblue_forward(plan->blueplan,c,fct); + } diff --git a/impulse/fft/c_pocketfft/pocketfft.h b/impulse/fft/c_pocketfft/pocketfft.h new file mode 100644 index 0000000..9eb3985 --- /dev/null +++ b/impulse/fft/c_pocketfft/pocketfft.h @@ -0,0 +1,34 @@ +/* + * This file is part of pocketfft. + * Licensed under a 3-clause BSD style license - see LICENSE.md + */ + +/*! \file pocketfft.h + * Public interface of the pocketfft library + * + * Copyright (C) 2008-2018 Max-Planck-Society + * \author Martin Reinecke + */ + +#ifndef POCKETFFT_H +#define POCKETFFT_H + +#include + +struct cfft_plan_i; +typedef struct cfft_plan_i * cfft_plan; +cfft_plan make_cfft_plan (size_t length); +void destroy_cfft_plan (cfft_plan plan); +int cfft_backward(cfft_plan plan, double c[], double fct); +int cfft_forward(cfft_plan plan, double c[], double fct); +size_t cfft_length(cfft_plan plan); + +struct rfft_plan_i; +typedef struct rfft_plan_i * rfft_plan; +rfft_plan make_rfft_plan (size_t length); +void destroy_rfft_plan (rfft_plan plan); +int rfft_backward(rfft_plan plan, double c[], double fct); +int rfft_forward(rfft_plan plan, double c[], double fct); +size_t rfft_length(rfft_plan plan); + +#endif diff --git a/impulse/fft/c_pocketfft/pocketfft.nim b/impulse/fft/c_pocketfft/pocketfft.nim new file mode 100644 index 0000000..b38ec31 --- /dev/null +++ b/impulse/fft/c_pocketfft/pocketfft.nim @@ -0,0 +1,360 @@ +## This is a wrapper of the C library for PocketFFT +## https://gitlab.mpcdf.mpg.de/mtr/pocketfft/ +## +## The file `pocketfft_src.h` is a header only version of the combined +## `pocketfft.h` and `pocketfft.c` files so that we + +import strutils, os +import complex, math +const + pocketFFTPath = currentSourcePath.parentDir + +{.pragma: pocket, header: pocketFFTPath / "pocketfft.h".} +# Make sure to compile the C file! +{.compile: pocketFFTPath / "pocketfft.c".} + +## These are all the relevant types. Most of them are not actually needed. We could also +## just use opaque types for `rfft_plan` and `cfft_plan` and call it a day. +## Note that we depend on the fact that the `cmplx` Complex data type used in +## PocketFFT is binary compatible with Nim's stdlib `Complex64` type (i.e. +## a flat struct of `(64-bit real part, 64-bit imaginary part)`. +const NFCT = 25 +type + rfftp_fctdata = object + fct: csize_t + tw: ptr float + tws: ptr float + + rfftp_plan_i {.importc: "rfftp_plan", pocket.} = object + length: csize_t + nfct: csize_t + mem: ptr float + fct: array[NFCT, rfftp_fctdata] + + cfftp_fctdata = object + fct: csize_t + tw: ptr Complex64 + tws: ptr Complex64 + + cfftp_plan_i {.importc: "cfftp_plan", pocket.} = object + length: csize_t + nfct: csize_t + mem: ptr Complex64 + fct: array[NFCT, cfftp_fctdata] + + cfftp_plan {.importc: "cfftp_plan", pocket.} = ptr cfftp_plan_i + + rfftp_plan {.importc: "rfftp_plan", pocket.} = ptr rfftp_plan_i + + fftblue_plan_i {.importc: "fftblue_plan_i", pocket.} = object + n: csize_t + n2: csize_t + plan: cfftp_plan + mem: ptr float + bk: ptr float + pkf: ptr float + + fftblue_plan {.importc: "fftblue_plan", pocket.} = ptr fftblue_plan_i + + rfft_plan_i = object + packplan: rfftp_plan + blueblan: fftblue_plan + + cfft_plan_i = object + packplan: cfftp_plan + blueplan: fftblue_plan + + cfft_plan {.importc: "cfft_plan", pocket.} = ptr cfft_plan_i + + rfft_plan {.importc: "rfft_plan", pocket.} = ptr rfft_plan_i + +proc make_cfft_plan*(length: csize_t): cfft_plan {.importc: "make_cfft_plan", pocket.} +proc destroy_cfft_plan*(plan: cfft_plan) {.importc: "destroy_cfft_plan", pocket.} +proc cfft_backward*(plan: cfft_plan; c: ptr Complex64; fct: cdouble): cint {.importc: "cfft_backward", pocket.} +proc cfft_forward*(plan: cfft_plan; c: ptr Complex64; fct: cdouble): cint {.importc: "cfft_forward", pocket.} +proc cfft_length*(plan: cfft_plan): csize_t {.importc: "cfft_length", pocket.} + +proc make_rfft_plan*(length: csize_t): rfft_plan {.importc: "make_rfft_plan", pocket, cdecl.} +proc destroy_rfft_plan*(plan: rfft_plan) {.importc: "destroy_rfft_plan", pocket.} +proc rfft_backward*(plan: rfft_plan; c: ptr cdouble; fct: cdouble): cint {.importc: "rfft_backward", pocket.} +proc rfft_forward*(plan: rfft_plan; c: ptr cdouble; fct: cdouble): cint {.importc: "rfft_forward", pocket.} +proc rfft_length*(plan: rfft_plan): csize_t {.importc: "rfft_length", pocket.} + +## Non user facing helper types to wrap the make / call / destroy logic. +type + FFTPlanReal* = object + pocket*: rfft_plan + length*: int + + FFTPlanComplex* = object + pocket*: cfft_plan + length*: int + +when (NimMajor, NimMinor, NimPatch) >= (2, 0, 0): + proc `=destroy`*(plan: FFTPlanReal) = + ## Frees the `rfft_plan` + destroy_rfft_plan(plan.pocket) + + proc `=destroy`(plan: FFTPlanComplex) = + ## Frees the `cfft_plan` + destroy_cfft_plan(plan.pocket) +else: + proc `=destroy`*(plan: var FFTPlanReal) = + ## Frees the `rfft_plan` + destroy_rfft_plan(plan.pocket) + + proc `=destroy`(plan: var FFTPlanComplex) = + ## Frees the `cfft_plan` + destroy_cfft_plan(plan.pocket) + +proc init*(_: typedesc[FFTPlanReal], length: int): FFTPlanReal = + result = FFTPlanReal(pocket: make_rfft_plan(length.csize_t)) + +proc init*(_: typedesc[FFTPlanComplex], length: int): FFTPlanComplex = + result = FFTPlanComplex(pocket: make_cfft_plan(length.csize_t)) + +func isOdd*(i: int): bool = (i and 1) == 1 + +type MemoryView*[T] = ptr UncheckedArray[T] +template address(arg: untyped): untyped = + when (NimMajor, NimMinor, NimPatch) >= (2, 0, 0): + arg.addr + else: + arg.unsafeAddr +func toPtr*[T](ar: openArray[T]): MemoryView[T] = cast[ptr UncheckedArray[T]](ar[0].address) + +proc unpackFFT*(data: MemoryView[float]; outDat: MemoryView[Complex64], inLen: int) = + ## 'Unpacks' a given FFT result from a call to `rfft_forward/backward` as returned + ## by PocketFFT. + ## + ## That is we construct `Complex64` values from the interleaved `Re, Im` values + ## and recover the `0` complex parts of the packed values. + ## + ## Note that it *does not* recover the hermitian conjugate terms of the FFT + ## result. If you wish the entire FFT result as if you had called `cfft` (or the + ## regular user facing `fft` procedure), call `symmetrize` instead / in addition. + let lenOdd = isOdd inLen + var k = 0 + var cmpl = complex(data[0]) # first element `Re(y[0]), Im(y[0]) == 0` + outDat[k] = cmpl + inc k + for i in 1 ..< inLen: # from `i = 1` odd elements are `Re(y[i])` and even `Im(y[i])` + if isOdd i: # set the real part + cmpl.re = data[i] + elif i > 0: # set the complex part and add + cmpl.im = data[i] + outDat[k] = cmpl + inc k + if not lenOdd: # In case input is even length, `data.high` is odd, thus need to add last + cmpl.im = 0.0 + outDat[k] = cmpl + +proc unpackFFT*(data: openArray[float]): seq[Complex64] = + ## Out of place version of the above so that `symmetrize` can call the inplace + ## version without reallocating again. + let outLen = if isOdd data.len: (data.len + 1) div 2 # odd, recover 1 value + else: (data.len + 2) div 2 # recover 2 values + result = newSeq[Complex64](outLen) + unpackFFT(toPtr data, toPtr result, data.len) + +proc symmetrize*[T: float | Complex64](data: MemoryView[T], res: MemoryView[Complex64], inLen, outLen: int) = + ## Recovers the latter half of the FFT terms, i.e. the hermitian conjugate + ## terms from N/2 to N-1. + ## If the input is `float` data, first `unpackFFTs` it. + when T is float: # unpack data + unpackFFT(data, res, inLen) + else: # copy all the existing data + copyMem(res, data, inLen * sizeof(Complex64)) + var k = outLen - 1 # fill result from end + for i in 1 ..< ceilDiv(outLen, 2): + res[k] = conjugate res[i] + dec k + +proc symmTargetSize*[T: float | Complex64](lastVal: T, length: int): int = + ## Returns the correct target size needed in `symmetrize` for a given input data type + when T is float: + result = length + else: # copy all the existing data + let sub = if lastVal.im == 0.0: 2 ## Corresponds to even input, two zeroes recovered + else: 1 ## uneven input, one zero recovered + result = (length * 2 - sub) + +proc symmTargetSize*[T: float | Complex64](data: openArray[T]): int = + result = symmTargetSize(data[^1], data.len) + +proc symmetrize*[T: float | Complex64](data: openArray[T]): seq[Complex64] = + result = newSeq[Complex64](symmTargetSize(data)) + symmetrize(toPtr data, toPtr result, data.len, result.len) + +type + ## `nkBackward`: Normalization by `1` in FFT and `1/N` in inverse FFT + ## `nkForward`: Normalization by `1/N` in FFT and `1` in inverse FFT + ## `nkOrtho`: Normalization by `1/√N` for both directions + ## `nkCustom`: Custom normalization value given + NormalizeKind* = enum nkBackward, nkOrtho, nkForward, nkCustom + Normalize* = object + kind*: NormalizeKind + value*: float ## Actual normalization value + +proc initNormalize*(kind: NormalizeKind, forward: bool, value: float, length: int): Normalize = + case kind + of nkBackward: result = Normalize(kind: nkBackward, value: if forward: 1.0 else: 1.0 / length.float) + of nkForward: result = Normalize(kind: nkForward, value: if forward: 1.0 / length.float else: 1.0) + of nkOrtho: result = Normalize(kind: nkOrtho, value: 1.0 / sqrt(length.float)) + of nkCustom: result = Normalize(kind: nkCustom, value: value) + +template callFFT(plan, fwd, bck, data, length, forward, norm: untyped): untyped = + if forward: + let err = fwd(plan.pocket, data[0].address, norm.value) + if err != 0: + raise newException(Exception, "Forward FFT calculation failed.") + else: + let err = bck(plan.pocket, data[0].address, norm.value) + if err != 0: + raise newException(Exception, "Backward FFT calculation failed.") + +proc rfft*(data: MemoryView[float], length: int, forward: bool, normalize = nkBackward, normValue = Inf) = + ## Performs an FFT of purely real input `data`. The calculation happens inplace. The array + ## must be of length `length`. + ## `forward` determines if it's the forward or inverse FFT. + ## + ## This procedure is not intended as a main user facing proc. It is implementation + ## specific to how PocketFFT performs the FFT for real inputs. It makes use of two symmetries + ## in order to store the complex result in the input `data` array of length `length`. + ## + ## This means if you use this inplace procedure for performance reasons, be aware that the + ## `data` array will contain the FFT result `y[k]` as follows. + ## + ## If `length` is even: + ## + ## `data = [Re(y[0]), Re(y[1]), Im(y[1]), ..., Re(y[N/2])]` + ## + ## If `length` is odd: + ## + ## `data = [Re(y[0]), Re(y[1]), Im(y[1]), ..., Re(y[N/2]), Im(y[N/2])]` + ## + ## Note that the imaginary part of `y[0]` is dropped in both cases and for even length inputs + ## the imaginary part of the element `y[N/2]` is also dropped. These are both always zero + ## for these two cases and thus redundant. + ## All terms from `N/2+1` to `N-1` are the hermitian conjugate of the first `1` to `N/2` terms. + ## + ## Feel free to call `unpackFFT` on the `data` array to produce a `seq/array/Tensor` of + ## the input, which is in `Complex64` and exactly `N/2` in length. + ## + ## See the explanation below for why these symmetries hold. + ## + ## Those two symmetries are: + ## 1. For a real input array, the result of the FFT will always be 'symmetric'. + ## Given notation as x = [x_0, x_1, ..., x_N] our input and y = [y_0, ..., y_N] our FFT output. + ## If all x_i are real, then the only complex contributions come from the exp(-2πi kn/N) term. + ## With n running from 0 to N-1, + ## + ## `y[k] = Σ_{i = 0}^{N-1} exp( -2πi kn / N ) x[n]` + ## + ## we can see that the second half are the hermitian conjugate terms, because the `N - j`-th + ## term is always h.c. to the `j`-th term. + ## + ## See by, for a fixed integer `n` and `k ∈ {0, 1, ..., N-1}`: + ## + ## `k = N-1: exp(-2πi (N-1) n / N) = exp(-2πi Nn / N + 2πi n / N) = exp(-2πi n) + exp(2πi n / N)`, + ## with `exp(-2πi n) = 1`, due to n being an integer + ## `k = 1 : exp(-2πi n / N)` + ## + ## so `k = 1` is the h.c. of `N-1`. + ## + ## With that knowledge for the real FFT we can drop all the hermitian conjugate terms. + ## + ## 2. Then PocketFFT goes one step further by realizing that + ## + ## `y[0] = Σ_i x[i]`, all exponents are 0 and thus `e^-... = 1` + ## + ## allowing us to drop the imaginary part of the first `y[0]` and secondly for N even elements, + ## we can also drop the imaginary part of the 'middle' term (for `N = even` due to the `0`-th term + ## not contributing to the exponentials, the effective number of complex terms is odd, thus there + ## is one middle term; for `N = odd` no such term exists and thus cannot be dropped). For said middle + ## term the imaginary part is also always exactly `0`, because all exponents are exactly multiples + ## of `πi`. + ## + ## `k = N/2: exp(-2πi N/2 n / N) = exp(-2πi n/2) = exp(-πi n)` + ## + ## So that way PocketFFT can always return data into an output array of N elements for an input + ## of `N` elements. + ## + ## If the above is hard to follow, take a pen and paper and write out a discrete fourier transform by hand. + let norm = initNormalize(normalize, forward, normValue, length) + let plan = FFTPlanReal.init(length) + let fwd = rfft_forward + let bck = rfft_backward + callFFT(plan, fwd, bck, data, length, forward, norm) + +proc fft_impl*[T: float | Complex64](data: MemoryView[T], length: int, forward: bool, normalize = nkBackward, normValue = Inf) = + ## Performs an FFT of input `data`. The calculation happens inplace. The array + ## must be of length `length`. + ## `forward` determines if it's the forward or inverse FFT. + let norm = initNormalize(normalize, forward, normValue, length) + when T is float: + let plan = FFTPlanReal.init(length) + let fwd = rfft_forward + let bck = rfft_backward + else: + let plan = FFTPlanComplex.init(length) + let fwd = cfft_forward + let bck = cfft_backward + callFFT(plan, fwd, bck, data, length, forward, norm) + +proc fft*[T: float | Complex64](data: var openArray[T], forward = true, normalize = nkBackward, normValue = Inf) = + ## Performs an FFT of input `data`. The calculation happens inplace. This + ## means for real input data, the result is stored as packed `float` data (see `rfft` above). + ## + ## `forward` determines if it's the forward or inverse FFT. + fft_impl(toPtr data, data.len, forward) + +proc rfft_packed*(data: openArray[float], forward = true, normalize = nkBackward, normValue = Inf): seq[float] = + ## Performs an FFT of input real `data`. The result is returned as a `seq`. The array + ## must be of length `length`. The returned data is in maximally packed form as `float` + ## data. See the `rfft` overload above. + ## + ## `forward` determines if it's the forward or inverse FFT. + result = @data + rfft(toPtr(result), data.len, forward) + +proc rfft*(data: openArray[float], forward: bool = true, normalize = nkBackward, normValue = Inf): seq[Complex64] = + ## Performs an FFT of input real `data`. The result is returned as a `seq[Complex64]`. + ## The returned data only contains the non redundant N/2 first terms of the resulting + ## FFT. Call `symmetrize` on the result to compute the (symmetric) hermitian conjugate + ## terms from `N/2` to `N-1` (`N == data.len`). + ## + ## Alternatively, simply call `fft` above, which handles this for you. + ## + ## `forward` determines if it's the forward or inverse FFT. + var res = @data + fft(res, forward) # `rfft` result as a `seq[float]` + result = unpackFFT res + +proc fft*[T: float | Complex64](data: openArray[T], forward = true, normalize = nkBackward, normValue = Inf): seq[Complex64] = + ## Performs an FFT of input `data`. The result is returned as a `seq`. The array + ## must be of length `length`. + ## + ## `forward` determines if it's the forward or inverse FFT. + ## + ## For the real -> complex transform, this is 2 allocations: + ## - 1 copy of the input data + ## - 1 allocation for the output array + ## For complex -> complex we get away with a single clone of the input. + when T is float: + result = symmetrize rfft_packed(data, forward, normalize, normValue) + else: + result = @data + fft(result, forward, normalize, normValue) + +proc ifft*[T: float | Complex64](data: openArray[T], backward = true, normalize = nkBackward, normValue = Inf): seq[Complex64] = + ## Performs an inverse FFT of input `data`. The result is returned as a `seq`. The array + ## must be of length `length`. + ## + ## `backward` determines if it's the inverse (default) or forward FFT. + ## + ## For the real -> complex transform, this is 2 allocations: + ## - 1 copy of the input data + ## - 1 allocation for the output array + ## For complex -> complex we get away with a single clone of the input. + fft(data, forward = not backward, normalize, normValue) diff --git a/impulse/fft/c_pocketfft/pocketfft_arraymancer.nim b/impulse/fft/c_pocketfft/pocketfft_arraymancer.nim new file mode 100644 index 0000000..021cef3 --- /dev/null +++ b/impulse/fft/c_pocketfft/pocketfft_arraymancer.nim @@ -0,0 +1,94 @@ +import pocketfft +import arraymancer / tensor +export tensor + +## Unfortunately we need to duplicate some code for Arraymancer here. The issue is that +## there is no working way to define e.g. a concept `ArrayLike[T]` that allows to +## formalize the internal procs like `unpackFFT` or `symmetrize`. Any such attempts +## leads to internal compiler errors. +## +## Something like: +## +## type +## ArrayLike*[T] = concept x +## x.len is int +## x.high is int +## x[0] is T +## x[^1] is T +## toPtr(x) is MemoryView[T] +## +## Used in the main file crashes the compiler with +## Error: internal error: openArrayLoc: ArrayLike[Complex, ArrayLike] + +func toPtr*[T](ar: Tensor[T]): MemoryView[T] = cast[ptr UncheckedArray[T]](ar.toUnsafeView) + +proc unpackFFT*(data: Tensor[float]): Tensor[Complex64] = + ## Out of place version of the above so that `symmetrize` can call the inplace + ## version without reallocating again. + let outLen = if isOdd data.len: (data.len + 1) div 2 # odd, recover 1 value + else: (data.len + 2) div 2 # recover 2 values + result = newTensorUninit[Complex64](outLen) + unpackFFT(toPtr data, toPtr result, data.len) + +proc fft*[T: float | Complex64](data: var Tensor[T], forward: bool = true, normalize = nkBackward, normValue = Inf) = + ## Performs an FFT of input `data`. The calculation happens inplace. The tensor + ## must be of length `length`. + ## `forward` determines if it's the forward or inverse FFT. + fft_impl(toPtr data, data.size.int, forward, normalize, normValue) + +proc rfft_packed*(data: Tensor[float], forward: bool = true, normalize = nkBackward, normValue = Inf): Tensor[float] = + ## Performs an FFT of input real `data`. The result is returned as a `Tensor`. The array + ## must be of length `length`. The returned data is in maximally packed form as `float` + ## data. See the `rfft` overload above. + ## + ## `forward` determines if it's the forward or inverse FFT. + result = data.clone() + rfft(toPtr(result), data.len, forward, normalize, normValue) + +proc rfft*(data: Tensor[float], forward: bool = true, normalize = nkBackward, normValue = Inf): Tensor[Complex64] = + ## Performs an FFT of input real `data`. The result is returned as a `Tensor[Complex64]`. + ## The returned data only contains the non redundant N/2 first terms of the resulting + ## FFT. Call `symmetrize` on the result to compute the (symmetric) hermitian conjugate + ## terms from `N/2` to `N-1` (`N == data.len`). + ## + ## Alternatively, simply call `fft` above, which handles this for you. + ## + ## `forward` determines if it's the forward or inverse FFT. + var res = data.clone() + fft(res, forward, normalize, normValue) # `rfft` result as a `Tensor[float]` + result = unpackFFT res + +proc symmTargetSize*[T: float | Complex64](data: Tensor[T]): int = + result = symmTargetSize(data[data.len - 1], data.len) + +proc symmetrize*[T: float | Complex64](data: Tensor[T]): Tensor[Complex64] = + result = newTensorUninit[Complex64](symmTargetSize(data)) + symmetrize(toPtr data, toPtr result, data.len, result.len) + +proc fft*[T: float | Complex64](data: Tensor[T], forward = true, normalize = nkBackward, normValue = Inf): Tensor[Complex64] = + ## Performs an FFT of input `data`. The result is returned as a `Tensor`. The array + ## must be of length `length`. + ## + ## `forward` determines if it's the forward or inverse FFT. + ## + ## For the real -> complex transform, this is 2 allocations: + ## - 1 copy of the input data + ## - 1 allocation for the output array + ## For complex -> complex we get away with a single clone of the input. + when T is float: + result = symmetrize rfft_packed(data, forward, normalize, normValue) + else: + result = data.clone() + fft(result, forward, normalize, normValue) + +proc ifft*[T: float | Complex64](data: Tensor[T], backward = true, normalize = nkBackward, normValue = Inf): Tensor[Complex64] = + ## Performs an inverse FFT of input `data`. The result is returned as a `Tensor`. The array + ## must be of length `length`. + ## + ## `backward` determines if it's the inverse (default) or forward FFT. + ## + ## For the real -> complex transform, this is 2 allocations: + ## - 1 copy of the input data + ## - 1 allocation for the output array + ## For complex -> complex we get away with a single clone of the input. + fft(data, forward = not backward, normalize, normValue) diff --git a/impulse/fft/README_pocketfft.md b/impulse/fft/cpp_pocketfft/README_pocketfft.md similarity index 100% rename from impulse/fft/README_pocketfft.md rename to impulse/fft/cpp_pocketfft/README_pocketfft.md diff --git a/impulse/fft/cpp_pocketfft/pocketfft.nim b/impulse/fft/cpp_pocketfft/pocketfft.nim new file mode 100644 index 0000000..5ed3039 --- /dev/null +++ b/impulse/fft/cpp_pocketfft/pocketfft.nim @@ -0,0 +1,339 @@ +# Impulse +# Copyright (c) 2020-Present The SciNim Project +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + std/[strutils, os, complex], + ./std_cpp + +static: doAssert defined(cpp), "This module requires compilation in C++ mode" + +# ############################################################ +# +# PocketFFT wrapper +# +# ############################################################ + +const + pocketFFTPath = currentSourcePath.rsplit(DirSep, 1)[0] + + +{.pragma: pocket, header: pocketFFTPath / "pocketfft_hdronly.h".} + +when compileOption("threads"): + {.localPassC: "-I" & pocketFFTPath.} +else: + {.localPassC: "-DPOCKETFFT_NO_MULTITHREADING -I" & pocketFFTPath.} + +type + Shape{.importcpp: "pocketfft::shape_t", pocket.} = CppVector[uint] + Stride{.importcpp: "pocketfft::stride_t", pocket.} = CppVector[int] + +proc c2c[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + forward: bool, + dataIn: ptr UncheckedArray[CppComplex[T]], + dataOut: ptr UncheckedArray[CppComplex[T]], + fct: T, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::c2c(@)", pocket, cdecl.} + +proc r2c[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + forward: bool, + dataIn: ptr T or ptr UncheckedArray[T], + dataOut: ptr UncheckedArray[CppComplex[T]], + fct: T, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::r2c(@)", pocket, cdecl.} + +proc c2r[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + forward: bool, + dataIn: ptr UncheckedArray[CppComplex[T]], + dataOut: ptr T or ptr UncheckedArray[T], + fct: T, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::c2r(@)", pocket, cdecl.} + +proc r2r_fftpack[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + real2hermitian: bool, + forward: bool, + dataIn: ptr T or ptr UncheckedArray[T], + dataOut: ptr T or ptr UncheckedArray[T], + fct: T, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::r2r_fftpack(@)", pocket, cdecl.} + +proc r2r_separable_hartley[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + forward: bool, + dataIn: ptr T or ptr UncheckedArray[T], + dataOut: ptr T or ptr UncheckedArray[T], + fct: T, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::r2r_separable_hartley(@)", pocket, cdecl.} + +proc r2r_genuine_hartley[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + forward: bool, + dataIn: ptr T or ptr UncheckedArray[T], + dataOut: ptr T or ptr UncheckedArray[T], + fct: T, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::r2r_genuine_hartley(@)", pocket, cdecl.} + +proc dct[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + dctType: range[1'i32..4'i32], + dataIn: ptr T or ptr UncheckedArray[T], + dataOut: ptr T or ptr UncheckedArray[T], + fct: T, + ortho: bool, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::dct(@)", pocket, cdecl.} + +proc dst[T: SomeFloat]( + shape: Shape, + strideIn: Stride, + strideOut: Stride, + axes: Shape, + dstType: range[1'i32..4'i32], + dataIn: ptr T or ptr UncheckedArray[T], + dataOut: ptr T or ptr UncheckedArray[T], + fct: T, + ortho: bool, + nthreads: uint = 1 + ) {.importcpp: "pocketfft::dst(@)", pocket, cdecl.} + +# High-level API +# ------------------------------------------------------ + +type + DataDesc*[T] = object + ## Descriptor of the data used in or out of the FFT + shape*: Shape + stride*: Stride + buf*: ptr UncheckedArray[T] + + FFTDesc*[T] = object + ## Descriptor of the FFT + axes*: Shape + scalingFactor*: T + nthreads*: uint + forward*: bool + + DCTDesc*[T] = object + axes*: Shape + dctType*: range[1'i32..4'i32] + scalingFactor*: T + nthreads*: uint + ortho*: bool + +func init*[T](_: type DataDesc[T], + buffer: ptr T or ptr UncheckedArray[T], + shape, stride: distinct openArray[SomeInteger] + ): DataDesc[T] {.inline.} = + ## Initialize a description of the input or output data + ## from the `shape`, `stride` and `buffer` passed in. + ## stride is the distance in elements T between elements + ## on the same axis + assert shape.len = stride.len + assert not buffer.isNil + + result.shape = newCppVector[uint](shape.len) + result.stride = newCppVector[int](stride.len) + + for i in 0 ..< shape.len: + result.shape[i] = uint(shape[i]) + result.stride[i] = int(stride[i] * sizeof(T)) + + result.buf = cast[ptr UncheckedArray[T]](buffer) + +func init*[T](_: type DataDesc[T], + buffer: ptr T or ptr UncheckedArray[T], + shape: openArray[SomeInteger], + ): DataDesc[T] {.inline.} = + ## Initialize a description of the input or output data + ## from the `shape` and `buffer` passed in. + ## The data is assumed to be stored in + ## C-contiguous (row-major) format + assert not buffer.isNil + + result.shape = newCppVector[uint](shape.len) + result.stride = newCppVector[int](shape.len) + + var accum = 1 * sizeof(T) + for i in countdown(shape.len-1, 0): + result.stride[i] = int(accum) + accum *= shape[i] + + for i in 0 ..< shape.len: + result.shape[i] = uint(shape[i]) + + result.buf = cast[ptr UncheckedArray[T]](buffer) + +func init*[T](_: type FFTDesc[T], + axes: varargs[int], + forward: bool, + scalingFactor: T = 1, + nthreads = 1 + ): FFTDesc[T] {.inline.} = + ## Initialize a description of the FFT + ## Set `nthreads` to 0 to use all your available cores + ## Multithreading is only implemented for multidimensional FFT + result.axes = newCppVector[uint](axes.len) + for i in 0 ..< axes.len: + result.axes[i] = uint axes[i] + result.scalingFactor = scalingFactor + result.forward = forward + result.nthreads = uint nthreads + +func init*[T](_: type DCTDesc[T], + axes: varargs[int], + dctType: range[1'i32..4'i32] = 2'i32, + ortho = false, + scalingFactor: T = 1, + nthreads = 1 + ): DCTDesc[T] {.inline.} = + ## Initialize a description of the Discrete CosineTransform + ## Set `nthreads` to 0 to use all your available cores + ## Multithreading is only implemented for multidimensional FFT + result.axes = newCppVector[uint](axes.len) + for i in 0 ..< axes.len: + result.axes[i] = uint axes[i] + result.dctType = dctType + result.ortho = ortho + result.scalingFactor = scalingFactor + result.nthreads = uint nthreads + +func apply*[In, Out]( + fft: FFTDesc, + descOut: var DataDesc[Out], + descIn: DataDesc[In], + ) {.inline.} = + when In is Complex and Out is Complex: + c2c( + descIn.shape, + descIn.stride, + descOut.stride, + fft.axes, + fft.forward, + cast[ptr UncheckedArray[CppComplex[In.T]]](descIn.buf), + cast[ptr UncheckedArray[CppComplex[Out.T]]](descOut.buf), + fft.scalingFactor, + fft.nthreads + ) + elif Out is Complex: + r2c( + descIn.shape, + descIn.stride, + descOut.stride, + fft.axes, + fft.forward, + descIn.buf, + cast[ptr UncheckedArray[CppComplex[Out.T]]](descOut.buf), + fft.scalingFactor, + fft.nthreads + ) + elif In is Complex: + c2r( + descIn.shape, + descIn.stride, + descOut.stride, + fft.axes, + fft.forward, + cast[ptr UncheckedArray[CppComplex[Out.T]]](descIn.buf), + descOut.buf, + fft.scalingFactor, + fft.nthreads + ) + else: + {.error: "Not implemented".} + +func apply*[In, Out]( + dct: DCTDesc, + descOut: var DataDesc[Out], + descIn: DataDesc[In], + ) {.inline.} = + dct( + descIn.shape, + descIn.stride, + descOut.stride, + dct.axes, + dct.dctType, + descIn.buf, + descOut.buf, + dct.scalingFactor, + dct.ortho, + dct.nthreads + ) + +# Sanity checks +# ------------------------------------------------------ + +when isMainModule: + + block: # https://docs.scipy.org/doc/scipy/reference/tutorial/fft.html#id10 + let dIn = @[1.0, 2.0, 1.0, -1.0, 1.5] + var dOut = newSeq[Complex[float64]](dIn.len) + + let dInDesc = DataDesc[float64].init( + dIn[0].unsafeAddr, [dIn.len] + ) + var dOutDesc = DataDesc[Complex[float64]].init( + dOut[0].addr, [dOut.len] + ) + + let fft = FFTDesc[float64].init( + axes = [0], + forward = true + ) + + fft.apply(dOutDesc, dInDesc) + echo dIn + echo dOut + + block: # https://docs.scipy.org/doc/scipy/reference/generated/scipy.fft.dct.html + let dIn = @[4.0, 3.0, 5.0, 10.0] + var dOut = newSeq[float64](dIn.len) + + let dInDesc = DataDesc[float64].init( + dIn[0].unsafeAddr, [dIn.len] + ) + var dOutDesc = DataDesc[float64].init( + dOut[0].addr, [dOut.len] + ) + + let dct = DCTDesc[float64].init( + axes = [0] + ) + + dct.apply(dOutDesc, dInDesc) + echo dIn + echo dOut diff --git a/impulse/fft/pocketfft_hdronly.h b/impulse/fft/cpp_pocketfft/pocketfft_hdronly.h similarity index 100% rename from impulse/fft/pocketfft_hdronly.h rename to impulse/fft/cpp_pocketfft/pocketfft_hdronly.h diff --git a/impulse/fft/std_cpp.nim b/impulse/fft/cpp_pocketfft/std_cpp.nim similarity index 100% rename from impulse/fft/std_cpp.nim rename to impulse/fft/cpp_pocketfft/std_cpp.nim diff --git a/impulse/fft/pocketfft.nim b/impulse/fft/pocketfft.nim index 5ed3039..0decbb6 100644 --- a/impulse/fft/pocketfft.nim +++ b/impulse/fft/pocketfft.nim @@ -1,339 +1,12 @@ -# Impulse -# Copyright (c) 2020-Present The SciNim Project -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - std/[strutils, os, complex], - ./std_cpp - -static: doAssert defined(cpp), "This module requires compilation in C++ mode" - -# ############################################################ -# -# PocketFFT wrapper -# -# ############################################################ - -const - pocketFFTPath = currentSourcePath.rsplit(DirSep, 1)[0] - - -{.pragma: pocket, header: pocketFFTPath / "pocketfft_hdronly.h".} - -when compileOption("threads"): - {.localPassC: "-I" & pocketFFTPath.} +when defined(cpp): + ## When building on the C++ backend, uses the header only C++ library, which + ## has some added features. + import cpp_pocketfft/pocketfft + export pocketfft else: - {.localPassC: "-DPOCKETFFT_NO_MULTITHREADING -I" & pocketFFTPath.} - -type - Shape{.importcpp: "pocketfft::shape_t", pocket.} = CppVector[uint] - Stride{.importcpp: "pocketfft::stride_t", pocket.} = CppVector[int] - -proc c2c[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - forward: bool, - dataIn: ptr UncheckedArray[CppComplex[T]], - dataOut: ptr UncheckedArray[CppComplex[T]], - fct: T, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::c2c(@)", pocket, cdecl.} - -proc r2c[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - forward: bool, - dataIn: ptr T or ptr UncheckedArray[T], - dataOut: ptr UncheckedArray[CppComplex[T]], - fct: T, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::r2c(@)", pocket, cdecl.} - -proc c2r[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - forward: bool, - dataIn: ptr UncheckedArray[CppComplex[T]], - dataOut: ptr T or ptr UncheckedArray[T], - fct: T, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::c2r(@)", pocket, cdecl.} - -proc r2r_fftpack[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - real2hermitian: bool, - forward: bool, - dataIn: ptr T or ptr UncheckedArray[T], - dataOut: ptr T or ptr UncheckedArray[T], - fct: T, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::r2r_fftpack(@)", pocket, cdecl.} - -proc r2r_separable_hartley[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - forward: bool, - dataIn: ptr T or ptr UncheckedArray[T], - dataOut: ptr T or ptr UncheckedArray[T], - fct: T, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::r2r_separable_hartley(@)", pocket, cdecl.} - -proc r2r_genuine_hartley[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - forward: bool, - dataIn: ptr T or ptr UncheckedArray[T], - dataOut: ptr T or ptr UncheckedArray[T], - fct: T, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::r2r_genuine_hartley(@)", pocket, cdecl.} - -proc dct[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - dctType: range[1'i32..4'i32], - dataIn: ptr T or ptr UncheckedArray[T], - dataOut: ptr T or ptr UncheckedArray[T], - fct: T, - ortho: bool, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::dct(@)", pocket, cdecl.} - -proc dst[T: SomeFloat]( - shape: Shape, - strideIn: Stride, - strideOut: Stride, - axes: Shape, - dstType: range[1'i32..4'i32], - dataIn: ptr T or ptr UncheckedArray[T], - dataOut: ptr T or ptr UncheckedArray[T], - fct: T, - ortho: bool, - nthreads: uint = 1 - ) {.importcpp: "pocketfft::dst(@)", pocket, cdecl.} - -# High-level API -# ------------------------------------------------------ - -type - DataDesc*[T] = object - ## Descriptor of the data used in or out of the FFT - shape*: Shape - stride*: Stride - buf*: ptr UncheckedArray[T] - - FFTDesc*[T] = object - ## Descriptor of the FFT - axes*: Shape - scalingFactor*: T - nthreads*: uint - forward*: bool - - DCTDesc*[T] = object - axes*: Shape - dctType*: range[1'i32..4'i32] - scalingFactor*: T - nthreads*: uint - ortho*: bool - -func init*[T](_: type DataDesc[T], - buffer: ptr T or ptr UncheckedArray[T], - shape, stride: distinct openArray[SomeInteger] - ): DataDesc[T] {.inline.} = - ## Initialize a description of the input or output data - ## from the `shape`, `stride` and `buffer` passed in. - ## stride is the distance in elements T between elements - ## on the same axis - assert shape.len = stride.len - assert not buffer.isNil - - result.shape = newCppVector[uint](shape.len) - result.stride = newCppVector[int](stride.len) - - for i in 0 ..< shape.len: - result.shape[i] = uint(shape[i]) - result.stride[i] = int(stride[i] * sizeof(T)) - - result.buf = cast[ptr UncheckedArray[T]](buffer) - -func init*[T](_: type DataDesc[T], - buffer: ptr T or ptr UncheckedArray[T], - shape: openArray[SomeInteger], - ): DataDesc[T] {.inline.} = - ## Initialize a description of the input or output data - ## from the `shape` and `buffer` passed in. - ## The data is assumed to be stored in - ## C-contiguous (row-major) format - assert not buffer.isNil - - result.shape = newCppVector[uint](shape.len) - result.stride = newCppVector[int](shape.len) - - var accum = 1 * sizeof(T) - for i in countdown(shape.len-1, 0): - result.stride[i] = int(accum) - accum *= shape[i] - - for i in 0 ..< shape.len: - result.shape[i] = uint(shape[i]) - - result.buf = cast[ptr UncheckedArray[T]](buffer) - -func init*[T](_: type FFTDesc[T], - axes: varargs[int], - forward: bool, - scalingFactor: T = 1, - nthreads = 1 - ): FFTDesc[T] {.inline.} = - ## Initialize a description of the FFT - ## Set `nthreads` to 0 to use all your available cores - ## Multithreading is only implemented for multidimensional FFT - result.axes = newCppVector[uint](axes.len) - for i in 0 ..< axes.len: - result.axes[i] = uint axes[i] - result.scalingFactor = scalingFactor - result.forward = forward - result.nthreads = uint nthreads - -func init*[T](_: type DCTDesc[T], - axes: varargs[int], - dctType: range[1'i32..4'i32] = 2'i32, - ortho = false, - scalingFactor: T = 1, - nthreads = 1 - ): DCTDesc[T] {.inline.} = - ## Initialize a description of the Discrete CosineTransform - ## Set `nthreads` to 0 to use all your available cores - ## Multithreading is only implemented for multidimensional FFT - result.axes = newCppVector[uint](axes.len) - for i in 0 ..< axes.len: - result.axes[i] = uint axes[i] - result.dctType = dctType - result.ortho = ortho - result.scalingFactor = scalingFactor - result.nthreads = uint nthreads - -func apply*[In, Out]( - fft: FFTDesc, - descOut: var DataDesc[Out], - descIn: DataDesc[In], - ) {.inline.} = - when In is Complex and Out is Complex: - c2c( - descIn.shape, - descIn.stride, - descOut.stride, - fft.axes, - fft.forward, - cast[ptr UncheckedArray[CppComplex[In.T]]](descIn.buf), - cast[ptr UncheckedArray[CppComplex[Out.T]]](descOut.buf), - fft.scalingFactor, - fft.nthreads - ) - elif Out is Complex: - r2c( - descIn.shape, - descIn.stride, - descOut.stride, - fft.axes, - fft.forward, - descIn.buf, - cast[ptr UncheckedArray[CppComplex[Out.T]]](descOut.buf), - fft.scalingFactor, - fft.nthreads - ) - elif In is Complex: - c2r( - descIn.shape, - descIn.stride, - descOut.stride, - fft.axes, - fft.forward, - cast[ptr UncheckedArray[CppComplex[Out.T]]](descIn.buf), - descOut.buf, - fft.scalingFactor, - fft.nthreads - ) - else: - {.error: "Not implemented".} - -func apply*[In, Out]( - dct: DCTDesc, - descOut: var DataDesc[Out], - descIn: DataDesc[In], - ) {.inline.} = - dct( - descIn.shape, - descIn.stride, - descOut.stride, - dct.axes, - dct.dctType, - descIn.buf, - descOut.buf, - dct.scalingFactor, - dct.ortho, - dct.nthreads - ) - -# Sanity checks -# ------------------------------------------------------ - -when isMainModule: - - block: # https://docs.scipy.org/doc/scipy/reference/tutorial/fft.html#id10 - let dIn = @[1.0, 2.0, 1.0, -1.0, 1.5] - var dOut = newSeq[Complex[float64]](dIn.len) - - let dInDesc = DataDesc[float64].init( - dIn[0].unsafeAddr, [dIn.len] - ) - var dOutDesc = DataDesc[Complex[float64]].init( - dOut[0].addr, [dOut.len] - ) - - let fft = FFTDesc[float64].init( - axes = [0], - forward = true - ) - - fft.apply(dOutDesc, dInDesc) - echo dIn - echo dOut - - block: # https://docs.scipy.org/doc/scipy/reference/generated/scipy.fft.dct.html - let dIn = @[4.0, 3.0, 5.0, 10.0] - var dOut = newSeq[float64](dIn.len) - - let dInDesc = DataDesc[float64].init( - dIn[0].unsafeAddr, [dIn.len] - ) - var dOutDesc = DataDesc[float64].init( - dOut[0].addr, [dOut.len] - ) - - let dct = DCTDesc[float64].init( - axes = [0] - ) - - dct.apply(dOutDesc, dInDesc) - echo dIn - echo dOut + ## On the C backend uses the regular C PocketFFT implemention. Comes with + ## arraymancer support. + import c_pocketfft/pocketfft + export pocketfft + import c_pocketfft/pocketfft_arraymancer + export pocketfft_arraymancer diff --git a/tests/config.nims b/tests/config.nims new file mode 100644 index 0000000..3bb69f8 --- /dev/null +++ b/tests/config.nims @@ -0,0 +1 @@ +switch("path", "$projectDir/../src") \ No newline at end of file diff --git a/tests/test_fft.nim b/tests/test_fft.nim new file mode 100644 index 0000000..10f3370 --- /dev/null +++ b/tests/test_fft.nim @@ -0,0 +1,114 @@ +import ../impulse/fft/pocketfft +import std / [random, math] + +## The test case in this file is based on the tests part of PocketFFT + +static: doAssert defined(c), "This test must be run on the C backend!" + +const maxlen = 8192 + +proc fill_random(data: var openArray[float]) = + for i in 0 ..< data.len: + data[i] = rand(-0.5 .. 0.5) + +proc errcalc(data, odata: openArray[float], length: int): float = + var + sum = 0.0 + errsum = 0.0 + for m in 0 ..< length: + #echo "Data ", data[m], " vs ", odata[m] + errsum += (data[m]-odata[m])*(data[m]-odata[m]) + sum += odata[m]*odata[m] + result = sqrt(errsum / sum) + +proc errcalc(data, odata: Tensor[float], length: int): float = + result = errcalc(toOpenArray(data.toUnsafeView(), 0, length), + toOpenArray(odata.toUnsafeView(), 0, length), + length) + +proc test_real(): int = + var data: array[maxlen, float] + var odata: array[maxlen, float] + const epsilon = 2e-15 + var ret = 0 + fill_random(odata) + odata[0] = 0.340188 + var errsum = 0.0 + for length in 1 ..< maxlen: + copyMem(data[0].addr, odata[0].addr, length * sizeof(float)) + var plan = make_rfft_plan(length.csize_t) + discard rfft_forward(plan, data[0].addr, 1.0); + discard rfft_backward(plan, data[0].addr, 1.0 / length.float); + destroy_rfft_plan(plan); + var err = errcalc(data, odata, length) + if err > epsilon: + echo "problem at real length ", length, " : ", err + ret = 1 + errsum += err; + echo "errsum: ", errsum + result = ret + +proc test_real_hl(): int = + var data: array[maxlen, float] + var odata: array[maxlen, float] + fill_random(odata) + const epsilon = 2e-15 + var ret = 0 + var errsum = 0.0 + for length in 1 ..< maxlen: + data = odata + fft(toOpenArray(data, 0, length), forward = true) + fft(toOpenArray(data, 0, length), forward = false) + var err = errcalc(data, odata, length) + if err > epsilon: + echo "problem at real length ", length, " : ", err + ret = 1 + errsum += err; + echo "errsum: ", errsum + result = ret + +proc test_real_hl_seq(): int = + const epsilon = 2e-15 + var ret = 0 + var errsum = 0.0 + for length in 1 ..< maxlen: + var data: seq[float] + var odata = newSeq[float](length) + fill_random(odata) + data = odata + + fft(data, forward = true) + fft(data, forward = false) + + let err = errcalc(data, odata, length) + if err > epsilon: + echo "problem at real length ", length, " : ", err + ret = 1 + errsum += err; + echo "errsum: ", errsum + result = ret + +proc test_real_hl_tensor(): int = + const epsilon = 2e-15 + var ret = 0 + var errsum = 0.0 + for length in 1 ..< maxlen: + var data: Tensor[float] + var odata = randomTensor[float](length, -0.5 .. 0.5) + data = odata.clone() + + fft(data, forward = true) + fft(data, forward = false) + + let err = errcalc(data, odata, length) + if err > epsilon: + echo "problem at real length ", length, " : ", err + ret = 1 + errsum += err; + echo "errsum: ", errsum + result = ret + +doAssert test_real() == 0 +doAssert test_real_hl() == 0 +doAssert test_real_hl_seq() == 0 +doAssert test_real_hl_tensor() == 0 diff --git a/tests/test_fft2.nim b/tests/test_fft2.nim new file mode 100644 index 0000000..b5fe9ab --- /dev/null +++ b/tests/test_fft2.nim @@ -0,0 +1,15 @@ +import ../impulse/fft +import unittest + +suite "FFT": + test "Misc tests": + # Written by @AngelEzquerra + let expected = complex([6, -2, -2, -2].toTensor.asType(float), [0, 2, 0, -2].toTensor.asType(float)) + check fft(arange(4.0).asType(Complex64)) == expected + check fft(arange(4.0).asType(float)) == expected + check fft(arange(4.0).asType(float), forward = true) == expected + check ifft(fft(arange(4.0))).real == arange(4.0) + check ifft(fft(arange(4.0))).imag == [0.0, 0.0, 0.0, 0.0].toTensor + check fft(arange(4.0), normalize=nkForward) == expected /. complex(expected.len.float) + check fft(arange(4.0), normalize=nkOrtho) == expected /. complex(sqrt(expected.len.float)) + check ifft(arange(4.0)) == fft(arange(4.0), forward=false)