#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <peck_fft.h>
#include <peck_fftr.h>

/*
 * Reprogram the ARM VFP status/control register (FPSCR) for fast,
 * non-trapping floating-point operation.
 *
 * The register is read, ANDed with keep_mask (which, among others, clears
 * bits 8-12 and 15) and then ORed with set_bits (bits 24 and 25) before being
 * written back.  Per the ARM architecture manual bits 24/25 are FZ
 * (flush-to-zero) and DN (default NaN), and bits 8-12/15 are the exception
 * trap enables.
 *
 * On targets where __arm__ is not defined this function does nothing.
 */
void enable_runfast(void) {
#ifdef __arm__
        static const unsigned int keep_mask = 0x04086060;
        static const unsigned int set_bits  = 0x03000000;
        unsigned int fpscr_val;
        /*
         * "__asm__" instead of "asm" so the block also builds in strict ISO
         * modes (-std=c99/-std=c11).  The output is marked earlyclobber ("&")
         * because it is written by the first instruction while both inputs are
         * still needed afterwards; without it the compiler may assign the
         * output and an input to the same register.
         */
        __asm__ volatile (
                "fmrx   %0, fpscr                       \n\t"   //r0 = FPSCR
                "and    %0, %0, %1                      \n\t"   //r0 = r0 & 0x04086060
                "orr    %0, %0, %2                      \n\t"   //r0 = r0 | 0x03000000
                "fmxr   fpscr, %0                       \n\t"   //FPSCR = r0
                : "=&r"(fpscr_val)
                : "r"(keep_mask), "r"(set_bits)
        );
#endif
}

int main(int argc, char *argv[]) {
    unsigned int i, j;
    peck_fftr_cfg p, pi;

    enable_runfast();

    const unsigned int N = 256;

    peck_fft_scalar in[N];
    peck_fft_cpx out[N/2 + 1];
    peck_fft_scalar res[N];

    for (i = 0; i < N; i++) {
#if USE_SIMD == SIMD_SSE2
        in[i] = _mm_set1_ps((i % 13) / 3);
#elif USE_SIMD == SIMD_NEON4
        in[i] = vdupq_n_f32((i % 13) / 3);
#elif USE_SIMD == SIMD_NEON2
        in[i] = vdup_n_f32((i % 13) / 3);
#else
        in[i] = (i % 13) / 3; 
#endif        
    }

    p = peck_fftr_alloc(N, 0, NULL, NULL);
    pi = peck_fftr_alloc(N, 1, NULL, NULL);

    for (j = 0; j < 10000; j++) {
        if (j == 0) {
            for (i = 0; i < 8; i++)
                printf("%d: %f\n", i, *(float*)&in[i]);
            printf("----\n");
        }

        peck_fftr(p, in, out);

        if (j == 0) {
            for (i = 0; i < 8; i++)
                printf("%d: %f %f\n", i, *(float*)&out[i].r, *(float*)&out[i].i);
            printf("----\n");
        }
        
        peck_fftri(pi, out, res);

        if (j == 0) {
            for (i = 0; i < 8; i++)
                printf("%d: %f %f\n", i, ((float*)&res[i])[0] / N, ((float*)&res[i])[1] / N);
        }
    }
    peck_fftr_free(p);    
    peck_fftr_free(pi);    
    peck_fft_cleanup();

    for (i = 0; i < N; i++) {
        if (fabs(((float*)&in[i])[0] - ((float*)&res[i])[0]/N) > 0.00001) {
            fprintf(stderr, "!!!! ERROR0 !!!! at %d\n", i);
            exit(EXIT_FAILURE);
        }
        if (fabs(((float*)&in[i])[1] - ((float*)&res[i])[1]/N) > 0.00001) {
            fprintf(stderr, "!!!! ERROR1 !!!! at %d\n", i);
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}
