Mercurial > hg > pa-neon
comparison sconv_neon.c @ 0:e0040ee59c3c
import
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Thu, 12 Jan 2012 17:27:46 +0100 |
| parents | |
| children | b829afbea564 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e0040ee59c3c |
|---|---|
| 1 /* | |
| 2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> | |
| 3 */ | |
| 4 | |
| 5 #include <stdlib.h> | |
| 6 #include <stdio.h> | |
| 7 #include <stdarg.h> | |
| 8 #include <string.h> | |
| 9 #include <math.h> | |
| 10 #include <sys/time.h> | |
| 11 #include <assert.h> | |
| 12 | |
| 13 typedef short int16_t; | |
| 14 typedef void (*pa_convert_func_t)(unsigned n, const void *a, void *b); | |
| 15 typedef long long unsigned int pa_usec_t; | |
| 16 | |
| 17 #define pa_assert(x) assert(x) | |
| 18 | |
| 19 #define PA_CLAMP_UNLIKELY(x, low, high) \ | |
| 20 (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x))) | |
| 21 | |
/*
 * Minimal stand-in for a printf-style logging call: formats the message
 * according to `format` and the variadic arguments, writes it to stdout
 * and terminates it with a newline.
 *
 * The message is formatted directly onto the stream instead of into a
 * fixed-size stack buffer, so arbitrarily long messages cannot overflow
 * anything.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}
| 30 | |
| 31 #define pa_log_debug pa_log_info | |
| 32 | |
| 33 static pa_usec_t pa_rtclock_now() { | |
| 34 struct timeval tv; | |
| 35 gettimeofday(&tv, NULL); | |
| 36 | |
| 37 return tv.tv_sec * 1000000ULL + tv.tv_usec; | |
| 38 } | |
| 39 | |
| 40 #if defined(__arm__) | |
| 41 | |
| 42 #include "arm_neon.h" | |
| 43 | |
| 44 void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) { | |
| 45 pa_assert(a); | |
| 46 pa_assert(b); | |
| 47 | |
| 48 for (; n > 0; n--) { | |
| 49 float v = *(a++); | |
| 50 | |
| 51 v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.0f); | |
| 52 *(b++) = (int16_t) lrintf(v * 0x7FFF); | |
| 53 } | |
| 54 } | |
| 55 | |
| 56 void pa_sconv_s16le_from_f32ne_neon(unsigned n, const float *a, int16_t *b) { | |
| 57 unsigned i; | |
| 58 | |
| 59 const float32x4_t plusone4 = vdupq_n_f32(1.0f); | |
| 60 const float32x4_t minusone4 = vdupq_n_f32(-1.0f); | |
| 61 const float32x4_t half4 = vdupq_n_f32(0.5f); | |
| 62 const float32x4_t scale4 = vdupq_n_f32(32767.0f); | |
| 63 const uint32x4_t mask4 = vdupq_n_u32(0x80000000); | |
| 64 | |
| 65 for (i = 0; i < n/4; i++) { | |
| 66 float32x4_t v4 = ((float32x4_t *)a)[i]; | |
| 67 v4 = vmulq_f32(vmaxq_f32(vminq_f32(v4, plusone4) , minusone4), scale4); | |
| 68 | |
| 69 const float32x4_t w4 = vreinterpretq_f32_u32(vorrq_u32(vandq_u32( | |
| 70 vreinterpretq_u32_f32(v4), mask4), vreinterpretq_u32_f32(half4))); | |
| 71 | |
| 72 ((int16x4_t *)b)[i] = vmovn_s32(vcvtq_s32_f32(vaddq_f32(v4, w4))); | |
| 73 } | |
| 74 | |
| 75 // leftovers | |
| 76 for (i = n & ~3; i < n; i++) { | |
| 77 b[i] = (int16_t) lrintf(PA_CLAMP_UNLIKELY(a[i], -1.0f, 1.0f) * 0x7FFF); | |
| 78 } | |
| 79 } | |
| 80 | |
| 81 void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *a, float *b) { | |
| 82 pa_assert(a); | |
| 83 pa_assert(b); | |
| 84 | |
| 85 for (; n > 0; n--) | |
| 86 *(b++) = ((float) (*(a++)))/(float) 0x7FFF; | |
| 87 } | |
| 88 | |
| 89 void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *a, float *b) { | |
| 90 unsigned i; | |
| 91 | |
| 92 const float32x4_t invscale4 = vdupq_n_f32(1.0f / 0x7FFF); | |
| 93 | |
| 94 for (i = 0; i < n/4; i++) { | |
| 95 ((float32x4_t *)b)[i] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(((int16x4_t *)a)[i])), invscale4); | |
| 96 } | |
| 97 | |
| 98 // leftovers | |
| 99 const float invscale = 1.0f / 0x7FFF; | |
| 100 for (i = n & ~3; i < n; i++) { | |
| 101 b[i] = a[i] * invscale; | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 #define SAMPLES 1019 | |
| 106 #define TIMES 300 | |
| 107 | |
| 108 static void run_test_from(void) { | |
| 109 int16_t samples[SAMPLES]; | |
| 110 int16_t samples_ref[SAMPLES]; | |
| 111 float floats[SAMPLES]; | |
| 112 int i; | |
| 113 pa_usec_t start, stop; | |
| 114 pa_convert_func_t func; | |
| 115 | |
| 116 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); | |
| 117 | |
| 118 memset(samples_ref, 0, sizeof(samples_ref)); | |
| 119 memset(samples, 0, sizeof(samples)); | |
| 120 | |
| 121 for (i = 0; i < SAMPLES; i++) { | |
| 122 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); | |
| 123 } | |
| 124 | |
| 125 func = (pa_convert_func_t) pa_sconv_s16le_from_float32ne; | |
| 126 func(SAMPLES, floats, samples_ref); | |
| 127 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | |
| 128 | |
| 129 for (i = 0; i < SAMPLES; i++) { | |
| 130 if (abs(samples[i] - samples_ref[i]) > 0) { | |
| 131 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], | |
| 132 floats[i]); | |
| 133 } | |
| 134 } | |
| 135 | |
| 136 start = pa_rtclock_now(); | |
| 137 for (i = 0; i < TIMES; i++) { | |
| 138 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | |
| 139 } | |
| 140 stop = pa_rtclock_now(); | |
| 141 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
| 142 | |
| 143 start = pa_rtclock_now(); | |
| 144 for (i = 0; i < TIMES; i++) { | |
| 145 func(SAMPLES, floats, samples_ref); | |
| 146 } | |
| 147 stop = pa_rtclock_now(); | |
| 148 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
| 149 } | |
| 150 | |
| 151 static void run_test_to(void) { | |
| 152 int16_t samples[SAMPLES]; | |
| 153 float floats[SAMPLES]; | |
| 154 float floats_ref[SAMPLES]; | |
| 155 int i; | |
| 156 pa_usec_t start, stop; | |
| 157 pa_convert_func_t func; | |
| 158 | |
| 159 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); | |
| 160 | |
| 161 memset(floats_ref, 0, sizeof(floats_ref)); | |
| 162 memset(floats, 0, sizeof(float)); | |
| 163 | |
| 164 for (i = 0; i < SAMPLES; i++) { | |
| 165 samples[i] = rand() - RAND_MAX/2; | |
| 166 } | |
| 167 | |
| 168 func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne; | |
| 169 func(SAMPLES, samples, floats_ref); | |
| 170 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | |
| 171 | |
| 172 for (i = 0; i < SAMPLES; i++) { | |
| 173 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { | |
| 174 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], | |
| 175 samples[i]); | |
| 176 } | |
| 177 } | |
| 178 | |
| 179 start = pa_rtclock_now(); | |
| 180 for (i = 0; i < TIMES; i++) { | |
| 181 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | |
| 182 } | |
| 183 stop = pa_rtclock_now(); | |
| 184 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
| 185 | |
| 186 start = pa_rtclock_now(); | |
| 187 for (i = 0; i < TIMES; i++) { | |
| 188 func(SAMPLES, samples, floats_ref); | |
| 189 } | |
| 190 stop = pa_rtclock_now(); | |
| 191 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
| 192 } | |
| 193 | |
| 194 #endif /* defined(__arm__) */ | |
| 195 | |
/*
 * Program entry point: runs both conversion tests when built for ARM.
 *
 * The tests and the NEON code they exercise are only compiled when __arm__
 * is defined, so the calls are guarded the same way; on other targets the
 * program just reports that there is nothing to run.
 */
int main(void) {
#if defined(__arm__)
    run_test_from();
    run_test_to();
#else
    pa_log_info("NEON tests skipped: not built for ARM");
#endif

    return EXIT_SUCCESS;
}
