Mercurial > hg > pa-neon
annotate sconv_neon.c @ 5:07763f536182 default tip
ALIGNment support
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Sun, 08 Jul 2012 21:48:08 +0200 |
| parents | e889fd0e7769 |
| children |
| rev | line source |
|---|---|
| 0 | 1 /* |
| 2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> | |
| 3 */ | |
| 4 | |
#include <assert.h>
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
| 12 | |
/* int16_t comes from <stdint.h>; it is deliberately not re-declared by hand. */

/*
 * Generic converter signature: element count, source buffer, destination
 * buffer (roles presumed from the concrete converters in this file).
 */
typedef void (*pa_convert_func_t)(unsigned n, const void *a, void *b);

/* Time value in microseconds, as produced by pa_rtclock_now(). */
typedef long long unsigned int pa_usec_t;

/* Assertion wrapper; expands directly to the C library assert(). */
#define pa_assert(x) assert(x)

/*
 * Clamp x to the closed range [low, high].
 *
 * NOTE: the arguments may be evaluated more than once, so do not pass
 * expressions with side effects (e.g. *p++).
 */
#define PA_CLAMP_UNLIKELY(x, low, high) \
    (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x)))
| 21 | |
/*
 * Print a printf-style formatted message to stdout, followed by a newline.
 *
 * The message is written straight to the stream with vprintf(), so there is
 * no intermediate fixed-size buffer that a long message could overflow.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}

/* Debug messages go through the same sink as info messages. */
#define pa_log_debug pa_log_info
| 32 | |
| 33 static pa_usec_t pa_rtclock_now() { | |
| 34 struct timeval tv; | |
| 35 gettimeofday(&tv, NULL); | |
| 36 | |
| 37 return tv.tv_sec * 1000000ULL + tv.tv_usec; | |
| 38 } | |
| 39 | |
| 40 #if defined(__arm__) | |
| 41 | |
| 42 #include "arm_neon.h" | |
| 43 | |
/*
 * Reference (plain C) conversion of n float samples to signed 16-bit samples.
 *
 * Each input is clamped to [-1.0, 1.0], multiplied by 0x7FFF and rounded to
 * an integer with lrintf(). src and dst must be non-NULL.
 */
void pa_sconv_s16le_from_float32ne(unsigned n, const float *src, int16_t *dst) {
    unsigned k;

    pa_assert(src);
    pa_assert(dst);

    for (k = 0; k < n; k++) {
        const float clamped = PA_CLAMP_UNLIKELY(src[k], -1.0f, 1.0f);

        dst[k] = (int16_t) lrintf(clamped * 0x7FFF);
    }
}
| 55 | |
/*
 * NEON conversion of n float samples to signed 16-bit samples.
 *
 * Complete groups of four samples are handled by the inline assembly; the
 * remaining n % 4 samples are converted by the scalar loop at the end.
 * src and dst must be non-NULL.
 *
 * The assembly loop is bottom-tested, i.e. its body always runs at least
 * once. It is therefore only entered when there is at least one complete
 * group; otherwise it would read and write four elements that the caller
 * never provided.
 *
 * Rounding: the vector path adds 0.5 carrying the sign of the value and then
 * converts with vcvt.s32.f32 (which truncates toward zero per the ARM
 * documentation), i.e. ties round away from zero. The scalar tail uses
 * lrintf(), so exact .5 ties may round differently between the two paths.
 */
void pa_sconv_s16le_from_f32ne_neon(unsigned n, const float *src, int16_t *dst) {
    unsigned groups = n >> 2;   /* number of complete 4-sample groups */
    unsigned rest = n & 3;      /* samples left for the scalar tail */

    pa_assert(src);
    pa_assert(dst);

    if (groups > 0) {
        __asm__ __volatile__ (
            "vdup.f32       q2, %[plusone]\n\t"     /* q2 = +1.0 (upper clamp bound) */
            "vneg.f32       q3, q2\n\t"             /* q3 = -1.0 (lower clamp bound) */
            "vdup.f32       q4, %[scale]\n\t"       /* q4 = 32767.0 */
            "vdup.u32       q5, %[mask]\n\t"        /* q5 = sign-bit mask */
            "vdup.f32       q6, %[half]\n\t"        /* q6 = 0.5 */
            "1:\n\t"
            "vld1.32        {q0}, [%[src]]!\n\t"    /* load 4 floats, advance src */
            "vmin.f32       q0, q0, q2\n\t"         /* clamp */
            "vmax.f32       q0, q0, q3\n\t"
            "vmul.f32       q0, q0, q4\n\t"         /* scale */
            "vand.u32       q1, q0, q5\n\t"         /* q1 = sign bit of each lane */
            "vorr.u32       q1, q1, q6\n\t"         /* q1 = 0.5 with that sign */
            "vadd.f32       q0, q0, q1\n\t"         /* round */
            "vcvt.s32.f32   q0, q0\n\t"             /* float -> s32 */
            "vmovn.i32      d0, q0\n\t"             /* narrow s32 -> s16 */
            "subs           %[n], %[n], #1\n\t"
            "vst1.16        {d0}, [%[dst]]!\n\t"    /* store 4 samples, advance dst */
            "bgt            1b\n\t"
            /* output operands (or input operands that get modified) */
            : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (groups)
            /* input operands */
            : [plusone] "r" (1.0f), [scale] "r" (32767.0f),
              [half] "r" (0.5f), [mask] "r" (0x80000000)
            /* clobber list */
            : "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6"
        );
    }

    /* leftovers: src and dst were advanced past the vectorised part by the asm */
    while (rest--) {
        *dst++ = (int16_t) lrintf(PA_CLAMP_UNLIKELY(*src, -1.0f, 1.0f) * 0x7FFF);
        src++;
    }
}
| 92 | |
/*
 * Reference (plain C) conversion of n signed 16-bit samples to float.
 *
 * Each sample is divided by 0x7FFF. src and dst must be non-NULL.
 */
void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *src, float *dst) {
    unsigned k;

    pa_assert(src);
    pa_assert(dst);

    for (k = 0; k < n; k++) {
        const float sample = (float) src[k];

        dst[k] = sample / (float) 0x7FFF;
    }
}
| 100 | |
/*
 * NEON conversion of n signed 16-bit samples to float.
 *
 * Complete groups of four samples are handled by the inline assembly; the
 * remaining n % 4 samples are converted by the scalar loop at the end.
 * Samples are multiplied by 1/0x7FFF. src and dst must be non-NULL.
 *
 * The assembly loop is bottom-tested, i.e. its body always runs at least
 * once. It is therefore only entered when there is at least one complete
 * group; otherwise it would read and write four elements that the caller
 * never provided.
 */
void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *src, float *dst) {
    unsigned groups = n >> 2;   /* number of complete 4-sample groups */
    unsigned rest = n & 3;      /* samples left for the scalar tail */

    const float invscale = 1.0f / 0x7FFF;

    pa_assert(src);
    pa_assert(dst);

    if (groups > 0) {
        __asm__ __volatile__ (
            "vdup.f32       q1, %[invscale]\n\t"    /* q1 = 1/32767 in every lane */
            "1:\n\t"
            "vld1.16        {d0}, [%[src]]!\n\t"    /* load 4 samples, advance src */
            "vmovl.s16      q0, d0\n\t"             /* widen s16 -> s32 */

            "vcvt.f32.s32   q0, q0\n\t"             /* s32 -> float */
            "vmul.f32       q0, q0, q1\n\t"         /* scale */

            "subs           %[n], %[n], #1\n\t"
            "vst1.32        {q0}, [%[dst]]!\n\t"    /* store 4 floats, advance dst */
            "bgt            1b\n\t"
            /* output operands (or input operands that get modified) */
            : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (groups)
            /* input operands */
            : [invscale] "r" (invscale)
            /* clobber list */
            : "memory", "cc", "q0", "q1"
        );
    }

    /* leftovers: src and dst were advanced past the vectorised part by the asm */
    while (rest--) {
        *dst++ = *src++ * invscale;
    }
}
| 130 | |
/* Parameters of the self-test / benchmark below. */
enum {
    /* Samples converted per call; 1019 is not a multiple of 4, so the scalar
     * tail of the NEON routines is exercised as well. */
    SAMPLES = 1019,
    /* Number of conversion calls in each timing loop. */
    TIMES = 100000,
    /* Element offset added to every buffer before it is handed to a converter. */
    ALIGN = 1
};
| 0 | 134 |
| 135 static void run_test_from(void) { | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
136 int16_t samples[SAMPLES+ALIGN]; |
|
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
137 int16_t samples_ref[SAMPLES+ALIGN]; |
|
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
138 float floats[SAMPLES+ALIGN]; |
| 0 | 139 int i; |
| 140 pa_usec_t start, stop; | |
| 141 | |
| 142 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); | |
| 143 | |
| 144 memset(samples_ref, 0, sizeof(samples_ref)); | |
| 145 memset(samples, 0, sizeof(samples)); | |
| 146 | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
147 for (i = 0; i < SAMPLES+ALIGN; i++) { |
| 0 | 148 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); |
| 149 } | |
| 150 | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
151 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
|
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
152 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
| 0 | 153 |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
154 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
| 0 | 155 if (abs(samples[i] - samples_ref[i]) > 0) { |
| 156 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], | |
| 157 floats[i]); | |
| 158 } | |
| 159 } | |
| 160 | |
| 161 start = pa_rtclock_now(); | |
| 162 for (i = 0; i < TIMES; i++) { | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
163 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
| 0 | 164 } |
| 165 stop = pa_rtclock_now(); | |
| 166 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
| 167 | |
| 168 start = pa_rtclock_now(); | |
| 169 for (i = 0; i < TIMES; i++) { | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
170 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
| 0 | 171 } |
| 172 stop = pa_rtclock_now(); | |
| 173 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
| 174 } | |
| 175 | |
| 176 static void run_test_to(void) { | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
177 int16_t samples[SAMPLES+ALIGN]; |
|
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
178 float floats[SAMPLES+ALIGN]; |
|
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
179 float floats_ref[SAMPLES+ALIGN]; |
| 0 | 180 int i; |
| 181 pa_usec_t start, stop; | |
| 182 | |
| 183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); | |
| 184 | |
| 185 memset(floats_ref, 0, sizeof(floats_ref)); | |
| 186 memset(floats, 0, sizeof(float)); | |
| 187 | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
188 for (i = 0; i < SAMPLES+ALIGN; i++) { |
| 0 | 189 samples[i] = rand() - RAND_MAX/2; |
| 190 } | |
| 191 | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
192 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
|
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
193 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
| 0 | 194 |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
195 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
| 0 | 196 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { |
| 197 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], | |
| 198 samples[i]); | |
| 199 } | |
| 200 } | |
| 201 | |
| 202 start = pa_rtclock_now(); | |
| 203 for (i = 0; i < TIMES; i++) { | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
204 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
| 0 | 205 } |
| 206 stop = pa_rtclock_now(); | |
| 207 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
| 208 | |
| 209 start = pa_rtclock_now(); | |
| 210 for (i = 0; i < TIMES; i++) { | |
|
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
211 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
| 0 | 212 } |
| 213 stop = pa_rtclock_now(); | |
| 214 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
| 215 } | |
| 216 | |
| 217 #endif /* defined(__arm__) */ | |
| 218 | |
/*
 * Entry point: run both converter tests.
 *
 * The test functions are only compiled when __arm__ is defined, so the calls
 * are guarded the same way; on other targets the program just reports that
 * nothing was run instead of failing to build.
 */
int main(void) {
#if defined(__arm__)
    run_test_from();
    run_test_to();
#else
    pa_log_info("NEON tests skipped: not built for an ARM target");
#endif

    return EXIT_SUCCESS;
}
