Mercurial > hg > pa-neon
comparison remap_neon.c @ 2:09ee6a01a3d3
new
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Wed, 04 Jul 2012 15:24:08 +0200 |
| parents | b829afbea564 |
| children | e889fd0e7769 |
comparison
equal
deleted
inserted
replaced
| 1:b829afbea564 | 2:09ee6a01a3d3 |
|---|---|
| 186 default: | 186 default: |
| 187 pa_assert_not_reached(); | 187 pa_assert_not_reached(); |
| 188 } | 188 } |
| 189 } | 189 } |
| 190 | 190 |
| 191 | |
| 192 | |
| 193 static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { | 191 static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { |
| 194 unsigned i; | 192 unsigned i; |
| 195 | 193 |
| 196 switch (*m->format) { | 194 switch (*m->format) { |
| 197 case PA_SAMPLE_FLOAT32NE: | 195 case PA_SAMPLE_FLOAT32NE: |
| 233 default: | 231 default: |
| 234 pa_assert_not_reached(); | 232 pa_assert_not_reached(); |
| 235 } | 233 } |
| 236 } | 234 } |
| 237 | 235 |
| 238 | |
| 239 #if defined(__arm__) | 236 #if defined(__arm__) |
| 240 | 237 |
| 241 #include "arm_neon.h" | 238 #include "arm_neon.h" |
| 242 | 239 |
| 243 void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { | 240 static void mono_to_stereo_float_neon_a8(float *dst, const float *src, unsigned n) { |
| 244 unsigned i; | 241 int i = n & 3; |
| 242 | |
| 243 asm volatile ( | |
| 244 "mov %[n], %[n], lsr #2\n\t" | |
| 245 "1:\n\t" | |
| 246 "vld1.32 {q0}, [%[src]]!\n\t" | |
| 247 "vmov q1, q0\n\t" | |
| 248 "subs %[n], %[n], #1\n\t" | |
| 249 "vst2.32 {q0,q1}, [%[dst]]!\n\t" | |
| 250 "bgt 1b\n\t" | |
| 251 // output operands (or input operands that get modified) | |
| 252 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
| 253 : // input operands | |
| 254 : "memory", "cc" // clobber list | |
| 255 ); | |
| 256 | |
| 257 while (i--) { | |
| 258 dst[0] = dst[1] = src[0]; | |
| 259 src++; | |
| 260 dst += 2; | |
| 261 } | |
| 262 } | |
| 263 | |
| 264 static void mono_to_stereo_float_neon_a9(float *dst, const float *src, unsigned n) { | |
| 265 int i = n & 1; | |
| 266 | |
| 267 asm volatile ( | |
| 268 "mov %[n], %[n], lsr #1\n\t" | |
| 269 "1:\n\t" | |
| 270 "ldm %[src]!, {r4,r6}\n\t" | |
| 271 "mov r5, r4\n\t" | |
| 272 "mov r7, r6\n\t" | |
| 273 "subs %[n], %[n], #1\n\t" | |
| 274 "stm %[dst]!, {r4-r7}\n\t" | |
| 275 "bgt 1b\n\t" | |
| 276 // output operands (or input operands that get modified) | |
| 277 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
| 278 : // input operands | |
| 279 : "memory", "cc", "r4", "r5", "r6", "r7" // clobber list | |
| 280 ); | |
| 281 | |
| 282 while (i--) { | |
| 283 dst[0] = dst[1] = src[0]; | |
| 284 src++; | |
| 285 dst += 2; | |
| 286 } | |
| 287 } | |
| 288 | |
| 289 static void mono_to_stereo_int16_neon(int16_t *dst, const int16_t *src, unsigned n) { | |
| 290 int i = n & 7; | |
| 291 | |
| 292 asm volatile ( | |
| 293 "mov %[n], %[n], lsr #3\n\t" | |
| 294 "1:\n\t" | |
| 295 "vld1.16 {q0}, [%[src]]!\n\t" | |
| 296 "vmov q1, q0\n\t" | |
| 297 "subs %[n], %[n], #1\n\t" | |
| 298 "vst2.16 {q0,q1}, [%[dst]]!\n\t" | |
| 299 "bgt 1b\n\t" | |
| 300 // output operands (or input operands that get modified) | |
| 301 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
| 302 : // input operands | |
| 303 : "memory", "cc" // clobber list | |
| 304 ); | |
| 305 | |
| 306 while (i--) { | |
| 307 dst[0] = dst[1] = src[0]; | |
| 308 src++; | |
| 309 dst += 2; | |
| 310 } | |
| 311 } | |
| 312 | |
| 313 static void remap_mono_to_stereo_neon_a9(pa_remap_t *m, void *dst, const void *src, unsigned n) { | |
| 245 switch (*m->format) { | 314 switch (*m->format) { |
| 246 case PA_SAMPLE_FLOAT32NE: | 315 case PA_SAMPLE_FLOAT32NE: |
| 247 { | 316 mono_to_stereo_float_neon_a9(dst, src, n); |
| 248 float *d = (float *) dst, *s = (float *) src; | 317 break; |
| 249 | |
| 250 for (i = 0; i < n/4; i++) { | |
| 251 float32x4x2_t stereo; | |
| 252 stereo.val[0] = vld1q_f32(s); | |
| 253 stereo.val[1] = stereo.val[0]; | |
| 254 vst2q_f32(d, stereo); | |
| 255 s += 4; | |
| 256 d += 8; | |
| 257 } | |
| 258 | |
| 259 for (i = n & ~3; i < n; i++) { | |
| 260 d[0] = d[1] = s[0]; | |
| 261 s++; | |
| 262 d += 2; | |
| 263 } | |
| 264 break; | |
| 265 } | |
| 266 case PA_SAMPLE_S16NE: | 318 case PA_SAMPLE_S16NE: |
| 267 { | 319 mono_to_stereo_int16_neon(dst, src, n); |
| 268 int16_t *d = (int16_t *) dst, *s = (int16_t *) src; | 320 break; |
| 269 | |
| 270 for (i = 0; i < n/8; i++) { | |
| 271 int16x8x2_t stereo; | |
| 272 stereo.val[0] = vld1q_s16(s); | |
| 273 stereo.val[1] = stereo.val[0]; | |
| 274 vst2q_s16(d, stereo); | |
| 275 s += 8; | |
| 276 d += 16; | |
| 277 } | |
| 278 | |
| 279 for (i = n & ~7; i < n; i++) { | |
| 280 d[0] = d[1] = s[0]; | |
| 281 s++; | |
| 282 d += 2; | |
| 283 } | |
| 284 break; | |
| 285 } | |
| 286 default: | 321 default: |
| 287 pa_assert_not_reached(); | 322 pa_assert_not_reached(); |
| 288 } | 323 } |
| 289 } | 324 } |
| 290 | 325 |
| 291 /* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */ | 326 static void remap_mono_to_stereo_neon_a8(pa_remap_t *m, void *dst, const void *src, unsigned n) { |
| 292 static __attribute__ ((noinline)) void stereo_to_mono_float(float *d, const float *s, unsigned n) { | 327 switch (*m->format) { |
| 293 unsigned i; | 328 case PA_SAMPLE_FLOAT32NE: |
| 294 | 329 mono_to_stereo_float_neon_a8(dst, src, n); |
| 295 for (i = 0; i < n/4; i++) { | 330 break; |
| 296 float32x4x2_t stereo = vld2q_f32(s); | 331 case PA_SAMPLE_S16NE: |
| 297 float32x4_t mono = vaddq_f32(stereo.val[0], stereo.val[1]); | 332 mono_to_stereo_int16_neon(dst, src, n); |
| 298 vst1q_f32(d, mono); | 333 break; |
| 299 s += 8; | 334 default: |
| 300 d += 4; | 335 pa_assert_not_reached(); |
| 301 } | 336 } |
| 302 for (i = n & ~3; i < n; i++) { | 337 } |
| 303 d[0] = s[0] + s[1]; | 338 |
| 304 s += 2; | 339 static void stereo_to_mono_float_neon(float *dst, const float *src, unsigned n) { |
| 305 d++; | 340 int i = n & 3; |
| 306 } | 341 |
| 307 } | 342 asm volatile ( |
| 308 | 343 "mov %[n], %[n], lsr #2\n\t" |
| 309 /* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */ | 344 "1:\n\t" |
| 310 static __attribute__ ((noinline)) void stereo_to_mono_int16(int16_t *d, const int16_t *s, unsigned n) { | 345 "vld2.32 {q0,q1}, [%[src]]!\n\t" |
| 311 unsigned int i; | 346 "vadd.f32 q0, q0, q1\n\t" |
| 312 | 347 "subs %[n], %[n], #1\n\t" |
| 313 for (i = 0; i < n/8; i++) { | 348 "vst1.32 {q0}, [%[dst]]!\n\t" |
| 314 int16x8x2_t stereo = vld2q_s16(s); | 349 "bgt 1b\n\t" |
| 315 int16x8_t mono = vaddq_s16(stereo.val[0], stereo.val[1]); | 350 // output operands (or input operands that get modified) |
| 316 vst1q_s16(d, mono); | 351 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) |
| 317 s += 16; | 352 : // input operands |
| 318 d += 8; | 353 : "memory", "cc" // clobber list |
| 319 } | 354 ); |
| 320 for (i = n & ~7; i < n; i++) { | 355 |
| 321 d[0] = s[0] + s[1]; | 356 while (i--) { |
| 322 s += 2; | 357 dst[0] = src[0] + src[1]; |
| 323 d++; | 358 src += 2; |
| 359 dst++; | |
| 360 } | |
| 361 } | |
| 362 | |
| 363 static void stereo_to_mono_int16_neon(int16_t *dst, const int16_t *src, unsigned n) { | |
| 364 int i = n & 7; | |
| 365 | |
| 366 asm volatile ( | |
| 367 "mov %[n], %[n], lsr #3\n\t" | |
| 368 "1:\n\t" | |
| 369 "vld2.16 {q0,q1}, [%[src]]!\n\t" | |
| 370 "vadd.s16 q0, q0, q1\n\t" | |
| 371 "subs %[n], %[n], #1\n\t" | |
| 372 "vst1.16 {q0}, [%[dst]]!\n\t" | |
| 373 "bgt 1b\n\t" | |
| 374 // output operands (or input operands that get modified) | |
| 375 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
| 376 : // input operands | |
| 377 : "memory", "cc" // clobber list | |
| 378 ); | |
| 379 | |
| 380 while (i--) { | |
| 381 dst[0] = src[0] + src[1]; | |
| 382 src += 2; | |
| 383 dst++; | |
| 324 } | 384 } |
| 325 } | 385 } |
| 326 | 386 |
| 327 static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { | 387 static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { |
| 328 switch (*m->format) { | 388 switch (*m->format) { |
| 329 case PA_SAMPLE_FLOAT32NE: | 389 case PA_SAMPLE_FLOAT32NE: |
| 330 stereo_to_mono_float(dst, src, n); | 390 stereo_to_mono_float_neon(dst, src, n); |
| 331 break; | 391 break; |
| 332 case PA_SAMPLE_S16NE: | 392 case PA_SAMPLE_S16NE: |
| 333 stereo_to_mono_int16(dst, src, n); | 393 stereo_to_mono_int16_neon(dst, src, n); |
| 334 break; | 394 break; |
| 335 default: | 395 default: |
| 336 pa_assert_not_reached(); | 396 pa_assert_not_reached(); |
| 337 } | 397 } |
| 338 } | 398 } |
| 399 | |
| 339 #define SAMPLES 1019 | 400 #define SAMPLES 1019 |
| 340 #define TIMES 10000 | 401 #define TIMES 500000 |
| 341 | 402 |
| 342 static void run_test_mono_to_stereo_float(void) { | 403 static void run_test_mono_to_stereo_float(void) { |
| 343 float stereo[2*SAMPLES]; | 404 float stereo_a9[2*SAMPLES]; |
| 405 float stereo_a8[2*SAMPLES]; | |
| 344 float stereo_ref[2*SAMPLES]; | 406 float stereo_ref[2*SAMPLES]; |
| 345 float stereo_gen[2*SAMPLES]; | 407 float stereo_gen[2*SAMPLES]; |
| 346 float mono[SAMPLES]; | 408 float mono[SAMPLES]; |
| 347 int i; | 409 int i; |
| 348 pa_usec_t start, stop; | 410 pa_usec_t start, stop; |
| 349 pa_sample_format_t sf; | 411 pa_sample_format_t sf; |
| 350 pa_sample_spec iss, oss; | 412 pa_sample_spec iss, oss; |
| 351 pa_remap_t remap; | 413 pa_remap_t remap; |
| 352 | 414 |
| 353 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); | 415 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); |
| 354 | 416 |
| 355 memset(stereo_ref, 0, sizeof(stereo_ref)); | 417 memset(stereo_ref, 0, sizeof(stereo_ref)); |
| 356 memset(stereo, 0, sizeof(stereo)); | 418 memset(stereo_gen, 0, sizeof(stereo_gen)); |
| 419 memset(stereo_a9, 0, sizeof(stereo_a9)); | |
| 420 memset(stereo_a8, 0, sizeof(stereo_a8)); | |
| 357 | 421 |
| 358 for (i = 0; i < SAMPLES; i++) { | 422 for (i = 0; i < SAMPLES; i++) { |
| 359 mono[i] = rand()/(float) RAND_MAX - 0.5f; | 423 mono[i] = rand()/(float) RAND_MAX - 0.5f; |
| 360 } | 424 } |
| 361 | 425 |
| 368 remap.i_ss = &iss; | 432 remap.i_ss = &iss; |
| 369 remap.o_ss = &oss; | 433 remap.o_ss = &oss; |
| 370 remap.map_table_f[0][0] = 1.0; | 434 remap.map_table_f[0][0] = 1.0; |
| 371 remap.map_table_f[1][0] = 1.0; | 435 remap.map_table_f[1][0] = 1.0; |
| 372 | 436 |
| 437 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); | |
| 438 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
| 373 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 439 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
| 374 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 440 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
| 375 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
| 376 | 441 |
| 377 for (i = 0; i < 2*SAMPLES; i++) { | 442 for (i = 0; i < 2*SAMPLES; i++) { |
| 378 if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) { | 443 if (fabsf(stereo_a9[i] - stereo_ref[i]) > 0.00001) { |
| 379 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i], | 444 pa_log_debug("NEON/A9 %d: %.3f != %.3f (%.3f)", i, stereo_a9[i], stereo_ref[i], |
| 380 mono[i/2]); | 445 mono[i/2]); |
| 381 } | 446 } |
| 382 } | 447 } |
| 383 for (i = 0; i < 2*SAMPLES; i++) { | 448 for (i = 0; i < 2*SAMPLES; i++) { |
| 384 if (fabsf(stereo[i] - stereo_gen[i]) > 0.00001) { | 449 if (fabsf(stereo_a8[i] - stereo_ref[i]) > 0.00001) { |
| 385 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_gen[i], | 450 pa_log_debug("NEON/A8 %d: %.3f != %.3f (%.3f)", i, stereo_a8[i], stereo_ref[i], |
| 386 mono[i/2]); | 451 mono[i/2]); |
| 387 } | 452 } |
| 388 } | 453 } |
| 389 | 454 for (i = 0; i < 2*SAMPLES; i++) { |
| 390 start = pa_rtclock_now(); | 455 if (fabsf(stereo_gen[i] - stereo_ref[i]) > 0.00001) { |
| 391 for (i = 0; i < TIMES; i++) { | 456 pa_log_debug("generic %d: %.3f != %.3f (%.3f)", i, stereo_gen[i], stereo_ref[i], |
| 392 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | 457 mono[i/2]); |
| 393 } | 458 } |
| 394 stop = pa_rtclock_now(); | 459 } |
| 395 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
| 396 | 460 |
| 397 start = pa_rtclock_now(); | 461 start = pa_rtclock_now(); |
| 398 for (i = 0; i < TIMES; i++) { | 462 for (i = 0; i < TIMES; i++) { |
| 399 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 463 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
| 400 } | 464 } |
| 401 stop = pa_rtclock_now(); | 465 stop = pa_rtclock_now(); |
| 402 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 466 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
| 467 | |
| 468 start = pa_rtclock_now(); | |
| 469 for (i = 0; i < TIMES; i++) { | |
| 470 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); | |
| 471 } | |
| 472 stop = pa_rtclock_now(); | |
| 473 pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); | |
| 474 | |
| 475 start = pa_rtclock_now(); | |
| 476 for (i = 0; i < TIMES; i++) { | |
| 477 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
| 478 } | |
| 479 stop = pa_rtclock_now(); | |
| 480 pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); | |
| 403 | 481 |
| 404 start = pa_rtclock_now(); | 482 start = pa_rtclock_now(); |
| 405 for (i = 0; i < TIMES; i++) { | 483 for (i = 0; i < TIMES; i++) { |
| 406 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 484 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
| 407 } | 485 } |
| 408 stop = pa_rtclock_now(); | 486 stop = pa_rtclock_now(); |
| 409 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 487 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
| 410 } | 488 } |
| 411 | 489 |
| 412 static void run_test_stereo_to_mono_float(void) { | 490 static void run_test_stereo_to_mono_float(void) { |
| 413 float stereo[2*SAMPLES]; | 491 float stereo[2*SAMPLES]; |
| 414 float mono_ref[SAMPLES]; | 492 float mono_ref[SAMPLES]; |
| 454 start = pa_rtclock_now(); | 532 start = pa_rtclock_now(); |
| 455 for (i = 0; i < TIMES; i++) { | 533 for (i = 0; i < TIMES; i++) { |
| 456 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); | 534 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); |
| 457 } | 535 } |
| 458 stop = pa_rtclock_now(); | 536 stop = pa_rtclock_now(); |
| 459 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 537 pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
| 460 | 538 |
| 461 start = pa_rtclock_now(); | 539 start = pa_rtclock_now(); |
| 462 for (i = 0; i < TIMES; i++) { | 540 for (i = 0; i < TIMES; i++) { |
| 463 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); | 541 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); |
| 464 } | 542 } |
| 465 stop = pa_rtclock_now(); | 543 stop = pa_rtclock_now(); |
| 466 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 544 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
| 467 | 545 |
| 468 start = pa_rtclock_now(); | 546 start = pa_rtclock_now(); |
| 469 for (i = 0; i < TIMES; i++) { | 547 for (i = 0; i < TIMES; i++) { |
| 470 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); | 548 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); |
| 471 } | 549 } |
| 472 stop = pa_rtclock_now(); | 550 stop = pa_rtclock_now(); |
| 473 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 551 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
| 474 } | 552 } |
| 475 | 553 |
| 476 static void run_test_mono_to_stereo_s16(void) { | 554 static void run_test_mono_to_stereo_s16(void) { |
| 477 int16_t stereo[2*SAMPLES]; | 555 int16_t stereo_a9[2*SAMPLES]; |
| 556 int16_t stereo_a8[2*SAMPLES]; | |
| 478 int16_t stereo_ref[2*SAMPLES]; | 557 int16_t stereo_ref[2*SAMPLES]; |
| 479 int16_t stereo_gen[2*SAMPLES]; | 558 int16_t stereo_gen[2*SAMPLES]; |
| 480 int16_t mono[SAMPLES]; | 559 int16_t mono[SAMPLES]; |
| 481 int i; | 560 int i; |
| 482 pa_usec_t start, stop; | 561 pa_usec_t start, stop; |
| 485 pa_remap_t remap; | 564 pa_remap_t remap; |
| 486 | 565 |
| 487 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); | 566 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); |
| 488 | 567 |
| 489 memset(stereo_ref, 0, sizeof(stereo_ref)); | 568 memset(stereo_ref, 0, sizeof(stereo_ref)); |
| 490 memset(stereo, 0, sizeof(stereo)); | 569 memset(stereo_a9, 0, sizeof(stereo_a9)); |
| 570 memset(stereo_a8, 0, sizeof(stereo_a8)); | |
| 571 memset(stereo_gen, 0, sizeof(stereo_gen)); | |
| 491 | 572 |
| 492 for (i = 0; i < SAMPLES; i++) { | 573 for (i = 0; i < SAMPLES; i++) { |
| 493 mono[i] = rand() - RAND_MAX/2; | 574 mono[i] = rand() - RAND_MAX/2; |
| 494 } | 575 } |
| 495 | 576 |
| 499 iss.channels = 1; | 580 iss.channels = 1; |
| 500 oss.format = PA_SAMPLE_S16NE; | 581 oss.format = PA_SAMPLE_S16NE; |
| 501 oss.channels = 2; | 582 oss.channels = 2; |
| 502 remap.i_ss = &iss; | 583 remap.i_ss = &iss; |
| 503 remap.o_ss = &oss; | 584 remap.o_ss = &oss; |
| 504 remap.map_table_f[0][0] = 1.0; | 585 remap.map_table_i[0][0] = 0x10000; |
| 505 remap.map_table_f[1][0] = 1.0; | 586 remap.map_table_i[1][0] = 0x10000; |
| 506 | 587 |
| 507 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 588 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
| 508 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 589 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
| 509 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | 590 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); |
| 591 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
| 510 | 592 |
| 511 for (i = 0; i < 2*SAMPLES; i++) { | 593 for (i = 0; i < 2*SAMPLES; i++) { |
| 512 if (abs(stereo[i] - stereo_ref[i]) > 0) { | 594 if (abs(stereo_a9[i] - stereo_ref[i]) > 0) { |
| 513 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i], | 595 pa_log_debug("NEON/A9 %d: %d != %d (%d)", i, stereo_a9[i], stereo_ref[i], |
| 514 mono[i/2]); | 596 mono[i/2]); |
| 515 } | 597 } |
| 516 } | 598 } |
| 517 | |
| 518 for (i = 0; i < 2*SAMPLES; i++) { | 599 for (i = 0; i < 2*SAMPLES; i++) { |
| 519 if (abs(stereo[i] - stereo_gen[i]) > 0) { | 600 if (abs(stereo_a8[i] - stereo_ref[i]) > 0) { |
| 520 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_gen[i], | 601 pa_log_debug("NEON/A8 %d: %d != %d (%d)", i, stereo_a8[i], stereo_ref[i], |
| 521 mono[i/2]); | 602 mono[i/2]); |
| 522 } | 603 } |
| 523 } | 604 } |
| 524 | 605 |
| 525 start = pa_rtclock_now(); | 606 for (i = 0; i < 2*SAMPLES; i++) { |
| 526 for (i = 0; i < TIMES; i++) { | 607 if (abs(stereo_gen[i] - stereo_ref[i]) > 0) { |
| 527 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | 608 pa_log_debug("generic %d: %d != %d (%d)", i, stereo_gen[i], stereo_ref[i], |
| 528 } | 609 mono[i/2]); |
| 529 stop = pa_rtclock_now(); | 610 } |
| 530 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 611 } |
| 612 | |
| 613 start = pa_rtclock_now(); | |
| 614 for (i = 0; i < TIMES; i++) { | |
| 615 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); | |
| 616 } | |
| 617 stop = pa_rtclock_now(); | |
| 618 pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); | |
| 619 | |
| 620 start = pa_rtclock_now(); | |
| 621 for (i = 0; i < TIMES; i++) { | |
| 622 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
| 623 } | |
| 624 stop = pa_rtclock_now(); | |
| 625 pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); | |
| 531 | 626 |
| 532 start = pa_rtclock_now(); | 627 start = pa_rtclock_now(); |
| 533 for (i = 0; i < TIMES; i++) { | 628 for (i = 0; i < TIMES; i++) { |
| 534 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 629 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
| 535 } | 630 } |
| 536 stop = pa_rtclock_now(); | 631 stop = pa_rtclock_now(); |
| 537 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 632 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
| 538 | 633 |
| 539 start = pa_rtclock_now(); | 634 start = pa_rtclock_now(); |
| 540 for (i = 0; i < TIMES; i++) { | 635 for (i = 0; i < TIMES; i++) { |
| 541 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 636 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
| 542 } | 637 } |
| 543 stop = pa_rtclock_now(); | 638 stop = pa_rtclock_now(); |
| 544 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 639 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
| 545 } | 640 } |
| 546 | 641 |
| 547 static void run_test_stereo_to_mono_s16(void) { | 642 static void run_test_stereo_to_mono_s16(void) { |
| 548 int16_t stereo[2*SAMPLES]; | 643 int16_t stereo[2*SAMPLES]; |
| 549 int16_t mono_ref[SAMPLES]; | 644 int16_t mono_ref[SAMPLES]; |
| 556 pa_remap_t remap; | 651 pa_remap_t remap; |
| 557 | 652 |
| 558 pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES); | 653 pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES); |
| 559 | 654 |
| 560 memset(mono_ref, 0, sizeof(mono_ref)); | 655 memset(mono_ref, 0, sizeof(mono_ref)); |
| 656 memset(mono_gen, 0, sizeof(mono_gen)); | |
| 561 memset(mono, 0, sizeof(mono)); | 657 memset(mono, 0, sizeof(mono)); |
| 562 | 658 |
| 563 for (i = 0; i < 2*SAMPLES; i++) { | 659 for (i = 0; i < 2*SAMPLES; i++) { |
| 564 stereo[i] = rand() - RAND_MAX/2; | 660 stereo[i] = rand() - RAND_MAX/2; |
| 565 } | 661 } |
| 570 iss.channels = 2; | 666 iss.channels = 2; |
| 571 oss.format = PA_SAMPLE_S16NE; | 667 oss.format = PA_SAMPLE_S16NE; |
| 572 oss.channels = 1; | 668 oss.channels = 1; |
| 573 remap.i_ss = &iss; | 669 remap.i_ss = &iss; |
| 574 remap.o_ss = &oss; | 670 remap.o_ss = &oss; |
| 575 remap.map_table_f[0][0] = 1.0; | 671 remap.map_table_i[0][0] = 0x10000; |
| 576 remap.map_table_f[0][1] = 1.0; | 672 remap.map_table_i[0][1] = 0x10000; |
| 577 | 673 |
| 578 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); | 674 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); |
| 579 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); | 675 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); |
| 580 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); | 676 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); |
| 581 | 677 |
| 595 start = pa_rtclock_now(); | 691 start = pa_rtclock_now(); |
| 596 for (i = 0; i < TIMES; i++) { | 692 for (i = 0; i < TIMES; i++) { |
| 597 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); | 693 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); |
| 598 } | 694 } |
| 599 stop = pa_rtclock_now(); | 695 stop = pa_rtclock_now(); |
| 600 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 696 pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
| 601 | 697 |
| 602 start = pa_rtclock_now(); | 698 start = pa_rtclock_now(); |
| 603 for (i = 0; i < TIMES; i++) { | 699 for (i = 0; i < TIMES; i++) { |
| 604 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); | 700 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); |
| 605 } | 701 } |
| 606 stop = pa_rtclock_now(); | 702 stop = pa_rtclock_now(); |
| 607 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 703 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
| 608 | 704 |
| 609 start = pa_rtclock_now(); | 705 start = pa_rtclock_now(); |
| 610 for (i = 0; i < TIMES; i++) { | 706 for (i = 0; i < TIMES; i++) { |
| 611 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); | 707 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); |
| 612 } | 708 } |
| 613 stop = pa_rtclock_now(); | 709 stop = pa_rtclock_now(); |
| 614 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 710 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
| 615 } | 711 } |
| 616 | |
| 617 | 712 |
| 618 #endif /* defined(__arm__) */ | 713 #endif /* defined(__arm__) */ |
| 619 | 714 |
| 620 int main() { | 715 int main() { |
| 621 | |
| 622 run_test_stereo_to_mono_float(); | 716 run_test_stereo_to_mono_float(); |
| 623 run_test_stereo_to_mono_s16(); | 717 run_test_stereo_to_mono_s16(); |
| 624 | 718 |
| 625 run_test_mono_to_stereo_float(); | 719 run_test_mono_to_stereo_float(); |
| 626 run_test_mono_to_stereo_s16(); | 720 run_test_mono_to_stereo_s16(); |
| 627 | 721 |
| 628 | |
| 629 return EXIT_SUCCESS; | 722 return EXIT_SUCCESS; |
| 630 } | 723 } |
