Mercurial > hg > audiostuff
annotate spandsp-0.0.6pre17/src/vector_float.c @ 6:22a74b01a099 default tip
implement more meaningful test program
author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
---|---|
date | Fri, 25 Jun 2010 16:14:50 +0200 |
parents | 26cd8f1ef0b1 |
children |
rev | line source |
---|---|
4
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
1 /* |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
2 * SpanDSP - a series of DSP components for telephony |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
3 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
4 * vector_float.c - Floating vector arithmetic routines. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
5 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
6 * Written by Steve Underwood <steveu@coppice.org> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
7 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
8 * Copyright (C) 2006 Steve Underwood |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
9 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
10 * All rights reserved. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
11 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
12 * This program is free software; you can redistribute it and/or modify |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
13 * it under the terms of the GNU Lesser General Public License version 2.1, |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
14 * as published by the Free Software Foundation. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
15 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
16 * This program is distributed in the hope that it will be useful, |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
19 * GNU Lesser General Public License for more details. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
20 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
21 * You should have received a copy of the GNU Lesser General Public |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
22 * License along with this program; if not, write to the Free Software |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
24 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
25 * $Id: vector_float.c,v 1.22 2009/07/12 09:23:09 steveu Exp $ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
26 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
27 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
28 /*! \file */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
29 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
30 #if defined(HAVE_CONFIG_H) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
31 #include "config.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
32 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
33 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
34 #include <inttypes.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
35 #include <stdlib.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
36 #include <stdio.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
37 #include <string.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
38 #if defined(HAVE_TGMATH_H) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
39 #include <tgmath.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
40 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
41 #if defined(HAVE_MATH_H) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
42 #include <math.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
43 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
44 #include <assert.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
45 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
46 #include "floating_fudge.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
47 #include "mmx_sse_decs.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
48 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
49 #include "spandsp/telephony.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
50 #include "spandsp/vector_float.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
51 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
52 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
53 SPAN_DECLARE(void) vec_copyf(float z[], const float x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
54 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
55 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
56 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
57 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
58 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
59 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
60 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
61 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
62 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
63 _mm_storeu_ps(z + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
64 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
65 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
66 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
67 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
68 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
69 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
70 z[n - 3] = x[n - 3]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
71 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
72 z[n - 2] = x[n - 2]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
73 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
74 z[n - 1] = x[n - 1]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
75 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
76 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
77 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
78 SPAN_DECLARE(void) vec_copyf(float z[], const float x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
79 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
80 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
81 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
82 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
83 z[i] = x[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
84 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
85 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
86 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
87 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
88 SPAN_DECLARE(void) vec_copy(double z[], const double x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
89 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
90 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
91 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
92 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
93 z[i] = x[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
94 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
95 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
96 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
97 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
98 SPAN_DECLARE(void) vec_copyl(long double z[], const long double x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
99 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
100 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
101 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
102 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
103 z[i] = x[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
104 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
105 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
106 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
107 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
108 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
109 SPAN_DECLARE(void) vec_negatef(float z[], const float x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
110 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
111 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
112 static const uint32_t mask = 0x80000000; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
113 static const float *fmask = (float *) &mask; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
114 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
115 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
116 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
117 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
118 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
119 n2 = _mm_set1_ps(*fmask); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
120 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
121 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
122 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
123 n1 = _mm_xor_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
124 _mm_storeu_ps(z + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
125 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
126 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
127 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
128 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
129 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
130 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
131 z[n - 3] = -x[n - 3]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
132 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
133 z[n - 2] = -x[n - 2]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
134 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
135 z[n - 1] = -x[n - 1]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
136 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
137 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
138 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
139 SPAN_DECLARE(void) vec_negatef(float z[], const float x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
140 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
141 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
142 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
143 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
144 z[i] = -x[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
145 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
146 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
147 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
148 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
149 SPAN_DECLARE(void) vec_negate(double z[], const double x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
150 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
151 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
152 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
153 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
154 z[i] = -x[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
155 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
156 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
157 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
158 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
159 SPAN_DECLARE(void) vec_negatel(long double z[], const long double x[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
160 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
161 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
162 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
163 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
164 z[i] = -x[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
165 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
166 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
167 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
168 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
169 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
170 SPAN_DECLARE(void) vec_zerof(float z[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
171 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
172 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
173 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
174 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
175 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
176 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
177 n1 = _mm_setzero_ps(); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
178 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
179 _mm_storeu_ps(z + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
180 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
181 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
182 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
183 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
184 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
185 z[n - 3] = 0; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
186 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
187 z[n - 2] = 0; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
188 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
189 z[n - 1] = 0; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
190 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
191 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
192 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
193 SPAN_DECLARE(void) vec_zerof(float z[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
194 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
195 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
196 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
197 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
198 z[i] = 0.0f; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
199 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
200 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
201 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
202 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
203 SPAN_DECLARE(void) vec_zero(double z[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
204 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
205 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
206 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
207 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
208 z[i] = 0.0; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
209 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
210 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
211 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
212 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
213 SPAN_DECLARE(void) vec_zerol(long double z[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
214 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
215 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
216 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
217 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
218 z[i] = 0.0L; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
219 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
220 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
221 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
222 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
223 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
224 SPAN_DECLARE(void) vec_setf(float z[], float x, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
225 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
226 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
227 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
228 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
229 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
230 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
231 n1 = _mm_set1_ps(x); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
232 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
233 _mm_storeu_ps(z + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
234 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
235 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
236 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
237 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
238 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
239 z[n - 3] = x; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
240 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
241 z[n - 2] = x; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
242 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
243 z[n - 1] = x; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
244 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
245 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
246 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
247 SPAN_DECLARE(void) vec_setf(float z[], float x, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
248 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
249 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
250 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
251 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
252 z[i] = x; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
253 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
254 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
255 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
256 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
257 SPAN_DECLARE(void) vec_set(double z[], double x, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
258 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
259 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
260 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
261 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
262 z[i] = x; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
263 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
264 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
265 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
266 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
267 SPAN_DECLARE(void) vec_setl(long double z[], long double x, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
268 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
269 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
270 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
271 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
272 z[i] = x; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
273 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
274 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
275 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
276 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
277 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
278 SPAN_DECLARE(void) vec_addf(float z[], const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
279 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
280 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
281 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
282 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
283 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
284 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
285 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
286 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
287 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
288 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
289 n2 = _mm_loadu_ps(y + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
290 n2 = _mm_add_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
291 _mm_storeu_ps(z + i, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
292 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
293 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
294 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
295 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
296 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
297 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
298 z[n - 3] = x[n - 3] + y[n - 3]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
299 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
300 z[n - 2] = x[n - 2] + y[n - 2]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
301 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
302 z[n - 1] = x[n - 1] + y[n - 1]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
303 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
304 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
305 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
306 SPAN_DECLARE(void) vec_addf(float z[], const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
307 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
308 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
309 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
310 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
311 z[i] = x[i] + y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
312 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
313 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
314 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
315 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
316 SPAN_DECLARE(void) vec_add(double z[], const double x[], const double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
317 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
318 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
319 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
320 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
321 z[i] = x[i] + y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
322 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
323 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
324 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
325 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
326 SPAN_DECLARE(void) vec_addl(long double z[], const long double x[], const long double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
327 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
328 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
329 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
330 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
331 z[i] = x[i] + y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
332 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
333 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
334 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
335 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
336 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
337 SPAN_DECLARE(void) vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
338 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
339 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
340 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
341 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
342 __m128 n3; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
343 __m128 n4; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
344 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
345 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
346 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
347 n3 = _mm_set1_ps(x_scale); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
348 n4 = _mm_set1_ps(y_scale); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
349 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
350 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
351 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
352 n2 = _mm_loadu_ps(y + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
353 n1 = _mm_mul_ps(n1, n3); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
354 n2 = _mm_mul_ps(n2, n4); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
355 n2 = _mm_add_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
356 _mm_storeu_ps(z + i, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
357 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
358 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
359 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
360 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
361 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
362 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
363 z[n - 3] = x[n - 3]*x_scale + y[n - 3]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
364 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
365 z[n - 2] = x[n - 2]*x_scale + y[n - 2]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
366 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
367 z[n - 1] = x[n - 1]*x_scale + y[n - 1]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
368 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
369 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
370 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
371 SPAN_DECLARE(void) vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
372 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
373 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
374 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
375 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
376 z[i] = x[i]*x_scale + y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
377 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
378 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
379 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
380 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
381 SPAN_DECLARE(void) vec_scaledxy_add(double z[], const double x[], double x_scale, const double y[], double y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
382 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
383 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
384 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
385 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
386 z[i] = x[i]*x_scale + y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
387 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
388 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
389 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
390 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
391 SPAN_DECLARE(void) vec_scaledxy_addl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
392 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
393 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
394 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
395 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
396 z[i] = x[i]*x_scale + y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
397 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
398 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
399 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
400 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
401 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
402 SPAN_DECLARE(void) vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
403 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
404 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
405 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
406 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
407 __m128 n3; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
408 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
409 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
410 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
411 n3 = _mm_set1_ps(y_scale); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
412 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
413 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
414 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
415 n2 = _mm_loadu_ps(y + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
416 n2 = _mm_mul_ps(n2, n3); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
417 n2 = _mm_add_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
418 _mm_storeu_ps(z + i, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
419 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
420 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
421 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
422 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
423 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
424 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
425 z[n - 3] = x[n - 3] + y[n - 3]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
426 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
427 z[n - 2] = x[n - 2] + y[n - 2]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
428 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
429 z[n - 1] = x[n - 1] + y[n - 1]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
430 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
431 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
432 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
433 SPAN_DECLARE(void) vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
434 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
435 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
436 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
437 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
438 z[i] = x[i] + y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
439 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
440 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
441 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
442 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
443 SPAN_DECLARE(void) vec_scaledy_add(double z[], const double x[], const double y[], double y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
444 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
445 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
446 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
447 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
448 z[i] = x[i] + y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
449 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
450 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
451 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
452 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
453 SPAN_DECLARE(void) vec_scaledy_addl(long double z[], const long double x[], const long double y[], long double y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
454 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
455 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
456 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
457 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
458 z[i] = x[i] + y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
459 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
460 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
461 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
462 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
463 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
464 SPAN_DECLARE(void) vec_subf(float z[], const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
465 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
466 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
467 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
468 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
469 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
470 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
471 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
472 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
473 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
474 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
475 n2 = _mm_loadu_ps(y + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
476 n2 = _mm_sub_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
477 _mm_storeu_ps(z + i, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
478 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
479 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
480 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
481 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
482 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
483 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
484 z[n - 3] = x[n - 3] - y[n - 3]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
485 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
486 z[n - 2] = x[n - 2] - y[n - 2]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
487 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
488 z[n - 1] = x[n - 1] - y[n - 1]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
489 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
490 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
491 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
492 SPAN_DECLARE(void) vec_subf(float z[], const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
493 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
494 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
495 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
496 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
497 z[i] = x[i] - y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
498 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
499 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
500 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
501 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
502 SPAN_DECLARE(void) vec_sub(double z[], const double x[], const double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
503 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
504 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
505 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
506 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
507 z[i] = x[i] - y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
508 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
509 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
510 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
511 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
512 SPAN_DECLARE(void) vec_subl(long double z[], const long double x[], const long double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
513 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
514 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
515 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
516 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
517 z[i] = x[i] - y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
518 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
519 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
520 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
521 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
522 SPAN_DECLARE(void) vec_scaledxy_subf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
523 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
524 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
525 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
526 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
527 z[i] = x[i]*x_scale - y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
528 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
529 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
530 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
531 SPAN_DECLARE(void) vec_scaledxy_sub(double z[], const double x[], double x_scale, const double y[], double y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
532 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
533 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
534 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
535 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
536 z[i] = x[i]*x_scale - y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
537 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
538 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
539 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
540 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
541 SPAN_DECLARE(void) vec_scaledxy_subl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
542 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
543 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
544 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
545 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
546 z[i] = x[i]*x_scale - y[i]*y_scale; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
547 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
548 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
549 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
550 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
551 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
552 SPAN_DECLARE(void) vec_scalar_mulf(float z[], const float x[], float y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
553 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
554 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
555 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
556 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
557 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
558 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
559 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
560 n2 = _mm_set1_ps(y); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
561 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
562 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
563 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
564 n1 = _mm_mul_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
565 _mm_storeu_ps(z + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
566 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
567 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
568 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
569 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
570 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
571 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
572 z[n - 3] = x[n - 3]*y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
573 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
574 z[n - 2] = x[n - 2]*y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
575 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
576 z[n - 1] = x[n - 1]*y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
577 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
578 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
579 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
580 SPAN_DECLARE(void) vec_scalar_mulf(float z[], const float x[], float y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
581 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
582 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
583 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
584 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
585 z[i] = x[i]*y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
586 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
587 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
588 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
589 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
590 SPAN_DECLARE(void) vec_scalar_mul(double z[], const double x[], double y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
591 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
592 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
593 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
594 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
595 z[i] = x[i]*y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
596 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
597 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
598 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
599 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
600 SPAN_DECLARE(void) vec_scalar_addf(float z[], const float x[], float y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
601 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
602 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
603 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
604 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
605 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
606 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
607 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
608 n2 = _mm_set1_ps(y); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
609 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
610 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
611 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
612 n1 = _mm_add_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
613 _mm_storeu_ps(z + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
614 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
615 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
616 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
617 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
618 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
619 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
620 z[n - 3] = x[n - 3] + y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
621 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
622 z[n - 2] = x[n - 2] + y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
623 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
624 z[n - 1] = x[n - 1] + y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
625 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
626 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
627 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
628 SPAN_DECLARE(void) vec_scalar_addf(float z[], const float x[], float y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
629 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
630 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
631 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
632 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
633 z[i] = x[i] + y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
634 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
635 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
636 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
637 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
638 SPAN_DECLARE(void) vec_scalar_add(double z[], const double x[], double y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
639 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
640 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
641 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
642 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
643 z[i] = x[i] + y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
644 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
645 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
646 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
647 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
648 SPAN_DECLARE(void) vec_scalar_addl(long double z[], const long double x[], long double y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
649 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
650 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
651 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
652 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
653 z[i] = x[i] + y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
654 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
655 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
656 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
657 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
658 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
659 SPAN_DECLARE(void) vec_scalar_subf(float z[], const float x[], float y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
660 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
661 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
662 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
663 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
664 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
665 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
666 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
667 n2 = _mm_set1_ps(y); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
668 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
669 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
670 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
671 n1 = _mm_sub_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
672 _mm_storeu_ps(z + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
673 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
674 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
675 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
676 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
677 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
678 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
679 z[n - 3] = x[n - 3] - y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
680 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
681 z[n - 2] = x[n - 2] - y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
682 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
683 z[n - 1] = x[n - 1] - y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
684 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
685 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
686 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
687 SPAN_DECLARE(void) vec_scalar_subf(float z[], const float x[], float y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
688 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
689 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
690 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
691 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
692 z[i] = x[i] - y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
693 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
694 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
695 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
696 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
697 SPAN_DECLARE(void) vec_scalar_sub(double z[], const double x[], double y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
698 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
699 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
700 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
701 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
702 z[i] = x[i] - y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
703 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
704 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
705 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
706 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
707 SPAN_DECLARE(void) vec_scalar_subl(long double z[], const long double x[], long double y, int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
708 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
709 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
710 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
711 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
712 z[i] = x[i] - y; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
713 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
714 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
715 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
716 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
717 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
718 SPAN_DECLARE(void) vec_mulf(float z[], const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
719 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
720 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
721 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
722 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
723 __m128 n3; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
724 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
725 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
726 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
727 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
728 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
729 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
730 n2 = _mm_loadu_ps(y + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
731 n3 = _mm_mul_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
732 _mm_storeu_ps(z + i, n3); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
733 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
734 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
735 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
736 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
737 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
738 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
739 z[n - 3] = x[n - 3]*y[n - 3]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
740 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
741 z[n - 2] = x[n - 2]*y[n - 2]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
742 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
743 z[n - 1] = x[n - 1]*y[n - 1]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
744 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
745 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
746 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
747 SPAN_DECLARE(void) vec_mulf(float z[], const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
748 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
749 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
750 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
751 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
752 z[i] = x[i]*y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
753 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
754 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
755 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
756 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
757 SPAN_DECLARE(void) vec_mul(double z[], const double x[], const double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
758 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
759 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
760 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
761 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
762 z[i] = x[i]*y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
763 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
764 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
765 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
766 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
767 SPAN_DECLARE(void) vec_mull(long double z[], const long double x[], const long double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
768 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
769 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
770 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
771 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
772 z[i] = x[i]*y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
773 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
774 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
775 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
776 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
777 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
778 SPAN_DECLARE(float) vec_dot_prodf(const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
779 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
780 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
781 float z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
782 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
783 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
784 __m128 n3; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
785 __m128 n4; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
786 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
787 z = 0.0f; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
788 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
789 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
790 n4 = _mm_setzero_ps(); //sets sum to zero |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
791 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
792 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
793 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
794 n2 = _mm_loadu_ps(y + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
795 n3 = _mm_mul_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
796 n4 = _mm_add_ps(n4, n3); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
797 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
798 n4 = _mm_add_ps(_mm_movehl_ps(n4, n4), n4); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
799 n4 = _mm_add_ss(_mm_shuffle_ps(n4, n4, 1), n4); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
800 _mm_store_ss(&z, n4); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
801 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
802 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
803 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
804 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
805 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
806 z += x[n - 3]*y[n - 3]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
807 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
808 z += x[n - 2]*y[n - 2]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
809 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
810 z += x[n - 1]*y[n - 1]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
811 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
812 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
813 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
814 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
815 SPAN_DECLARE(float) vec_dot_prodf(const float x[], const float y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
816 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
817 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
818 float z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
819 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
820 z = 0.0f; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
821 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
822 z += x[i]*y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
823 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
824 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
825 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
826 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
827 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
828 SPAN_DECLARE(double) vec_dot_prod(const double x[], const double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
829 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
830 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
831 double z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
832 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
833 z = 0.0; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
834 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
835 z += x[i]*y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
836 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
837 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
838 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
839 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
840 #if defined(HAVE_LONG_DOUBLE) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
841 SPAN_DECLARE(long double) vec_dot_prodl(const long double x[], const long double y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
842 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
843 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
844 long double z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
845 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
846 z = 0.0L; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
847 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
848 z += x[i]*y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
849 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
850 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
851 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
852 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
853 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
854 SPAN_DECLARE(float) vec_circular_dot_prodf(const float x[], const float y[], int n, int pos) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
855 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
856 float z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
857 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
858 z = vec_dot_prodf(&x[pos], &y[0], n - pos); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
859 z += vec_dot_prodf(&x[0], &y[n - pos], pos); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
860 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
861 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
862 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
863 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
864 #define LMS_LEAK_RATE 0.9999f |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
865 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
866 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
867 SPAN_DECLARE(void) vec_lmsf(const float x[], float y[], int n, float error) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
868 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
869 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
870 __m128 n1; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
871 __m128 n2; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
872 __m128 n3; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
873 __m128 n4; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
874 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
875 if ((i = n & ~3)) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
876 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
877 n3 = _mm_set1_ps(error); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
878 n4 = _mm_set1_ps(LMS_LEAK_RATE); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
879 for (i -= 4; i >= 0; i -= 4) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
880 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
881 n1 = _mm_loadu_ps(x + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
882 n2 = _mm_loadu_ps(y + i); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
883 n1 = _mm_mul_ps(n1, n3); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
884 n2 = _mm_mul_ps(n2, n4); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
885 n1 = _mm_add_ps(n1, n2); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
886 _mm_storeu_ps(y + i, n1); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
887 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
888 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
889 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
890 switch (n & 3) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
891 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
892 case 3: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
893 y[n - 3] = y[n - 3]*LMS_LEAK_RATE + x[n - 3]*error; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
894 case 2: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
895 y[n - 2] = y[n - 2]*LMS_LEAK_RATE + x[n - 2]*error; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
896 case 1: |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
897 y[n - 1] = y[n - 1]*LMS_LEAK_RATE + x[n - 1]*error; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
898 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
899 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
900 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
901 SPAN_DECLARE(void) vec_lmsf(const float x[], float y[], int n, float error) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
902 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
903 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
904 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
905 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
906 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
907 /* Leak a little to tame uncontrolled wandering */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
908 y[i] = y[i]*LMS_LEAK_RATE + x[i]*error; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
909 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
910 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
911 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
912 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
913 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
914 SPAN_DECLARE(void) vec_circular_lmsf(const float x[], float y[], int n, int pos, float error) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
915 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
916 vec_lmsf(&x[pos], &y[0], n - pos, error); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
917 vec_lmsf(&x[0], &y[n - pos], pos, error); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
918 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
919 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
920 /*- End of file ------------------------------------------------------------*/ |