Mercurial > hg > audiostuff
comparison spandsp-0.0.6pre17/src/spandsp/fast_convert.h @ 4:26cd8f1ef0b1
import spandsp-0.0.6pre17
| author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
|---|---|
| date | Fri, 25 Jun 2010 15:50:58 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:c6c5a16ce2f2 | 4:26cd8f1ef0b1 |
|---|---|
| 1 /* | |
| 2 * SpanDSP - a series of DSP components for telephony | |
| 3 * | |
| 4 * fast_convert.h - Quick ways to convert floating point numbers to integers | |
| 5 * | |
| 6 * Written by Steve Underwood <steveu@coppice.org> | |
| 7 * | |
| 8 * Copyright (C) 2009 Steve Underwood | |
| 9 * | |
| 10 * All rights reserved. | |
| 11 * | |
| 12 * This program is free software; you can redistribute it and/or modify | |
| 13 * it under the terms of the GNU Lesser General Public License version 2.1, | |
| 14 * as published by the Free Software Foundation. | |
| 15 * | |
| 16 * This program is distributed in the hope that it will be useful, | |
| 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 19 * GNU Lesser General Public License for more details. | |
| 20 * | |
| 21 * You should have received a copy of the GNU Lesser General Public | |
| 22 * License along with this program; if not, write to the Free Software | |
| 23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
| 24 * | |
| 25 * $Id: fast_convert.h,v 1.9 2009/10/03 04:37:25 steveu Exp $ | |
| 26 */ | |
| 27 | |
| 28 #if !defined(_SPANDSP_FAST_CONVERT_H_) | |
| 29 #define _SPANDSP_FAST_CONVERT_H_ | |
| 30 | |
| 31 #if defined(__cplusplus) | |
| 32 extern "C" | |
| 33 { | |
| 34 #endif | |
| 35 | |
| 36 /* The following code, to handle issues with lrint() and lrintf() on various | |
| 37 * platforms, is adapted from similar code in libsndfile, which is: | |
| 38 * | |
| 39 * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com> | |
| 40 * | |
| 41 * This program is free software; you can redistribute it and/or modify | |
| 42 * it under the terms of the GNU Lesser General Public License as published by | |
| 43 * the Free Software Foundation; either version 2.1 of the License, or | |
| 44 * (at your option) any later version. | |
| 45 * | |
| 46 * This program is distributed in the hope that it will be useful, | |
| 47 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 49 * GNU Lesser General Public License for more details. | |
| 50 */ | |
| 51 | |
| 52 /* | |
| 53 * On Intel Pentium processors (especially PIII and probably P4), converting | |
| 54 * from float to int is very slow. To meet the C specs, the code produced by | |
| 55 * most C compilers targeting Pentium needs to change the FPU rounding mode | |
| 56 * before the float to int conversion is performed. | |
| 57 * | |
| 58 * Changing the FPU rounding mode causes the FPU pipeline to be flushed. It | |
| 59 * is this flushing of the pipeline which is so slow. | |
| 60 * | |
| 61 * Fortunately the ISO C99 specification defines the functions lrint, lrintf, | |
| 62 * llrint and llrintf which fix this problem as a side effect. | |
| 63 * | |
| 64 * On Unix-like systems, the configure process should have detected the | |
| 65 * presence of these functions. If they weren't found we have to replace them | |
| 66 * here with a standard C cast. | |
| 67 */ | |
| 68 | |
| 69 /* | |
| 70 * The C99 prototypes for these functions are as follows: | |
| 71 * | |
| 72 * float rintf(float x); | |
| 73 * double rint(double x); | |
| 74 * long int lrintf(float x); | |
| 75 * long int lrint(double x); | |
| 76 * long long int llrintf(float x); | |
| 77 * long long int llrint(double x); | |
| 78 * | |
| 79 * The presence of the required functions is detected during the configure | |
| 80 * process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in | |
| 81 * the config file. | |
| 82 */ | |
| 83 | |
| 84 #if defined(__CYGWIN__) | |
| 85 #if !defined(__cplusplus) | |
| 86 /* | |
| 87 * CYGWIN has lrint and lrintf functions, but they are slow and buggy: | |
| 88 * http://sourceware.org/ml/cygwin/2005-06/msg00153.html | |
| 89 * http://sourceware.org/ml/cygwin/2005-09/msg00047.html | |
| 90 * The latest version of cygwin seems to have made no effort to fix this. | |
| 91 * These replacement functions (pulled from the Public Domain MinGW | |
| 92 * math.h header) replace the native versions. | |
| 93 */ | |
/*
 * Replacement for Cygwin's lrint(): convert x to an integer with the x87
 * FISTP instruction, which rounds according to the current FPU rounding mode.
 *
 * FISTP pops the value from the top of the x87 stack, so st(0) is both the
 * input ("t") and a clobber ("st").  The store width is matched to the width
 * of long int: a 32-bit store into a 64-bit long (LP64 targets) would leave
 * the upper half of the result uninitialised.
 */
static __inline__ long int lrint(double x)
{
    long int retval;

    __asm__ __volatile__
    (
#if defined(__LP64__)
        "fistpll %0"
#else
        "fistpl %0"
#endif
        : "=m" (retval)
        : "t" (x)
        : "st"
    );

    return retval;
}
| 108 | |
/*
 * Replacement for Cygwin's lrintf(): convert x to an integer with the x87
 * FISTP instruction, which rounds according to the current FPU rounding mode.
 *
 * FISTP pops the value from the top of the x87 stack, so st(0) is both the
 * input ("t") and a clobber ("st").  The store width is matched to the width
 * of long int: a 32-bit store into a 64-bit long (LP64 targets) would leave
 * the upper half of the result uninitialised.
 */
static __inline__ long int lrintf(float x)
{
    long int retval;

    __asm__ __volatile__
    (
#if defined(__LP64__)
        "fistpll %0"
#else
        "fistpl %0"
#endif
        : "=m" (retval)
        : "t" (x)
        : "st"
    );

    return retval;
}
| 122 #endif | |
| 123 | |
| 124 /* The fastest way to convert is the equivalent of lrint() */ | |
/*
 * Fast double to long int conversion for Cygwin, using the x87 FISTP
 * instruction (rounds according to the current FPU rounding mode).
 *
 * FISTP pops st(0), hence the "t" input together with the "st" clobber.
 * The store width follows the width of long int, so that no part of the
 * result is left uninitialised on LP64 targets.
 */
static __inline__ long int lfastrint(double x)
{
    long int retval;

    __asm__ __volatile__
    (
#if defined(__LP64__)
        "fistpll %0"
#else
        "fistpl %0"
#endif
        : "=m" (retval)
        : "t" (x)
        : "st"
    );

    return retval;
}
| 139 | |
/*
 * Fast float to long int conversion for Cygwin, using the x87 FISTP
 * instruction (rounds according to the current FPU rounding mode).
 *
 * FISTP pops st(0), hence the "t" input together with the "st" clobber.
 * The store width follows the width of long int, so that no part of the
 * result is left uninitialised on LP64 targets.
 */
static __inline__ long int lfastrintf(float x)
{
    long int retval;

    __asm__ __volatile__
    (
#if defined(__LP64__)
        "fistpll %0"
#else
        "fistpl %0"
#endif
        : "=m" (retval)
        : "t" (x)
        : "st"
    );

    return retval;
}
| 153 #elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590) | |
| 154 | |
| 155 #if defined(__i386__) | |
| 156 /* These routines are guaranteed fast on an i386 machine. Using the built in | |
| 157 lrint() and lrintf() should be similar, but they may not always be enabled. | |
| 158 Sometimes, especially with "-O0", you might get slow calls to routines. */ | |
/*
 * i386: convert a double to a long int with one x87 FISTP instruction.
 * The conversion uses whatever rounding mode the FPU is currently in.
 */
static __inline__ long int lfastrint(double x)
{
    long int rounded;

    /* "t" puts x in st(0); FISTP pops it, which is why "st" is clobbered. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
| 173 | |
/*
 * i386: convert a float to a long int with one x87 FISTP instruction.
 * The conversion uses whatever rounding mode the FPU is currently in.
 */
static __inline__ long int lfastrintf(float x)
{
    long int rounded;

    /* "t" puts x in st(0); FISTP pops it, which is why "st" is clobbered. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
| 187 #elif defined(__x86_64__) | |
| 188 /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a | |
| 189 double or float to an int. It looks like the design on the x86_64 took account | |
| 190 of the default behaviour specified for C. */ | |
/* x86_64: a plain C conversion, i.e. the fraction is discarded (truncation
   towards zero) rather than rounded. */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 195 | |
/* x86_64: a plain C conversion, i.e. the fraction is discarded (truncation
   towards zero) rather than rounded. */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 200 #elif defined(__ppc__) || defined(__powerpc__) | |
| 201 static __inline__ long int lfastrint(register double x) | |
| 202 { | |
| 203 int res[2]; | |
| 204 | |
| 205 __asm__ __volatile__ | |
| 206 ( | |
| 207 "fctiw %1, %1\n\t" | |
| 208 "stfd %1, %0" | |
| 209 : "=m" (res) /* Output */ | |
| 210 : "f" (x) /* Input */ | |
| 211 : "memory" | |
| 212 ); | |
| 213 | |
| 214 return res[1]; | |
| 215 } | |
| 216 | |
| 217 static __inline__ long int lfastrintf(register float x) | |
| 218 { | |
| 219 int res[2]; | |
| 220 | |
| 221 __asm__ __volatile__ | |
| 222 ( | |
| 223 "fctiw %1, %1\n\t" | |
| 224 "stfd %1, %0" | |
| 225 : "=m" (res) /* Output */ | |
| 226 : "f" (x) /* Input */ | |
| 227 : "memory" | |
| 228 ); | |
| 229 | |
| 230 return res[1]; | |
| 231 } | |
| 232 #else | |
| 233 /* Fallback routines, for unrecognised platforms */ | |
/* Unrecognised platform: fall back to a plain C conversion, which discards
   the fraction (truncation towards zero). */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 238 | |
/* Unrecognised platform: fall back to a plain C conversion, which discards
   the fraction (truncation towards zero). */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 243 #endif | |
| 244 | |
| 245 #elif defined(_M_IX86) | |
| 246 /* Visual Studio i386 */ | |
| 247 /* | |
| 248 * Win32 doesn't seem to have the lrint() and lrintf() functions. | |
| 249 * Therefore implement inline versions of these functions here. | |
| 250 */ | |
| 251 | |
| 252 __inline long int lrint(double x) | |
| 253 { | |
| 254 long int i; | |
| 255 | |
| 256 _asm | |
| 257 { | |
| 258 fld x | |
| 259 fistp i | |
| 260 }; | |
| 261 return i; | |
| 262 } | |
| 263 | |
| 264 __inline long int lrintf(float x) | |
| 265 { | |
| 266 long int i; | |
| 267 | |
| 268 _asm | |
| 269 { | |
| 270 fld x | |
| 271 fistp i | |
| 272 }; | |
| 273 return i; | |
| 274 } | |
| 275 | |
| 276 __inline float rintf(float flt) | |
| 277 { | |
| 278 _asm | |
| 279 { fld flt | |
| 280 frndint | |
| 281 } | |
| 282 } | |
| 283 | |
| 284 __inline double rint(double dbl) | |
| 285 { | |
| 286 _asm | |
| 287 { | |
| 288 fld dbl | |
| 289 frndint | |
| 290 } | |
| 291 } | |
| 292 | |
| 293 __inline long int lfastrint(double x) | |
| 294 { | |
| 295 long int i; | |
| 296 | |
| 297 _asm | |
| 298 { | |
| 299 fld x | |
| 300 fistp i | |
| 301 }; | |
| 302 return i; | |
| 303 } | |
| 304 | |
| 305 __inline long int lfastrintf(float x) | |
| 306 { | |
| 307 long int i; | |
| 308 | |
| 309 _asm | |
| 310 { | |
| 311 fld x | |
| 312 fistp i | |
| 313 }; | |
| 314 return i; | |
| 315 } | |
| 316 #elif defined(_M_X64) | |
| 317 /* Visual Studio x86_64 */ | |
| 318 /* x86_64 machines will do best with a simple assignment. */ | |
| 319 #include <intrin.h> | |
| 320 | |
/*
 * Visual Studio x86_64 stand-in for C99 lrint(), built on the SSE2
 * double to integer conversion intrinsic.
 *
 * x is loaded with _mm_load_sd(), which reads exactly one double. The packed
 * load _mm_loadu_pd() reads two doubles (16 bytes), i.e. 8 bytes beyond the
 * end of x.
 */
__inline long int lrint(double x)
{
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
| 325 | |
/* Visual Studio x86_64 stand-in for C99 lrintf(): load x into the low lane
   of an SSE register and convert that lane to an integer. */
__inline long int lrintf(float x)
{
    const __m128 lane = _mm_load_ss((const float *) &x);

    return _mm_cvt_ss2si(lane);
}
| 330 | |
| 331 __inline long int lfastrint(double x) | |
| 332 { | |
| 333 return (long int) (x); | |
| 334 } | |
| 335 | |
| 336 __inline long int lfastrintf(float x) | |
| 337 { | |
| 338 return (long int) (x); | |
| 339 } | |
| 340 #elif defined(__MWERKS__) && defined(macintosh) | |
| 341 /* This MacOS 9 solution was provided by Stephane Letz */ | |
| 342 | |
| 343 long int __inline__ lfastrint(register double x) | |
| 344 { | |
| 345 long int res[2]; | |
| 346 | |
| 347 asm | |
| 348 { | |
| 349 fctiw x, x | |
| 350 stfd x, res | |
| 351 } | |
| 352 return res[1]; | |
| 353 } | |
| 354 | |
| 355 long int __inline__ lfastrintf(register float x) | |
| 356 { | |
| 357 long int res[2]; | |
| 358 | |
| 359 asm | |
| 360 { | |
| 361 fctiw x, x | |
| 362 stfd x, res | |
| 363 } | |
| 364 return res[1]; | |
| 365 } | |
| 366 #elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) | |
| 367 /* For Apple Mac OS/X - do recent versions still need this? */ | |
| 368 | |
| 369 static __inline__ long int lfastrint(register double x) | |
| 370 { | |
| 371 int res[2]; | |
| 372 | |
| 373 __asm__ __volatile__ | |
| 374 ( | |
| 375 "fctiw %1, %1\n\t" | |
| 376 "stfd %1, %0" | |
| 377 : "=m" (res) /* Output */ | |
| 378 : "f" (x) /* Input */ | |
| 379 : "memory" | |
| 380 ); | |
| 381 | |
| 382 return res[1]; | |
| 383 } | |
| 384 | |
| 385 static __inline__ long int lfastrintf(register float x) | |
| 386 { | |
| 387 int res[2]; | |
| 388 | |
| 389 __asm__ __volatile__ | |
| 390 ( | |
| 391 "fctiw %1, %1\n\t" | |
| 392 "stfd %1, %0" | |
| 393 : "=m" (res) /* Output */ | |
| 394 : "f" (x) /* Input */ | |
| 395 : "memory" | |
| 396 ); | |
| 397 | |
| 398 return res[1]; | |
| 399 } | |
| 400 #else | |
| 401 /* There is nothing else to do, but use a simple casting operation, instead of a real | |
| 402 rint() type function. Since we are only trying to use rint() to speed up conversions, | |
| 403 the accuracy issues related to changing the rounding scheme are of little concern | |
| 404 to us. */ | |
| 405 | |
| 406 #if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun) | |
| 407 #warning "No usable lrint() and lrintf() functions available." | |
| 408 #warning "Replacing these functions with a simple C cast." | |
| 409 #endif | |
| 410 | |
/* Stand-in for lrint() where no usable version exists: a plain C conversion,
   which truncates towards zero instead of rounding. */
static __inline__ long int lrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 415 | |
/* Stand-in for lrintf() where no usable version exists: a plain C conversion,
   which truncates towards zero instead of rounding. */
static __inline__ long int lrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 420 | |
/* Generic fallback: a plain C conversion, which discards the fraction
   (truncation towards zero) rather than rounding. */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 425 | |
/* Generic fallback: a plain C conversion, which discards the fraction
   (truncation towards zero) rather than rounding. */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
| 430 #endif | |
| 431 | |
| 432 #if defined(__cplusplus) | |
| 433 } | |
| 434 #endif | |
| 435 | |
| 436 #endif | |
| 437 | |
| 438 /*- End of file ------------------------------------------------------------*/ |
