spandsp 3.0.0
fast_convert.h
1/*
2 * SpanDSP - a series of DSP components for telephony
3 *
4 * fast_convert.h - Quick ways to convert floating point numbers to integers
5 *
6 * Written by Steve Underwood <steveu@coppice.org>
7 *
8 * Copyright (C) 2009 Steve Underwood
9 *
10 * All rights reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 2.1,
14 * as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 */
25
26#if !defined(_SPANDSP_FAST_CONVERT_H_)
27#define _SPANDSP_FAST_CONVERT_H_
28
29#if defined(__cplusplus)
30extern "C"
31{
32#endif
33
34/* The following code, to handle issues with lrint() and lrintf() on various
35 * platforms, is adapted from similar code in libsndfile, which is:
36 *
37 * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
38 *
39 * This program is free software; you can redistribute it and/or modify
40 * it under the terms of the GNU Lesser General Public License as published by
41 * the Free Software Foundation; either version 2.1 of the License, or
42 * (at your option) any later version.
43 *
44 * This program is distributed in the hope that it will be useful,
45 * but WITHOUT ANY WARRANTY; without even the implied warranty of
46 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47 * GNU Lesser General Public License for more details.
48 */
49
50/*
51 * On Intel Pentium processors (especially PIII and probably P4), converting
52 * from float to int is very slow. To meet the C specs, the code produced by
53 * most C compilers targeting Pentium needs to change the FPU rounding mode
54 * before the float to int conversion is performed.
55 *
56 * Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
57 * is this flushing of the pipeline which is so slow.
58 *
59 * Fortunately the ISO C99 specification defines the functions lrint, lrintf,
60 * llrint and llrintf which fix this problem as a side effect.
61 *
62 * On Unix-like systems, the configure process should have detected the
63 * presence of these functions. If they weren't found we have to replace them
64 * here with a standard C cast.
65 */
66
67/*
68 * The C99 prototypes for these functions are as follows:
69 *
70 * float rintf(float x);
71 * double rint(double x);
72 * long int lrintf(float x);
73 * long int lrint(double x);
74 * long long int llrintf(float x);
75 * long long int llrint(double x);
76 *
77 * The presence of the required functions is detected during the configure
78 * process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
79 * the config file.
80 */
81
82#if defined(__CYGWIN__)
83#if !defined(__cplusplus) && (__GNUC__ < 4)
84 /*
85 * CYGWIN versions prior to 1.7.1 have lrint and lrintf functions, but
86 * they are slow and buggy:
87 * http://sourceware.org/ml/cygwin/2005-06/msg00153.html
88 * http://sourceware.org/ml/cygwin/2005-09/msg00047.html
89 * These replacement functions (pulled from the Public Domain MinGW
90 * math.h header) replace the native versions.
91 */
/*
 * Replacement lrint() for old Cygwin toolchains.
 *
 * Rounds x to an integer with the x87 "fistpl" instruction, which uses the
 * FPU's current rounding mode and stores a 32-bit result.
 */
static __inline__ long int lrint(double x)
{
    long int rounded;

    /* "t" puts x in st(0); fistpl pops it, so "st" is listed as clobbered. */
    __asm__ __volatile__ ("fistpl %[dst]" : [dst] "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
106
/*
 * Replacement lrintf() for old Cygwin toolchains.
 *
 * Rounds x to an integer with the x87 "fistpl" instruction, which uses the
 * FPU's current rounding mode and stores a 32-bit result.
 */
static __inline__ long int lrintf(float x)
{
    long int rounded;

    /* "t" puts x in st(0); fistpl pops it, so "st" is listed as clobbered. */
    __asm__ __volatile__ ("fistpl %[dst]" : [dst] "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
120#endif
121
122 /* The fastest way to convert is the equivalent of lrint() */
/*
 * Fast double to integer conversion using the x87 "fistpl" instruction.
 *
 * x is rounded according to the FPU's current rounding mode. fistpl stores
 * exactly 32 bits, so the result is captured in an int and then widened.
 * Storing straight into a long int would leave the upper half of the result
 * indeterminate wherever long int is 64 bits wide.
 *
 * Returns the rounded value of x, limited to the 32-bit integer range.
 */
static __inline__ long int lfastrint(double x)
{
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"      /* fistpl pops st(0) */
    );

    return (long int) retval;
}
137
/*
 * Fast float to integer conversion using the x87 "fistpl" instruction.
 *
 * x is rounded according to the FPU's current rounding mode. fistpl stores
 * exactly 32 bits, so the result is captured in an int and then widened.
 * Storing straight into a long int would leave the upper half of the result
 * indeterminate wherever long int is 64 bits wide.
 *
 * Returns the rounded value of x, limited to the 32-bit integer range.
 */
static __inline__ long int lfastrintf(float x)
{
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"      /* fistpl pops st(0) */
    );

    return (long int) retval;
}
151#elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590)
152
153#if defined(__i386__)
154 /* These routines are guaranteed fast on an i386 machine. Using the built in
155 lrint() and lrintf() should be similar, but they may not always be enabled.
156 Sometimes, especially with "-O0", you might get slow calls to routines. */
/*
 * i386: convert a double to an integer with a single x87 "fistpl", which
 * rounds using the FPU's current rounding mode.
 */
static __inline__ long int lfastrint(double x)
{
    long int rounded;

    /* x is loaded into st(0) ("t"); fistpl pops it, hence the "st" clobber. */
    __asm__ __volatile__ ("fistpl %[dst]" : [dst] "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
171
/*
 * i386: convert a float to an integer with a single x87 "fistpl", which
 * rounds using the FPU's current rounding mode.
 */
static __inline__ long int lfastrintf(float x)
{
    long int rounded;

    /* x is loaded into st(0) ("t"); fistpl pops it, hence the "st" clobber. */
    __asm__ __volatile__ ("fistpl %[dst]" : [dst] "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
185#elif defined(__x86_64__)
186 /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
187 double or float to an int. It looks like the design on the x86_64 took account
188 of the default behaviour specified for C. */
/*
 * x86_64: convert a double to a long int with a plain C cast, which
 * discards the fractional part (truncation towards zero).
 */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
193
/*
 * x86_64: convert a float to a long int with a plain C cast, which
 * discards the fractional part (truncation towards zero).
 */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
198#elif (defined(__ppc__) || defined(__powerpc__)) && !defined(__NO_FPRS__)
199 static __inline__ long int lfastrint(register double x)
200 {
201 int res[2];
202
203 __asm__ __volatile__
204 (
205 "fctiw %1, %1\n\t"
206 "stfd %1, %0"
207 : "=m" (res) /* Output */
208 : "f" (x) /* Input */
209 : "memory"
210 );
211
212 return res[1];
213 }
214
215 static __inline__ long int lfastrintf(register float x)
216 {
217 int res[2];
218
219 __asm__ __volatile__
220 (
221 "fctiw %1, %1\n\t"
222 "stfd %1, %0"
223 : "=m" (res) /* Output */
224 : "f" (x) /* Input */
225 : "memory"
226 );
227
228 return res[1];
229 }
230#else
231 /* Fallback routines, for unrecognised platforms */
/*
 * Fallback for unrecognised GNU-style targets: a plain C cast, which
 * truncates x towards zero.
 */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
236
/*
 * Fallback for unrecognised GNU-style targets: a plain C cast, which
 * truncates x towards zero.
 */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
241#endif
242
243#elif defined(_M_IX86)
244 /* Visual Studio i386 */
245 /*
246 * Win32 doesn't seem to have the lrint() and lrintf() functions.
247 * Therefore implement inline versions of these functions here.
248 */
249
250#if (_MSC_VER < 1800)
251 __inline long int lrint(double x)
252 {
253 long int i;
254
255 _asm
256 {
257 fld x
258 fistp i
259 };
260 return i;
261 }
262
263 __inline long int lrintf(float x)
264 {
265 long int i;
266
267 _asm
268 {
269 fld x
270 fistp i
271 };
272 return i;
273 }
274
275 __inline float rintf(float flt)
276 {
277 _asm
278 {
279 fld flt
280 frndint
281 }
282 }
283
284 __inline double rint(double dbl)
285 {
286 _asm
287 {
288 fld dbl
289 frndint
290 }
291 }
292#endif
293
294 __inline long int lfastrint(double x)
295 {
296 long int i;
297
298 _asm
299 {
300 fld x
301 fistp i
302 };
303 return i;
304 }
305
306 __inline long int lfastrintf(float x)
307 {
308 long int i;
309
310 _asm
311 {
312 fld x
313 fistp i
314 };
315 return i;
316 }
317#elif defined(_M_X64)
318 /* Visual Studio x86_64 */
319 /* x86_64 machines will do best with a simple assignment. */
320#include <intrin.h>
321
322#if (_MSC_VER < 1800)
/*
 * lrint() substitute for Visual Studio x86_64 builds.
 *
 * Converts x to an integer with the SSE2 scalar conversion, which rounds
 * according to the current SSE rounding mode, and narrows the 64-bit result
 * to long int.
 */
__inline long int lrint(double x)
{
    /* Load only the single double held in x. A packed (two-double) load
       would read 16 bytes from an 8-byte object. */
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
327
/*
 * lrintf() substitute for Visual Studio x86_64 builds.
 *
 * Loads x into the low lane of an SSE register and converts it to an
 * integer with the scalar single-precision conversion intrinsic.
 */
__inline long int lrintf(float x)
{
    const __m128 scalar = _mm_load_ss(&x);

    return _mm_cvt_ss2si(scalar);
}
332#endif
333
334 __inline long int lfastrint(double x)
335 {
336 return (long int) (x);
337 }
338
339 __inline long int lfastrintf(float x)
340 {
341 return (long int) (x);
342 }
343#elif defined(__MWERKS__) && defined(macintosh)
344 /* This MacOS 9 solution was provided by Stephane Letz */
345
346 long int __inline__ lfastrint(register double x)
347 {
348 long int res[2];
349
350 asm
351 {
352 fctiw x, x
353 stfd x, res
354 }
355 return res[1];
356 }
357
358 long int __inline__ lfastrintf(register float x)
359 {
360 long int res[2];
361
362 asm
363 {
364 fctiw x, x
365 stfd x, res
366 }
367 return res[1];
368 }
369#elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
370 /* For Apple Mac OS/X - do recent versions still need this? */
371
372 static __inline__ long int lfastrint(register double x)
373 {
374 int res[2];
375
376 __asm__ __volatile__
377 (
378 "fctiw %1, %1\n\t"
379 "stfd %1, %0"
380 : "=m" (res) /* Output */
381 : "f" (x) /* Input */
382 : "memory"
383 );
384
385 return res[1];
386 }
387
388 static __inline__ long int lfastrintf(register float x)
389 {
390 int res[2];
391
392 __asm__ __volatile__
393 (
394 "fctiw %1, %1\n\t"
395 "stfd %1, %0"
396 : "=m" (res) /* Output */
397 : "f" (x) /* Input */
398 : "memory"
399 );
400
401 return res[1];
402 }
403#else
404 /* There is nothing else to do, but use a simple casting operation, instead of a real
405 rint() type function. Since we are only trying to use rint() to speed up conversions,
406 the accuracy issues related to changing the rounding scheme are of little concern
407 to us. */
408
409 #if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun)
410 #warning "No usable lrint() and lrintf() functions available."
411 #warning "Replacing these functions with a simple C cast."
412 #endif
413
/*
 * Stand-in for lrint() on platforms with no usable native version.
 * This is a plain C cast, so it truncates towards zero instead of rounding.
 */
static __inline__ long int lrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
418
/*
 * Stand-in for lrintf() on platforms with no usable native version.
 * This is a plain C cast, so it truncates towards zero instead of rounding.
 */
static __inline__ long int lrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
423
/*
 * Generic fast double to integer conversion: a plain C cast, which
 * truncates x towards zero.
 */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
428
/*
 * Generic fast float to integer conversion: a plain C cast, which
 * truncates x towards zero.
 */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
433#endif
434
435#if defined(__cplusplus)
436}
437#endif
438
439#endif
440
441/*- End of file ------------------------------------------------------------*/