summaryrefslogtreecommitdiffstats
path: root/lib/ffts/src/ffts_trig.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ffts/src/ffts_trig.c')
-rw-r--r--lib/ffts/src/ffts_trig.c1057
1 files changed, 856 insertions, 201 deletions
diff --git a/lib/ffts/src/ffts_trig.c b/lib/ffts/src/ffts_trig.c
index 74ebfd2..65efa86 100644
--- a/lib/ffts/src/ffts_trig.c
+++ b/lib/ffts/src/ffts_trig.c
@@ -2,7 +2,7 @@
This file is part of FFTS -- The Fastest Fourier Transform in the South
-Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
+Copyright (c) 2015-2016, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
All rights reserved.
@@ -33,193 +33,707 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ffts_trig.h"
#include "ffts_dd.h"
+/*
+* For more information on algorithms:
+*
+* D. Potts, G. Steidl, M. Tasche, Numerical stability of fast
+* trigonometric transforms — a worst case study,
+* J. Concrete Appl. Math. 1 (2003) 1–36
+*
+* O. Buneman, Stable on–line creation of sines and cosines of
+* successive angles, Proc. IEEE 75, 1434 – 1435 (1987).
+*/
+
+/* An union to initialize doubles using byte presentation,
+* and to avoid breaking strict-aliasing rules
+*/
+
+/* TODO: we need macros to take care endianess */
+typedef union ffts_double {
+ int32_t i[2];
+ double d;
+} ffts_double_t;
+
/* 1/(2*cos(pow(2,-p)*pi)) */
-static const FFTS_ALIGN(16) unsigned int half_secant[132] = {
- 0x00000000, 0x3fe00000, 0xc9be45de, 0x3be3bd3c,
- 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c03bd3c,
- 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c23bd3c,
- 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c43bd3c,
- 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c63bd3c,
- 0x00000000, 0x3fe00000, 0xc9be45df, 0x3c83bd3c,
- 0x00000001, 0x3fe00000, 0x4df22efd, 0x3c7de9e6,
- 0x00000005, 0x3fe00000, 0x906e8725, 0xbc60b0cd,
- 0x00000014, 0x3fe00000, 0x906e8357, 0xbc80b0cd,
- 0x0000004f, 0x3fe00000, 0x0dce83c9, 0xbc5619b2,
- 0x0000013c, 0x3fe00000, 0x0dc6e79a, 0xbc7619b2,
- 0x000004ef, 0x3fe00000, 0xe4af1240, 0x3c83cc9b,
- 0x000013bd, 0x3fe00000, 0x2d14c08a, 0x3c7e64df,
- 0x00004ef5, 0x3fe00000, 0x47a85465, 0xbc59b20b,
- 0x00013bd4, 0x3fe00000, 0xab79c897, 0xbc79b203,
- 0x0004ef4f, 0x3fe00000, 0x15019a96, 0x3c79386b,
- 0x0013bd3d, 0x3fe00000, 0x7d6dbf4b, 0xbc7b16b7,
- 0x004ef4f3, 0x3fe00000, 0xf30832e0, 0x3c741ee4,
- 0x013bd3cd, 0x3fe00000, 0xd3bcd4bb, 0xbc83f41e,
- 0x04ef4f34, 0x3fe00000, 0xdd75aebb, 0xbc82ef06,
- 0x13bd3cde, 0x3fe00000, 0xb2b41b3d, 0x3c52d979,
- 0x4ef4f46c, 0x3fe00000, 0x4f0fb458, 0xbc851db3,
- 0x3bd3e0e7, 0x3fe00001, 0x8a0ce3f0, 0x3c58dbab,
- 0xef507722, 0x3fe00004, 0x2a8ec295, 0x3c83e351,
- 0xbd5114f9, 0x3fe00013, 0xc4c0d92d, 0x3c8b3ca4,
- 0xf637de7d, 0x3fe0004e, 0xb74de729, 0x3c45974e,
- 0xe8190891, 0x3fe0013b, 0x26edf4da, 0xbc814c20,
- 0x9436640e, 0x3fe004f0, 0xe2b34b50, 0x3c8091ab,
- 0x9c61d971, 0x3fe013d1, 0x6ce01b8e, 0x3c7f7df7,
- 0xd17cba53, 0x3fe0503e, 0x74ad7633, 0xbc697609,
- 0x7bdb3895, 0x3fe1517a, 0x82f9091b, 0xbc8008d1,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000
+static const FFTS_ALIGN(16) ffts_double_t half_secant[66] = {
+ { { 0x00000000, 0x3fe00000 } }, { { 0xc9be45de, 0x3be3bd3c } },
+ { { 0x00000000, 0x3fe00000 } }, { { 0xc9be45de, 0x3c03bd3c } },
+ { { 0x00000000, 0x3fe00000 } }, { { 0xc9be45de, 0x3c23bd3c } },
+ { { 0x00000000, 0x3fe00000 } }, { { 0xc9be45de, 0x3c43bd3c } },
+ { { 0x00000000, 0x3fe00000 } }, { { 0xc9be45de, 0x3c63bd3c } },
+ { { 0x00000000, 0x3fe00000 } }, { { 0xc9be45df, 0x3c83bd3c } },
+ { { 0x00000001, 0x3fe00000 } }, { { 0x4df22efd, 0x3c7de9e6 } },
+ { { 0x00000005, 0x3fe00000 } }, { { 0x906e8725, 0xbc60b0cd } },
+ { { 0x00000014, 0x3fe00000 } }, { { 0x906e8357, 0xbc80b0cd } },
+ { { 0x0000004f, 0x3fe00000 } }, { { 0x0dce83c9, 0xbc5619b2 } },
+ { { 0x0000013c, 0x3fe00000 } }, { { 0x0dc6e79a, 0xbc7619b2 } },
+ { { 0x000004ef, 0x3fe00000 } }, { { 0xe4af1240, 0x3c83cc9b } },
+ { { 0x000013bd, 0x3fe00000 } }, { { 0x2d14c08a, 0x3c7e64df } },
+ { { 0x00004ef5, 0x3fe00000 } }, { { 0x47a85465, 0xbc59b20b } },
+ { { 0x00013bd4, 0x3fe00000 } }, { { 0xab79c897, 0xbc79b203 } },
+ { { 0x0004ef4f, 0x3fe00000 } }, { { 0x15019a96, 0x3c79386b } },
+ { { 0x0013bd3d, 0x3fe00000 } }, { { 0x7d6dbf4b, 0xbc7b16b7 } },
+ { { 0x004ef4f3, 0x3fe00000 } }, { { 0xf30832e0, 0x3c741ee4 } },
+ { { 0x013bd3cd, 0x3fe00000 } }, { { 0xd3bcd4bb, 0xbc83f41e } },
+ { { 0x04ef4f34, 0x3fe00000 } }, { { 0xdd75aebb, 0xbc82ef06 } },
+ { { 0x13bd3cde, 0x3fe00000 } }, { { 0xb2b41b3d, 0x3c52d979 } },
+ { { 0x4ef4f46c, 0x3fe00000 } }, { { 0x4f0fb458, 0xbc851db3 } },
+ { { 0x3bd3e0e7, 0x3fe00001 } }, { { 0x8a0ce3f0, 0x3c58dbab } },
+ { { 0xef507722, 0x3fe00004 } }, { { 0x2a8ec295, 0x3c83e351 } },
+ { { 0xbd5114f9, 0x3fe00013 } }, { { 0xc4c0d92d, 0x3c8b3ca4 } },
+ { { 0xf637de7d, 0x3fe0004e } }, { { 0xb74de729, 0x3c45974e } },
+ { { 0xe8190891, 0x3fe0013b } }, { { 0x26edf4da, 0xbc814c20 } },
+ { { 0x9436640e, 0x3fe004f0 } }, { { 0xe2b34b50, 0x3c8091ab } },
+ { { 0x9c61d971, 0x3fe013d1 } }, { { 0x6ce01b8e, 0x3c7f7df7 } },
+ { { 0xd17cba53, 0x3fe0503e } }, { { 0x74ad7633, 0xbc697609 } },
+ { { 0x7bdb3895, 0x3fe1517a } }, { { 0x82f9091b, 0xbc8008d1 } },
+ { { 0x00000000, 0x00000000 } }, { { 0x00000000, 0x00000000 } },
+ { { 0x00000000, 0x00000000 } }, { { 0x00000000, 0x00000000 } }
};
/* cos(pow(2,-p)*pi), sin(pow(2,-p)*pi) */
-static const FFTS_ALIGN(16) unsigned int cos_sin_pi_table[264] = {
- 0x00000000, 0x3ff00000, 0x54442d18, 0x3df921fb,
- 0xc9be45de, 0xbbf3bd3c, 0xbb77974f, 0x3a91a390,
- 0x00000000, 0x3ff00000, 0x54442d18, 0x3e0921fb,
- 0xc9be45de, 0xbc13bd3c, 0x54a14928, 0x3aa19bd0,
- 0x00000000, 0x3ff00000, 0x54442d18, 0x3e1921fb,
- 0xc9be45de, 0xbc33bd3c, 0xb948108a, 0x3ab17cce,
- 0x00000000, 0x3ff00000, 0x54442d18, 0x3e2921fb,
- 0xc9be45de, 0xbc53bd3c, 0x4be32e14, 0x3ac100c8,
- 0x00000000, 0x3ff00000, 0x54442d18, 0x3e3921fb,
- 0xc9be45de, 0xbc73bd3c, 0x2c9f4879, 0x3ace215d,
- 0xffffffff, 0x3fefffff, 0x54442d18, 0x3e4921fb,
- 0x6c837443, 0x3c888586, 0x0005f376, 0x3acd411f,
- 0xfffffffe, 0x3fefffff, 0x54442d18, 0x3e5921fb,
- 0x4df22ef1, 0xbc8de9e6, 0x9937209e, 0xbaf7b153,
- 0xfffffff6, 0x3fefffff, 0x54442d16, 0x3e6921fb,
- 0x906e88aa, 0x3c70b0cd, 0xfe19968a, 0xbb03b7c0,
- 0xffffffd9, 0x3fefffff, 0x54442d0e, 0x3e7921fb,
- 0xdf22ed26, 0xbc8e9e64, 0x8d1b6ffb, 0xbaee8bb4,
- 0xffffff62, 0x3fefffff, 0x54442cef, 0x3e8921fb,
- 0x0dd18f0f, 0x3c6619b2, 0x7f2b20fb, 0xbb00e133,
- 0xfffffd88, 0x3fefffff, 0x54442c73, 0x3e9921fb,
- 0x0dd314b2, 0x3c8619b2, 0x619fdf6e, 0xbb174e98,
- 0xfffff621, 0x3fefffff, 0x54442a83, 0x3ea921fb,
- 0x3764acf5, 0x3c8866c8, 0xf5b2407f, 0xbb388215,
- 0xffffd886, 0x3fefffff, 0x544422c2, 0x3eb921fb,
- 0x20e7a944, 0xbc8e64df, 0x7b9b9f23, 0x3b5a0961,
- 0xffff6216, 0x3fefffff, 0x544403c1, 0x3ec921fb,
- 0x52ee25ea, 0x3c69b20e, 0x4df6a86a, 0xbb5999d9,
- 0xfffd8858, 0x3fefffff, 0x544387ba, 0x3ed921fb,
- 0xd8910ead, 0x3c89b20f, 0x0809d04d, 0x3b77d9db,
- 0xfff62162, 0x3fefffff, 0x544197a1, 0x3ee921fb,
- 0x438d3925, 0xbc8937a8, 0xa5d27f7a, 0xbb858b02,
- 0xffd88586, 0x3fefffff, 0x5439d73a, 0x3ef921fb,
- 0x94b3ddd2, 0x3c8b22e4, 0xf8a3b73d, 0xbb863c7f,
- 0xff62161a, 0x3fefffff, 0x541ad59e, 0x3f0921fb,
- 0x7ea469b2, 0xbc835c13, 0xb8cee262, 0x3bae9860,
- 0xfd885867, 0x3fefffff, 0x539ecf31, 0x3f1921fb,
- 0x23a32e63, 0xbc77d556, 0xfcd23a30, 0x3b96b111,
- 0xf621619c, 0x3fefffff, 0x51aeb57c, 0x3f2921fb,
- 0xbbbd8fe6, 0xbc87507d, 0x4916c435, 0xbbca6e1d,
- 0xd8858675, 0x3fefffff, 0x49ee4ea6, 0x3f3921fb,
- 0x54748eab, 0xbc879f0e, 0x744a453e, 0x3bde894d,
- 0x62161a34, 0x3fefffff, 0x2aecb360, 0x3f4921fb,
- 0xb1f9b9c4, 0xbc6136dc, 0x7e566b4c, 0x3be87615,
- 0x88586ee6, 0x3feffffd, 0xaee6472e, 0x3f5921fa,
- 0xf173ae5b, 0x3c81af64, 0x284a9df8, 0xbbfee52e,
- 0x21621d02, 0x3feffff6, 0xbecca4ba, 0x3f6921f8,
- 0xebc82813, 0xbc76acfc, 0x7bcab5b2, 0x3c02ba40,
- 0x858e8a92, 0x3fefffd8, 0xfe670071, 0x3f7921f0,
- 0x1883bcf7, 0x3c8359c7, 0xfe6b7a9b, 0x3bfab967,
- 0x169b92db, 0x3fefff62, 0xfcdec784, 0x3f8921d1,
- 0xc81fbd0d, 0x3c85dda3, 0xbe836d9d, 0x3c29878e,
- 0x6084cd0d, 0x3feffd88, 0xf7a3667e, 0x3f992155,
- 0x4556e4cb, 0xbc81354d, 0x091a0130, 0xbbfb1d63,
- 0xe3796d7e, 0x3feff621, 0xf10dd814, 0x3fa91f65,
- 0x2e24aa15, 0xbc6c57bc, 0x0d569a90, 0xbc2912bd,
- 0xa3d12526, 0x3fefd88d, 0xbc29b42c, 0x3fb917a6,
- 0x378811c7, 0xbc887df6, 0xd26ed688, 0xbc3e2718,
- 0xcff75cb0, 0x3fef6297, 0x3c69a60b, 0x3fc8f8b8,
- 0x2a361fd3, 0x3c756217, 0xb9ff8d82, 0xbc626d19,
- 0xcf328d46, 0x3fed906b, 0xa6aea963, 0x3fd87de2,
- 0x10231ac2, 0x3c7457e6, 0xd3d5a610, 0xbc672ced,
- 0x667f3bcd, 0x3fe6a09e, 0x667f3bcd, 0x3fe6a09e,
- 0x13b26456, 0xbc8bdd34, 0x13b26456, 0xbc8bdd34,
- 0x00000000, 0x00000000, 0x00000000, 0x3ff00000,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000
+static const FFTS_ALIGN(32) ffts_double_t cos_sin_pi_table[132] = {
+ { { 0x00000000, 0x3ff00000 } }, { { 0x54442d18, 0x3df921fb } },
+ { { 0xc9be45de, 0xbbf3bd3c } }, { { 0xbb77974f, 0x3a91a390 } },
+ { { 0x00000000, 0x3ff00000 } }, { { 0x54442d18, 0x3e0921fb } },
+ { { 0xc9be45de, 0xbc13bd3c } }, { { 0x54a14928, 0x3aa19bd0 } },
+ { { 0x00000000, 0x3ff00000 } }, { { 0x54442d18, 0x3e1921fb } },
+ { { 0xc9be45de, 0xbc33bd3c } }, { { 0xb948108a, 0x3ab17cce } },
+ { { 0x00000000, 0x3ff00000 } }, { { 0x54442d18, 0x3e2921fb } },
+ { { 0xc9be45de, 0xbc53bd3c } }, { { 0x4be32e14, 0x3ac100c8 } },
+ { { 0x00000000, 0x3ff00000 } }, { { 0x54442d18, 0x3e3921fb } },
+ { { 0xc9be45de, 0xbc73bd3c } }, { { 0x2c9f4879, 0x3ace215d } },
+ { { 0xffffffff, 0x3fefffff } }, { { 0x54442d18, 0x3e4921fb } },
+ { { 0x6c837443, 0x3c888586 } }, { { 0x0005f376, 0x3acd411f } },
+ { { 0xfffffffe, 0x3fefffff } }, { { 0x54442d18, 0x3e5921fb } },
+ { { 0x4df22ef1, 0xbc8de9e6 } }, { { 0x9937209e, 0xbaf7b153 } },
+ { { 0xfffffff6, 0x3fefffff } }, { { 0x54442d16, 0x3e6921fb } },
+ { { 0x906e88aa, 0x3c70b0cd } }, { { 0xfe19968a, 0xbb03b7c0 } },
+ { { 0xffffffd9, 0x3fefffff } }, { { 0x54442d0e, 0x3e7921fb } },
+ { { 0xdf22ed26, 0xbc8e9e64 } }, { { 0x8d1b6ffb, 0xbaee8bb4 } },
+ { { 0xffffff62, 0x3fefffff } }, { { 0x54442cef, 0x3e8921fb } },
+ { { 0x0dd18f0f, 0x3c6619b2 } }, { { 0x7f2b20fb, 0xbb00e133 } },
+ { { 0xfffffd88, 0x3fefffff } }, { { 0x54442c73, 0x3e9921fb } },
+ { { 0x0dd314b2, 0x3c8619b2 } }, { { 0x619fdf6e, 0xbb174e98 } },
+ { { 0xfffff621, 0x3fefffff } }, { { 0x54442a83, 0x3ea921fb } },
+ { { 0x3764acf5, 0x3c8866c8 } }, { { 0xf5b2407f, 0xbb388215 } },
+ { { 0xffffd886, 0x3fefffff } }, { { 0x544422c2, 0x3eb921fb } },
+ { { 0x20e7a944, 0xbc8e64df } }, { { 0x7b9b9f23, 0x3b5a0961 } },
+ { { 0xffff6216, 0x3fefffff } }, { { 0x544403c1, 0x3ec921fb } },
+ { { 0x52ee25ea, 0x3c69b20e } }, { { 0x4df6a86a, 0xbb5999d9 } },
+ { { 0xfffd8858, 0x3fefffff } }, { { 0x544387ba, 0x3ed921fb } },
+ { { 0xd8910ead, 0x3c89b20f } }, { { 0x0809d04d, 0x3b77d9db } },
+ { { 0xfff62162, 0x3fefffff } }, { { 0x544197a1, 0x3ee921fb } },
+ { { 0x438d3925, 0xbc8937a8 } }, { { 0xa5d27f7a, 0xbb858b02 } },
+ { { 0xffd88586, 0x3fefffff } }, { { 0x5439d73a, 0x3ef921fb } },
+ { { 0x94b3ddd2, 0x3c8b22e4 } }, { { 0xf8a3b73d, 0xbb863c7f } },
+ { { 0xff62161a, 0x3fefffff } }, { { 0x541ad59e, 0x3f0921fb } },
+ { { 0x7ea469b2, 0xbc835c13 } }, { { 0xb8cee262, 0x3bae9860 } },
+ { { 0xfd885867, 0x3fefffff } }, { { 0x539ecf31, 0x3f1921fb } },
+ { { 0x23a32e63, 0xbc77d556 } }, { { 0xfcd23a30, 0x3b96b111 } },
+ { { 0xf621619c, 0x3fefffff } }, { { 0x51aeb57c, 0x3f2921fb } },
+ { { 0xbbbd8fe6, 0xbc87507d } }, { { 0x4916c435, 0xbbca6e1d } },
+ { { 0xd8858675, 0x3fefffff } }, { { 0x49ee4ea6, 0x3f3921fb } },
+ { { 0x54748eab, 0xbc879f0e } }, { { 0x744a453e, 0x3bde894d } },
+ { { 0x62161a34, 0x3fefffff } }, { { 0x2aecb360, 0x3f4921fb } },
+ { { 0xb1f9b9c4, 0xbc6136dc } }, { { 0x7e566b4c, 0x3be87615 } },
+ { { 0x88586ee6, 0x3feffffd } }, { { 0xaee6472e, 0x3f5921fa } },
+ { { 0xf173ae5b, 0x3c81af64 } }, { { 0x284a9df8, 0xbbfee52e } },
+ { { 0x21621d02, 0x3feffff6 } }, { { 0xbecca4ba, 0x3f6921f8 } },
+ { { 0xebc82813, 0xbc76acfc } }, { { 0x7bcab5b2, 0x3c02ba40 } },
+ { { 0x858e8a92, 0x3fefffd8 } }, { { 0xfe670071, 0x3f7921f0 } },
+ { { 0x1883bcf7, 0x3c8359c7 } }, { { 0xfe6b7a9b, 0x3bfab967 } },
+ { { 0x169b92db, 0x3fefff62 } }, { { 0xfcdec784, 0x3f8921d1 } },
+ { { 0xc81fbd0d, 0x3c85dda3 } }, { { 0xbe836d9d, 0x3c29878e } },
+ { { 0x6084cd0d, 0x3feffd88 } }, { { 0xf7a3667e, 0x3f992155 } },
+ { { 0x4556e4cb, 0xbc81354d } }, { { 0x091a0130, 0xbbfb1d63 } },
+ { { 0xe3796d7e, 0x3feff621 } }, { { 0xf10dd814, 0x3fa91f65 } },
+ { { 0x2e24aa15, 0xbc6c57bc } }, { { 0x0d569a90, 0xbc2912bd } },
+ { { 0xa3d12526, 0x3fefd88d } }, { { 0xbc29b42c, 0x3fb917a6 } },
+ { { 0x378811c7, 0xbc887df6 } }, { { 0xd26ed688, 0xbc3e2718 } },
+ { { 0xcff75cb0, 0x3fef6297 } }, { { 0x3c69a60b, 0x3fc8f8b8 } },
+ { { 0x2a361fd3, 0x3c756217 } }, { { 0xb9ff8d82, 0xbc626d19 } },
+ { { 0xcf328d46, 0x3fed906b } }, { { 0xa6aea963, 0x3fd87de2 } },
+ { { 0x10231ac2, 0x3c7457e6 } }, { { 0xd3d5a610, 0xbc672ced } },
+ { { 0x667f3bcd, 0x3fe6a09e } }, { { 0x667f3bcd, 0x3fe6a09e } },
+ { { 0x13b26456, 0xbc8bdd34 } }, { { 0x13b26456, 0xbc8bdd34 } },
+ { { 0x00000000, 0x00000000 } }, { { 0x00000000, 0x3ff00000 } },
+ { { 0x00000000, 0x00000000 } }, { { 0x00000000, 0x00000000 } }
+};
+
+#define COS_SIN_TABLE_SIZE 260
+
+/* cos(pi*k/256), sin(pi*k/256) */
+static const FFTS_ALIGN(32) ffts_double_t cos_sin_table[COS_SIN_TABLE_SIZE] = {
+ { { 0x00000000, 0x3FF00000 } }, { { 0x00000000, 0x00000000 } },
+ { { 0x00000000, 0x00000000 } }, { { 0x00000000, 0x00000000 } },
+ { { 0x169B92DB, 0x3FEFFF62 } }, { { 0xFCDEC784, 0x3F8921D1 } },
+ { { 0xC81FBD0D, 0x3C85DDA3 } }, { { 0xBE836D9D, 0x3C29878E } },
+ { { 0x6084CD0D, 0x3FEFFD88 } }, { { 0xF7A3667E, 0x3F992155 } },
+ { { 0x4556E4CB, 0xBC81354D } }, { { 0x091A0130, 0xBBFB1D63 } },
+ { { 0xEFFEF75D, 0x3FEFFA72 } }, { { 0x759455CD, 0x3FA2D865 } },
+ { { 0xCDB25956, 0xBC88B4CD } }, { { 0x5BA93AC0, 0x3C2686F6 } },
+ { { 0xE3796D7E, 0x3FEFF621 } }, { { 0xF10DD814, 0x3FA91F65 } },
+ { { 0x2E24AA15, 0xBC6C57BC } }, { { 0x0D569A90, 0xBC2912BD } },
+ { { 0x658E71AD, 0x3FEFF095 } }, { { 0x79F820E0, 0x3FAF656E } },
+ { { 0xE18A4B9E, 0x3C801A8C } }, { { 0xE392BFFE, 0xBC22E1EB } },
+ { { 0xAD01883A, 0x3FEFE9CD } }, { { 0x92CE19F6, 0x3FB2D520 } },
+ { { 0xD0C67E35, 0x3C6521EC } }, { { 0xA8BF6B2C, 0xBC49A088 } },
+ { { 0xFCBD5B09, 0x3FEFE1CA } }, { { 0x0A9AA419, 0x3FB5F6D0 } },
+ { { 0x202A884E, 0x3C6A23E3 } }, { { 0xD03F6C9A, 0xBC4F4022 } },
+ { { 0xA3D12526, 0x3FEFD88D } }, { { 0xBC29B42C, 0x3FB917A6 } },
+ { { 0x378811C7, 0xBC887DF6 } }, { { 0xD26ED688, 0xBC3E2718 } },
+ { { 0xFD6DA67B, 0x3FEFCE15 } }, { { 0xC79EC2D5, 0x3FBC3785 } },
+ { { 0x830D4C09, 0xBC75DD6F } }, { { 0xF133FB21, 0xBC24F39D } },
+ { { 0x70E19FD3, 0x3FEFC264 } }, { { 0x56A9730E, 0x3FBF564E } },
+ { { 0x68ECACEE, 0x3C81EC86 } }, { { 0x729AE56D, 0x3C4A2704 } },
+ { { 0x7195D741, 0x3FEFB579 } }, { { 0xCEDAF577, 0x3FC139F0 } },
+ { { 0x7397CC08, 0x3C71BFAC } }, { { 0x4D1B3CFA, 0xBC652343 } },
+ { { 0x7F08A517, 0x3FEFA755 } }, { { 0x6E8E613A, 0x3FC2C810 } },
+ { { 0xCA13571F, 0xBC87A0A8 } }, { { 0xA89A11E0, 0x3C513000 } },
+ { { 0x24C9099B, 0x3FEF97F9 } }, { { 0xB1293E5A, 0x3FC45576 } },
+ { { 0xEEA5963B, 0xBC8E2AE0 } }, { { 0x4119F7B1, 0xBC5285A2 } },
+ { { 0xFA714BA9, 0x3FEF8764 } }, { { 0x448B3FC6, 0x3FC5E214 } },
+ { { 0x778FFCB6, 0x3C7AB256 } }, { { 0x779DDAC6, 0x3C6531FF } },
+ { { 0xA3A12077, 0x3FEF7599 } }, { { 0xDE50BF31, 0x3FC76DD9 } },
+ { { 0xD743195C, 0x3C884F31 } }, { { 0xEC501B2F, 0x3C61D5EE } },
+ { { 0xCFF75CB0, 0x3FEF6297 } }, { { 0x3C69A60B, 0x3FC8F8B8 } },
+ { { 0x2A361FD3, 0x3C756217 } }, { { 0xB9FF8D82, 0xBC626D19 } },
+ { { 0x3B0B2F2D, 0x3FEF4E60 } }, { { 0x25B00451, 0x3FCA82A0 } },
+ { { 0xE695AC05, 0xBC78EE01 } }, { { 0xFFD084AD, 0xBC687905 } },
+ { { 0xAC64E589, 0x3FEF38F3 } }, { { 0x6A7E4F63, 0x3FCC0B82 } },
+ { { 0xB51F72E6, 0xBC7D7BAF } }, { { 0x9E521935, 0xBC1AF143 } },
+ { { 0xF7763ADA, 0x3FEF2252 } }, { { 0xE5454311, 0x3FCD934F } },
+ { { 0x1C8D94AB, 0xBC820CB8 } }, { { 0x277107AD, 0x3C675B92 } },
+ { { 0xFB9230D7, 0x3FEF0A7E } }, { { 0x7B215F1B, 0x3FCF19F9 } },
+ { { 0xDC6B4989, 0x3C752C7A } }, { { 0xF11DA2C4, 0xBC642DEE } },
+ { { 0xA3E473C2, 0x3FEEF178 } }, { { 0x0E37FDAE, 0x3FD04FB8 } },
+ { { 0x67FE774F, 0x3C86310A } }, { { 0xB72583CC, 0xBC0412CD } },
+ { { 0xE7684963, 0x3FEED740 } }, { { 0x62B1F677, 0x3FD111D2 } },
+ { { 0x91F59CC2, 0x3C7E82C7 } }, { { 0x0AB7AA9A, 0x3C7824C2 } },
+ { { 0xC8DF0B74, 0x3FEEBBD8 } }, { { 0x3F4CDB3E, 0x3FD1D344 } },
+ { { 0x615E7277, 0x3C7C6C8C } }, { { 0x1C13519E, 0xBC6720D4 } },
+ { { 0x56C62DDA, 0x3FEE9F41 } }, { { 0x2ED59F06, 0x3FD29406 } },
+ { { 0xE2E3F81E, 0x3C8760B1 } }, { { 0xA2C4612D, 0xBC75D28D } },
+ { { 0xAB4CD10D, 0x3FEE817B } }, { { 0xC2E18152, 0x3FD35410 } },
+ { { 0x686B5E0A, 0xBC7D0AFE } }, { { 0x2F96E062, 0xBC73CB00 } },
+ { { 0xEC48E112, 0x3FEE6288 } }, { { 0x94176601, 0x3FD4135C } },
+ { { 0xF2847754, 0xBC616B56 } }, { { 0x4AFA2518, 0x3C70C97C } },
+ { { 0x4B2BC17E, 0x3FEE426A } }, { { 0x4278E76A, 0x3FD4D1E2 } },
+ { { 0x89744882, 0x3C8A8738 } }, { { 0x18792858, 0x3C624172 } },
+ { { 0x04F686E5, 0x3FEE2121 } }, { { 0x75AB1FDD, 0x3FD58F9A } },
+ { { 0x6C126527, 0xBC8014C7 } }, { { 0xD58CF620, 0xBC1EFDC0 } },
+ { { 0x622DBE2B, 0x3FEDFEAE } }, { { 0xDD3F27C6, 0x3FD64C7D } },
+ { { 0x88425567, 0xBC8514EA } }, { { 0x4A664121, 0x3C510D2B } },
+ { { 0xB6CCC23C, 0x3FEDDB13 } }, { { 0x30FA459F, 0x3FD70885 } },
+ { { 0xC6107DB3, 0x3C883C37 } }, { { 0xE0864C5D, 0xBC744B19 } },
+ { { 0x6238A09B, 0x3FEDB652 } }, { { 0x311DCCE7, 0x3FD7C3A9 } },
+ { { 0xEAE69460, 0xBC7ADEE7 } }, { { 0x1EF3E8D9, 0x3C19A3F2 } },
+ { { 0xCF328D46, 0x3FED906B } }, { { 0xA6AEA963, 0x3FD87DE2 } },
+ { { 0x10231AC2, 0x3C7457E6 } }, { { 0xD3D5A610, 0xBC672CED } },
+ { { 0x73C9E68B, 0x3FED6961 } }, { { 0x63BC93D7, 0x3FD9372A } },
+ { { 0xC6393D55, 0xBC7E8C61 } }, { { 0x9E5AD5B1, 0x3C668431 } },
+ { { 0xD14DC93A, 0x3FED4134 } }, { { 0x43A8ED8A, 0x3FD9EF79 } },
+ { { 0x95D25AF2, 0xBC84EF52 } }, { { 0x290BDBAB, 0x3C66DA81 } },
+ { { 0x743E35DC, 0x3FED17E7 } }, { { 0x2B6D3FCA, 0x3FDAA6C8 } },
+ { { 0x3540130A, 0xBC5101DA } }, { { 0x6EE5CCF7, 0xBC7D5F10 } },
+ { { 0xF43CC773, 0x3FECED7A } }, { { 0x09E15CC0, 0x3FDB5D10 } },
+ { { 0xB5AB58AE, 0xBC5E7B6B } }, { { 0xCB974183, 0x3C65B362 } },
+ { { 0xF3FCFC5C, 0x3FECC1F0 } }, { { 0xD8011EE7, 0x3FDC1249 } },
+ { { 0x3B68F6AB, 0x3C7E5761 } }, { { 0xBB515206, 0xBC7813AA } },
+ { { 0x213411F5, 0x3FEC954B } }, { { 0x9931C45E, 0x3FDCC66E } },
+ { { 0x1E946603, 0xBC52FB76 } }, { { 0x59C37F8F, 0x3C56850E } },
+ { { 0x3488739B, 0x3FEC678B } }, { { 0x5B86E389, 0x3FDD7977 } },
+ { { 0xC7C5FF5B, 0x3C6D86CA } }, { { 0x87BC0575, 0x3C7550EC } },
+ { { 0xF180BDB1, 0x3FEC38B2 } }, { { 0x3806F63B, 0x3FDE2B5D } },
+ { { 0x757C8D07, 0xBC76E0B1 } }, { { 0x1D3C6841, 0x3C5E0D89 } },
+ { { 0x26725549, 0x3FEC08C4 } }, { { 0x52EF78D6, 0x3FDEDC19 } },
+ { { 0xD80E2946, 0x3C5B157F } }, { { 0xC33EDEE6, 0xBC7DD0F7 } },
+ { { 0xAC6F952A, 0x3FEBD7C0 } }, { { 0xDBF89ABA, 0x3FDF8BA4 } },
+ { { 0x32AC700A, 0xBC8825A7 } }, { { 0xC1B776B8, 0xBC32EC1F } },
+ { { 0x673590D2, 0x3FEBA5AA } }, { { 0x874C3EB7, 0x3FE01CFC } },
+ { { 0x370753B6, 0x3C87EA4E } }, { { 0xE7C2368C, 0xBC734A35 } },
+ { { 0x45196E3E, 0x3FEB7283 } }, { { 0x9922FFEE, 0x3FE07387 } },
+ { { 0x324E6D61, 0xBC8BC69F } }, { { 0x4347406C, 0xBC8A5A01 } },
+ { { 0x3EF55712, 0x3FEB3E4D } }, { { 0x4D5D898F, 0x3FE0C970 } },
+ { { 0xBF11A493, 0xBC8EB6B8 } }, { { 0xDE6EE9B2, 0xBC88D3D7 } },
+ { { 0x58150200, 0x3FEB090A } }, { { 0x541B4B23, 0x3FE11EB3 } },
+ { { 0x300FFCCE, 0xBC8926DA } }, { { 0x69ABE4F1, 0xBC8EF23B } },
+ { { 0x9E21D511, 0x3FEAD2BC } }, { { 0x63DEDB49, 0x3FE1734D } },
+ { { 0x07BEA548, 0xBC847FBE } }, { { 0xCCC50575, 0xBC87EEF2 } },
+ { { 0x290EA1A3, 0x3FEA9B66 } }, { { 0x39AE68C8, 0x3FE1C73B } },
+ { { 0xE8B6DAC8, 0x3C39F630 } }, { { 0x267F6600, 0x3C8B25DD } },
+ { { 0x1B02FAE2, 0x3FEA6309 } }, { { 0x9933EB59, 0x3FE21A79 } },
+ { { 0x52248D10, 0xBC7E9111 } }, { { 0x77C68FB2, 0xBC83A7B1 } },
+ { { 0xA0462782, 0x3FEA29A7 } }, { { 0x4CDD12DF, 0x3FE26D05 } },
+ { { 0x015DF175, 0xBC7128BB } }, { { 0x3EF3770C, 0xBC85DA74 } },
+ { { 0xEF29AF94, 0x3FE9EF43 } }, { { 0x25FAF3EA, 0x3FE2BEDB } },
+ { { 0xB60445C2, 0x3C7B1DFC } }, { { 0xC796EE46, 0xBC514981 } },
+ { { 0x47F38741, 0x3FE9B3E0 } }, { { 0xFCE17035, 0x3FE30FF7 } },
+ { { 0x86712474, 0xBC830EE2 } }, { { 0x26F74A6F, 0xBC6EFCC6 } },
+ { { 0xF4C7D742, 0x3FE9777E } }, { { 0xB10659F3, 0x3FE36058 } },
+ { { 0xA240665E, 0xBC815479 } }, { { 0xA35857E7, 0xBC81FCB3 } },
+ { { 0x499263FB, 0x3FE93A22 } }, { { 0x292050B9, 0x3FE3AFFA } },
+ { { 0xA920DF0B, 0x3C83D419 } }, { { 0xE3954964, 0x3C7E3E25 } },
+ { { 0xA3EF940D, 0x3FE8FBCC } }, { { 0x534556D4, 0x3FE3FED9 } },
+ { { 0x9C86F2F1, 0xBC66DFA9 } }, { { 0x608C5061, 0x3C836916 } },
+ { { 0x6B151741, 0x3FE8BC80 } }, { { 0x25091DD6, 0x3FE44CF3 } },
+ { { 0x2ED1336D, 0xBC82C5E1 } }, { { 0x2CFDC6B3, 0x3C68076A } },
+ { { 0x0FBA2EBF, 0x3FE87C40 } }, { { 0x9B9B0939, 0x3FE49A44 } },
+ { { 0x0C3F64CD, 0xBC82DABC } }, { { 0x6D719B94, 0xBC827EE1 } },
+ { { 0x0BFF976E, 0x3FE83B0E } }, { { 0xBBE3E5E9, 0x3FE4E6CA } },
+ { { 0xF8EA3475, 0xBC76F420 } }, { { 0xEDCEB327, 0x3C63C293 } },
+ { { 0xE3571771, 0x3FE7F8EC } }, { { 0x92A35596, 0x3FE53282 } },
+ { { 0xCE93C917, 0xBC89C8D8 } }, { { 0x89DA0257, 0xBC7A12EB } },
+ { { 0x226AAFAF, 0x3FE7B5DF } }, { { 0x348CECA0, 0x3FE57D69 } },
+ { { 0xACDF0AD7, 0xBC70F537 } }, { { 0x992BFBB2, 0xBC875720 } },
+ { { 0x5F037261, 0x3FE771E7 } }, { { 0xBE65018C, 0x3FE5C77B } },
+ { { 0x8D84068F, 0x3C75CFCE } }, { { 0x9C0BC32A, 0x3C8069EA } },
+ { { 0x37EFFF96, 0x3FE72D08 } }, { { 0x551D2CDF, 0x3FE610B7 } },
+ { { 0x0F1D915C, 0x3C80D4EF } }, { { 0x52FF2A37, 0xBC7251B3 } },
+ { { 0x54EAA8AF, 0x3FE6E744 } }, { { 0x25F0783D, 0x3FE65919 } },
+ { { 0xC84E226E, 0xBC8DBC03 } }, { { 0xFBF5DE23, 0x3C8C3D64 } },
+ { { 0x667F3BCD, 0x3FE6A09E } }, { { 0x667F3BCD, 0x3FE6A09E } },
+ { { 0x13B26456, 0xBC8BDD34 } }, { { 0x13B26456, 0xBC8BDD34 } }
};
+/* cos(pi * x), x=[0;1/512] */
+static const FFTS_ALIGN(16) ffts_double_t cos_coeff[3] = {
+ { { 0xC9BE45DE, 0xC013BD3C } },
+ { { 0x081749FA, 0x40103C1F } },
+ { { 0x047EE98B, 0xBFF55D10 } }
+};
+
+/* sin(pi * x), x=[0;1/512] */
+static const FFTS_ALIGN(16) ffts_double_t sin_coeff[4] = {
+ { { 0x54442D18, 0x400921FB } },
+ { { 0xE62154CA, 0xC014ABBC } },
+ { { 0xCEF16BFE, 0x40046675 } },
+ { { 0xADE54A87, 0x40339228 } }
+};
+
+#ifndef M_1_256
+#define M_1_256 3.90625e-3
+#endif
+
+static int
+ffts_cexp_32f64f(size_t n, size_t d, double *out);
+
+/* calculate cos(pi * n / d) and sin(pi * n / d) with maximum error less than 1 ULP, average ~0.5 ULP */
int
-ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size)
+ffts_cexp_32f(size_t n, size_t d, float *output)
{
- double alpha, beta;
- double c[2], s[2];
- double x, z;
- int i;
+ double FFTS_ALIGN(16) z[2];
- if (!table || !table_size) {
+ if (!d || !output)
return -1;
+
+ /* reduction */
+ if (FFTS_UNLIKELY(n >= d))
+ n %= d;
+
+ ffts_cexp_32f64f(n, d, z);
+
+ output[0] = (float) z[0];
+ output[1] = (float) z[1];
+ return 0;
+}
+
+/* used as intermediate result for single precision calculations */
+static int
+ffts_cexp_32f64f(size_t n, size_t d, double *output)
+{
+ const ffts_double_t *ct = (const ffts_double_t*) FFTS_ASSUME_ALIGNED_32(cos_sin_table);
+ const ffts_double_t *cc = (const ffts_double_t*) FFTS_ASSUME_ALIGNED_16(cos_coeff);
+ const ffts_double_t *sc = (const ffts_double_t*) FFTS_ASSUME_ALIGNED_16(sin_coeff);
+ double *out = FFTS_ASSUME_ALIGNED_16(output);
+ double c, s, cos_a, cos_b, sin_a, sin_b;
+ double cos_sign, sin_sign, sign, x, z;
+ int i, j, swap;
+
+ /* we know this */
+ FFTS_ASSUME(d > 0);
+ FFTS_ASSUME(n < d);
+
+ /* determinate octant */
+ if (n > d - n) {
+ sin_sign = -1.0;
+ n = d - n;
+ } else {
+ sin_sign = 1.0;
}
- /* the first */
- table[0][0] = 1.0f;
- table[0][1] = -0.0f;
+ n <<= 1;
+ if (n > d - n) {
+ cos_sign = -1.0;
+ swap = 1;
+ n += n - d;
+ } else {
+ cos_sign = 1.0;
+ swap = 0;
+ n <<= 1;
+ }
- if (FFTS_UNLIKELY(table_size == 1)) {
- goto exit;
+ if (n > d - n) {
+ swap ^= 1;
+ n = d - n;
}
- if (FFTS_UNLIKELY(table_size == 2)) {
- /* skip over */
- i = 1;
- goto mid_point;
+ /* "binary long division" */
+ for (i = 0, j = (1 << 5), n <<= 1; j && n; j >>= 1) {
+ if (n > d - n) {
+ n += n - d;
+ i += j;
+ } else {
+ n <<= 1;
+ }
+ }
+
+ /* decide between two table values */
+ if (n > d - n) {
+ i++;
+ n = d - n;
+ sign = -1.0;
+ } else {
+ sign = 1.0;
}
- /* polynomial approximations calculated using Sollya */
- x = 1.0 / table_size;
+ /* divide by 256 is exact (as is the multiply with its reciprocal) */
+ x = ((double) n / d) * M_1_256;
+
+ /* 0 <= x <= 1/512 */
z = x * x;
- /* alpha = 2 * sin(M_PI_4 / m) * sin(M_PI_4 / m) */
- alpha = x * (1.1107207345394952717884501203293686870741139540138 +
- z * (-0.114191397993514079911985272577099412137126013186879 +
- z * 3.52164670852685621720746817665316575239342815885835e-3));
- alpha = alpha * alpha;
+ /* table lookup */
+ cos_a = ct[4 * i + 0].d;
+ sin_a = ct[4 * i + 2].d;
- /* beta = sin(M_PI_2 / m) */
- beta = x * (1.57079632679489455959753740899031981825828552246094 +
- z * (-0.64596409735041482313988581154262647032737731933593 +
- z * 7.9690915468332887416913479228242067620158195495605e-2));
+ /* evaluate polynomials */
+ cos_b = 1.0 + z * (cc[0].d + z * (cc[1].d + z * cc[2].d));
+ sin_b = x * (sc[0].d + z * (sc[1].d + z * (sc[2].d + z * sc[3].d)));
- /* cos(0) = 1.0, sin(0) = 0.0 */
- c[0] = 1.0;
- s[0] = 0.0;
+ /* sum or difference of angles */
+ c = cos_a * cos_b - sign * sin_a * sin_b;
+ s = sin_a * cos_b + sign * cos_a * sin_b;
+
+ if (swap) {
+ double tmp = c;
+ c = s;
+ s = tmp;
+ }
+
+ out[0] = cos_sign * c;
+ out[1] = sin_sign * s;
+ return 0;
+}
+
+int
+ffts_generate_chirp_32f(ffts_cpx_32f *const table, size_t table_size)
+{
+ ffts_cpx_32f *lut;
+ size_t i, j, n;
- /* generate sine and cosine tables with maximum error less than 1 ULP */
- for (i = 1; i < (table_size + 1)/2; i++) {
- c[1] = c[0] - ((alpha * c[0]) + (beta * s[0]));
- s[1] = s[0] - ((alpha * s[0]) - (beta * c[0]));
+ if (!table || !table_size)
+ return -1;
+
+ n = 2 * table_size;
+ lut = ffts_aligned_malloc(n * sizeof(*lut));
+ if (!lut)
+ return -1;
- table[i + 0][0] = (float) c[1];
- table[i + 0][1] = (float) -s[1];
- table[table_size - i][0] = (float) s[1];
- table[table_size - i][1] = (float) -c[1];
+ /* initialize LUT */
+ ffts_generate_cosine_sine_32f(lut, n);
- c[0] = c[1];
- s[0] = s[1];
+ /* generate CZT sequence */
+ for (i = 0, j = 0; i < table_size; ++i) {
+ table[i][0] = lut[j][0];
+ table[i][1] = lut[j][1];
+
+ j += 2 * i + 1;
+ if (j >= n)
+ j -= n;
}
- if (FFTS_UNLIKELY(table_size & 1)) {
+ ffts_aligned_free(lut);
+ return 0;
+}
+
+/* generate cosine and sine tables with maximum error less than 1 ULP, average ~0.5 ULP
+* using repeated subvector scaling algorithm, 16 - 20 times faster than
+* direct library calling algorithm.
+*/
+int
+ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, size_t table_size)
+{
+ ffts_cpx_64f *const tmp = (ffts_cpx_64f *const) table;
+ double FFTS_ALIGN(16) z[2], zz[2], x[2], xx[2];
+ size_t i, j, k, len;
+
+ if (!table || !table_size)
+ return -1;
+
+ if (FFTS_UNLIKELY(table_size == 1))
goto exit;
- }
-mid_point:
- table[i][0] = 0.70710677f;
- table[i][1] = -0.70710677f;
+ /* check if table size is a power of two */
+ if (!(table_size & (table_size - 1)))
+ return ffts_generate_cosine_sine_pow2_32f(table, table_size);
+
+ if (!(table_size & 1)) {
+ /* even table size -- check if multiply of four */
+ if (!(table_size & 3)) {
+ /* multiply of four */
+ len = table_size >> 2;
+ for (j = 1; 4 * j <= len; j <<= 1) {
+ ffts_cexp_32f64f(j, table_size, z);
+
+ tmp[j][0] = z[0];
+ tmp[j][1] = z[1];
+
+ tmp[len - j][0] = z[1];
+ tmp[len - j][1] = z[0];
+
+ for (i = 1; i < j; i++) {
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ tmp[j + i][0] = zz[0];
+ tmp[j + i][1] = zz[1];
+
+ tmp[len - j - i][0] = zz[1];
+ tmp[len - j - i][1] = zz[0];
+ }
+ }
+
+ /* this loops zero or one times */
+ for (k = j << 1; k <= len; j <<= 1) {
+ ffts_cexp_32f64f(j, table_size, z);
+
+ tmp[j][0] = z[0];
+ tmp[j][1] = z[1];
+ if (k++ == len)
+ break;
+
+ tmp[len - j][0] = z[1];
+ tmp[len - j][1] = z[0];
+ if (k++ == len)
+ break;
+
+ for (i = 1; i < j; i++) {
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ tmp[j + i][0] = zz[0];
+ tmp[j + i][1] = zz[1];
+ if (k++ == len)
+ break;
+
+ tmp[len - j - i][0] = zz[1];
+ tmp[len - j - i][1] = zz[0];
+ if (k++ == len)
+ break;
+ }
+ }
+
+ /* convert doubles to floats */
+ for (i = 1; i < len; i++) {
+ table[i][0] = (float) tmp[i][0];
+ table[i][1] = (float) tmp[i][1];
+ }
+
+ table[len][0] = 0.0f;
+ table[len][1] = 1.0f;
+
+ for (i = 1; i <= len; i++) {
+ table[len + i][0] = -table[i][1];
+ table[len + i][1] = table[i][0];
+ }
+ } else {
+ /* multiply of two */
+ len = table_size >> 1;
+ for (j = 1; 4 * j <= len; j <<= 1) {
+ ffts_cexp_32f64f(j, table_size, z);
+
+ tmp[j][0] = z[0];
+ tmp[j][1] = z[1];
+
+ tmp[len - j][0] = -z[0];
+ tmp[len - j][1] = z[1];
+
+ for (i = 1; i < j; i++) {
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ tmp[j + i][0] = zz[0];
+ tmp[j + i][1] = zz[1];
+
+ tmp[len - j - i][0] = -zz[0];
+ tmp[len - j - i][1] = zz[1];
+ }
+ }
+
+ /* this loops zero or one times */
+ for (k = j << 1; k <= len; j <<= 1) {
+ ffts_cexp_32f64f(j, table_size, z);
+
+ tmp[j][0] = z[0];
+ tmp[j][1] = z[1];
+ if (k++ == len)
+ break;
+
+ tmp[len - j][0] = -z[0];
+ tmp[len - j][1] = z[1];
+ if (k++ == len)
+ break;
+
+ for (i = 1; i < j; i++) {
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ tmp[j + i][0] = zz[0];
+ tmp[j + i][1] = zz[1];
+ if (k++ == len)
+ break;
+
+ tmp[len - j - i][0] = -zz[0];
+ tmp[len - j - i][1] = zz[1];
+ if (k++ == len)
+ break;
+ }
+ }
+
+ /* convert doubles to floats */
+ for (i = 1; i < len; i++) {
+ table[i][0] = (float) tmp[i][0];
+ table[i][1] = (float) tmp[i][1];
+ }
+
+ table[len][0] = -1.0f;
+ table[len][1] = 0.0f;
+ }
+
+ /* duplicate lower half to higher */
+ len = table_size >> 1;
+ for (i = 1; i < len; i++) {
+ table[table_size - i][0] = table[i][0];
+ table[table_size - i][1] = -table[i][1];
+ }
+ } else {
+ /* odd table size */
+
+ /* to avoid using temporary tables, generate the first 1/8 of table in
+ * double precision on lower half (and using the symmetry store
+ * the last 1/8 of table in single precision on higher half)
+ */
+ for (j = 1; 8 * j < table_size; j <<= 1) {
+ ffts_cexp_32f64f(j, table_size, z);
+
+ /* store double precision to lower half */
+ tmp[j][0] = z[0];
+ tmp[j][1] = z[1];
+
+ /* store single precision to higher half */
+ table[table_size - j][0] = (float) z[0];
+ table[table_size - j][1] = (float) -z[1];
+
+ for (i = 1; i < j; i++) {
+ /* use double precision for intermediate calculations */
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ tmp[i + j][0] = zz[0];
+ tmp[i + j][1] = zz[1];
+
+ table[table_size - i - j][0] = (float) zz[0];
+ table[table_size - i - j][1] = (float) -zz[1];
+ }
+ }
+
+ /* now generate 1/2 of table in single precision on higher half */
+ k = j << 1;
+ ffts_cexp_32f64f(j, table_size, z);
+ ffts_cexp_32f64f(k, table_size, x);
+
+ /* store single precision to higher half */
+ table[table_size - j][0] = (float) z[0];
+ table[table_size - j][1] = (float) -z[1];
+
+ table[table_size - k][0] = (float) x[0];
+ table[table_size - k][1] = (float) -x[1];
+
+ i = 1;
+ len = ((table_size + 1) >> 1) - k;
+ if (len > j) {
+ len -= j;
+
+ xx[0] = x[0] * z[0] - x[1] * z[1];
+ xx[1] = x[1] * z[0] + x[0] * z[1];
+
+ table[table_size - k - j][0] = (float) xx[0];
+ table[table_size - k - j][1] = (float) -xx[1];
+
+ for (; i < len; i++) {
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ table[table_size - i - j][0] = (float) zz[0];
+ table[table_size - i - j][1] = (float) -zz[1];
+
+ xx[0] = x[0] * tmp[i][0] - x[1] * tmp[i][1];
+ xx[1] = x[1] * tmp[i][0] + x[0] * tmp[i][1];
+
+ table[table_size - i - k][0] = (float) xx[0];
+ table[table_size - i - k][1] = (float) -xx[1];
+
+ xx[0] = x[0] * zz[0] - x[1] * zz[1];
+ xx[1] = x[1] * zz[0] + x[0] * zz[1];
+
+ table[table_size - i - k - j][0] = (float) xx[0];
+ table[table_size - i - k - j][1] = (float) -xx[1];
+ }
+
+ len = j;
+ }
+
+ for (; i < len; i++) {
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ table[table_size - i - j][0] = (float) zz[0];
+ table[table_size - i - j][1] = (float) -zz[1];
+
+ xx[0] = x[0] * tmp[i][0] - x[1] * tmp[i][1];
+ xx[1] = x[1] * tmp[i][0] + x[0] * tmp[i][1];
+
+ table[table_size - i - k][0] = (float) xx[0];
+ table[table_size - i - k][1] = (float) -xx[1];
+ }
+
+ for (; i < j; i++) {
+ zz[0] = z[0] * tmp[i][0] - z[1] * tmp[i][1];
+ zz[1] = z[1] * tmp[i][0] + z[0] * tmp[i][1];
+
+ table[table_size - i - j][0] = (float) zz[0];
+ table[table_size - i - j][1] = (float) -zz[1];
+ }
+
+ /* duplicate higher half to lower */
+ len = table_size >> 1;
+ for (i = 1; i <= len; i++) {
+ table[i][0] = table[table_size - i][0];
+ table[i][1] = -table[table_size - i][1];
+ }
+ }
exit:
+ /* cos(0) = 1.0, sin(0) = 0.0 */
+ table[0][0] = 1.0f;
+ table[0][1] = 0.0f;
return 0;
}
/* Oscar Buneman's method for generating a sequence of sines and cosines.
* Expired US Patent 4,878,187 A
-*
-* D. Potts, G. Steidl, M. Tasche, Numerical stability of fast
-* trigonometric transforms — a worst case study,
-* J. Concrete Appl. Math. 1 (2003) 1–36
-*
-* O. Buneman, Stable on–line creation of sines and cosines of
-* successive angles, Proc. IEEE 75, 1434 – 1435 (1987).
*/
#if HAVE_SSE2
int
@@ -227,10 +741,11 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
{
static const __m128d sign_swap = { 0.0, -0.0 };
const __m128d *FFTS_RESTRICT ct;
- const double *FFTS_RESTRICT hs;
+ const ffts_double_t *FFTS_RESTRICT cst;
+ const ffts_double_t *FFTS_RESTRICT hs;
__m128d FFTS_ALIGN(16) w[32];
__m128d FFTS_ALIGN(16) h[32];
- int i, log_2, offset;
+ int i, log_2, offset, step;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
@@ -251,21 +766,42 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
goto mid_point;
}
+ cst = (const ffts_double_t*)
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_table);
+
+ /* generate small tables from lookup table */
+ if (table_size <= 128) {
+ step = 128 / table_size;
+
+ for (i = 1; i < table_size/2; i++) {
+ float cosine = (float) cst[4 * i * step + 0].d;
+ float sine = (float) cst[4 * i * step + 1].d;
+
+ table[i + 0][0] = cosine;
+ table[i + 0][1] = -sine;
+ table[table_size - i][0] = sine;
+ table[table_size - i][1] = -cosine;
+ }
+
+ goto mid_point;
+ }
+
/* calculate table offset */
- FFTS_ASSUME(table_size/2 > 1);
+ FFTS_ASSUME(table_size/2 > 64);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
+ step = log_2 - 8;
ct = (const __m128d*)
- FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
- hs = (const double*) &half_secant[4 * offset];
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
+ hs = FFTS_ASSUME_ALIGNED_16(&half_secant[2 * offset]);
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i] = ct[2*i];
/* duplicate the high part */
- h[i] = _mm_set1_pd(hs[2*i]);
+ h[i] = _mm_set1_pd(hs[2*i].d);
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
@@ -279,9 +815,20 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
_mm_storel_pi((__m64*) &table[table_size - i], _mm_cvtpd_ps(
_mm_or_pd(_mm_shuffle_pd(w[log_2], w[log_2], 1), sign_swap)));
- /* skip and find next trailing zero */
- offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2] = _mm_mul_pd(h[log_2], _mm_add_pd(w[log_2 + 1], w[offset]));
+ /* use lookup table when possible */
+ if (log_2 > step) {
+ offset = ((2 * i) >> step) + (4 << (log_2 - step));
+ if (offset >= COS_SIN_TABLE_SIZE) {
+ offset = COS_SIN_TABLE_SIZE - (2 << (log_2 - step)) - 4;
+ w[log_2] = _mm_loadr_pd(&cst[offset].d);
+ } else {
+ w[log_2] = _mm_load_pd(&cst[offset].d);
+ }
+ } else {
+ /* skip and find next trailing zero */
+ offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
+ w[log_2] = _mm_mul_pd(h[log_2], _mm_add_pd(w[log_2 + 1], w[offset]));
+ }
}
mid_point:
@@ -297,11 +844,12 @@ ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
{
static const __m128d sign_swap = { 0.0, -0.0 };
const struct ffts_dd2_t *FFTS_RESTRICT ct;
- const double *FFTS_RESTRICT hs;
+ const ffts_double_t *FFTS_RESTRICT cst;
+ const ffts_double_t *FFTS_RESTRICT hs;
struct ffts_dd2_t FFTS_ALIGN(16) w[32];
struct ffts_dd2_t FFTS_ALIGN(16) h[32];
struct ffts_dd2_t FFTS_ALIGN(16) sum;
- int i, log_2, offset;
+ int i, log_2, offset, step;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
@@ -322,22 +870,43 @@ ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
goto mid_point;
}
+ cst = (const ffts_double_t*)
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_table);
+
+ /* generate small tables from lookup table */
+ if (table_size <= 128) {
+ step = 128 / table_size;
+
+ for (i = 1; i < table_size/2; i++) {
+ double cosine = cst[4 * i * step + 0].d;
+ double sine = cst[4 * i * step + 1].d;
+
+ table[i + 0][0] = cosine;
+ table[i + 0][1] = -sine;
+ table[table_size - i][0] = sine;
+ table[table_size - i][1] = -cosine;
+ }
+
+ goto mid_point;
+ }
+
/* calculate table offset */
- FFTS_ASSUME(table_size/2 > 1);
+ FFTS_ASSUME(table_size/2 > 64);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
+ step = log_2 - 8;
ct = (const struct ffts_dd2_t*)
- FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
- hs = (const double*) &half_secant[4 * offset];
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
+ hs = FFTS_ASSUME_ALIGNED_16(&half_secant[2 * offset]);
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i] = ct[i];
/* duplicate the high and low parts */
- h[i].hi = _mm_set1_pd(hs[2*i + 0]);
- h[i].lo = _mm_set1_pd(hs[2*i + 1]);
+ h[i].hi = _mm_set1_pd(hs[2*i + 0].d);
+ h[i].lo = _mm_set1_pd(hs[2*i + 1].d);
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
@@ -351,10 +920,23 @@ ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
_mm_store_pd((double*) &table[table_size - i],
_mm_or_pd(_mm_shuffle_pd(w[log_2].hi, w[log_2].hi, 1), sign_swap));
- /* skip and find next trailing zero */
- offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- sum = ffts_dd2_add_dd2_unnormalized(&w[log_2 + 1], &w[offset]);
- w[log_2] = ffts_dd2_mul_dd2(&h[log_2], &sum);
+ /* use lookup table when possible */
+ if (log_2 > step) {
+ offset = ((2 * i) >> step) + (4 << (log_2 - step));
+ if (offset >= COS_SIN_TABLE_SIZE) {
+ offset = COS_SIN_TABLE_SIZE - (2 << (log_2 - step)) - 4;
+ w[log_2].hi = _mm_loadr_pd(&cst[offset + 0].d);
+ w[log_2].lo = _mm_loadr_pd(&cst[offset + 2].d);
+ } else {
+ w[log_2].hi = _mm_load_pd(&cst[offset + 0].d);
+ w[log_2].lo = _mm_load_pd(&cst[offset + 2].d);
+ }
+ } else {
+ /* skip and find next trailing zero */
+ offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
+ sum = ffts_dd2_add_dd2_unnormalized(&w[log_2 + 1], &w[offset]);
+ w[log_2] = ffts_dd2_mul_dd2(&h[log_2], &sum);
+ }
}
mid_point:
@@ -369,9 +951,10 @@ int
ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
{
const ffts_cpx_64f *FFTS_RESTRICT ct;
- const double *FFTS_RESTRICT hs;
+ const ffts_double_t *FFTS_RESTRICT cst;
+ const ffts_double_t *FFTS_RESTRICT hs;
ffts_cpx_64f FFTS_ALIGN(16) w[32];
- int i, log_2, offset;
+ int i, log_2, offset, step;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
@@ -392,14 +975,35 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
goto mid_point;
}
+ cst = (const ffts_double_t*)
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_table);
+
+ /* generate small tables from lookup table */
+ if (table_size <= 128) {
+ step = 128 / table_size;
+
+ for (i = 1; i < table_size/2; i++) {
+ float cosine = (float) cst[4 * i * step + 0].d;
+ float sine = (float) cst[4 * i * step + 1].d;
+
+ table[i + 0][0] = cosine;
+ table[i + 0][1] = -sine;
+ table[table_size - i][0] = sine;
+ table[table_size - i][1] = -cosine;
+ }
+
+ goto mid_point;
+ }
+
/* calculate table offset */
- FFTS_ASSUME(table_size/2 > 1);
+ FFTS_ASSUME(table_size/2 > 64);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
+ step = log_2 - 8;
ct = (const ffts_cpx_64f*)
- FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
- hs = (const double*) &half_secant[4 * offset];
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
+ hs = FFTS_ASSUME_ALIGNED_16(&half_secant[2 * offset]);
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
@@ -417,10 +1021,23 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
table[table_size - i][0] = (float) w[log_2][1];
table[table_size - i][1] = (float) -w[log_2][0];
- /* skip and find next trailing zero */
- offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
- w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ /* use lookup table when possible */
+ if (log_2 > step) {
+ offset = ((2 * i) >> step) + (4 << (log_2 - step));
+ if (offset >= 260) {
+ offset = 260 - (2 << (log_2 - step)) - 4;
+ w[log_2][0] = cst[offset + 0].d;
+ w[log_2][1] = cst[offset + 1].d;
+ } else {
+ w[log_2][0] = cst[offset + 0].d;
+ w[log_2][1] = cst[offset + 1].d;
+ }
+ } else {
+ /* skip and find next trailing zero */
+ offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
+ w[log_2][0] = hs[2 * log_2].d * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[2 * log_2].d * (w[log_2 + 1][1] + w[offset][1]);
+ }
}
mid_point:
@@ -435,9 +1052,10 @@ int
ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
{
const struct ffts_dd_t *FFTS_RESTRICT ct;
+ const ffts_double_t *FFTS_RESTRICT cst;
const struct ffts_dd_t *FFTS_RESTRICT hs;
struct ffts_dd_t FFTS_ALIGN(16) w[32][2];
- int i, log_2, offset;
+ int i, log_2, offset, step;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
@@ -458,14 +1076,35 @@ ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
goto mid_point;
}
+ cst = (const ffts_double_t*)
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_table);
+
+ /* generate small tables from lookup table */
+ if (table_size <= 128) {
+ step = 128 / table_size;
+
+ for (i = 1; i < table_size/2; i++) {
+ double cosine = cst[4 * i * step + 0].d;
+ double sine = cst[4 * i * step + 1].d;
+
+ table[i + 0][0] = cosine;
+ table[i + 0][1] = -sine;
+ table[table_size - i][0] = sine;
+ table[table_size - i][1] = -cosine;
+ }
+
+ goto mid_point;
+ }
+
/* calculate table offset */
- FFTS_ASSUME(table_size/2 > 1);
+ FFTS_ASSUME(table_size/2 > 64);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
+ step = log_2 - 8;
ct = (const struct ffts_dd_t*)
- FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
- hs = (const struct ffts_dd_t*) &half_secant[4 * offset];
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
+ hs = (const struct ffts_dd_t*) &half_secant[2 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
@@ -486,12 +1125,29 @@ ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
table[table_size - i][0] = w[log_2][1].hi;
table[table_size - i][1] = -w[log_2][0].hi;
- /* skip and find next trailing zero */
- offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2][0] = ffts_dd_mul_dd(hs[log_2],
- ffts_dd_add_dd_unnormalized(w[log_2 + 1][0], w[offset][0]));
- w[log_2][1] = ffts_dd_mul_dd(hs[log_2],
- ffts_dd_add_dd_unnormalized(w[log_2 + 1][1], w[offset][1]));
+ /* use lookup table when possible */
+ if (log_2 > step) {
+ offset = ((2 * i) >> step) + (4 << (log_2 - step));
+ if (offset >= 260) {
+ offset = 260 - (2 << (log_2 - step)) - 4;
+ w[log_2][0].hi = cst[offset + 1].d;
+ w[log_2][1].hi = cst[offset + 0].d;
+ w[log_2][0].lo = cst[offset + 3].d;
+ w[log_2][1].lo = cst[offset + 2].d;
+ } else {
+ w[log_2][0].hi = cst[offset + 0].d;
+ w[log_2][1].hi = cst[offset + 1].d;
+ w[log_2][0].lo = cst[offset + 2].d;
+ w[log_2][1].lo = cst[offset + 3].d;
+ }
+ } else {
+ /* skip and find next trailing zero */
+ offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
+ w[log_2][0] = ffts_dd_mul_dd(hs[log_2],
+ ffts_dd_add_dd_unnormalized(w[log_2 + 1][0], w[offset][0]));
+ w[log_2][1] = ffts_dd_mul_dd(hs[log_2],
+ ffts_dd_add_dd_unnormalized(w[log_2 + 1][1], w[offset][1]));
+ }
}
mid_point:
@@ -509,7 +1165,7 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
int invert)
{
const ffts_cpx_64f *FFTS_RESTRICT ct;
- const double *FFTS_RESTRICT hs;
+ const ffts_double_t *FFTS_RESTRICT hs;
ffts_cpx_64f FFTS_ALIGN(16) w[32];
int i, log_2, offset, N;
float *A, *B;
@@ -547,8 +1203,8 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
FFTS_ASSUME(log_2 > 2);
offset = 34 - log_2;
ct = (const ffts_cpx_64f*)
- FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
- hs = (const double*) &half_secant[4 * offset];
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
+ hs = FFTS_ASSUME_ALIGNED_16(&half_secant[2 * offset]);
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
@@ -556,7 +1212,6 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
w[i][1] = ct[2*i][1];
}
- /* generate sine and cosine tables with maximum error less than 0.5 ULP */
if (sign < 0) {
for (i = 1; i < N/4; i++) {
float t0, t1, t2;
@@ -580,8 +1235,8 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
- w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ w[log_2][0] = hs[2 * log_2].d * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[2 * log_2].d * (w[log_2 + 1][1] + w[offset][1]);
}
} else {
for (i = 1; i < N/4; i++) {
@@ -606,8 +1261,8 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
- w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ w[log_2][0] = hs[2 * log_2].d * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[2 * log_2].d * (w[log_2 + 1][1] + w[offset][1]);
}
}
@@ -625,4 +1280,4 @@ last:
}
return 0;
-} \ No newline at end of file
+}