summaryrefslogtreecommitdiffstats
path: root/lib/ffts/src/ffts_internal.h
diff options
context:
space:
mode:
authorTimothy Pearson <tpearson@raptorengineering.com>2024-08-24 13:04:45 -0500
committerTimothy Pearson <tpearson@raptorengineering.com>2024-08-24 13:51:05 -0500
commit2ef6dba8728db2437def9a4fc1d3e20e0aa44c31 (patch)
tree5211805789c78639d6b96a89bd0a4a96078d0fd9 /lib/ffts/src/ffts_internal.h
parentc40a208abbc778da4271485eba06a89d05c69b5e (diff)
downloadulab-2ef6dba8728db2437def9a4fc1d3e20e0aa44c31.tar.gz
ulab-2ef6dba8728db2437def9a4fc1d3e20e0aa44c31.zip
Revup FFTS to latest upstream version
Taken from https://github.com/linkotec/ffts. Fixes ppc64el support and a handful of other bugs.
Diffstat (limited to 'lib/ffts/src/ffts_internal.h')
-rw-r--r--lib/ffts/src/ffts_internal.h123
1 files changed, 109 insertions, 14 deletions
diff --git a/lib/ffts/src/ffts_internal.h b/lib/ffts/src/ffts_internal.h
index 157c283..04ebb9c 100644
--- a/lib/ffts/src/ffts_internal.h
+++ b/lib/ffts/src/ffts_internal.h
@@ -2,6 +2,7 @@
This file is part of FFTS -- The Fastest Fourier Transform in the South
+Copyright (c) 2015-2016, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
@@ -34,7 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef FFTS_INTERNAL_H
#define FFTS_INTERNAL_H
+#ifdef AUTOTOOLS_BUILD
#include "config.h"
+#endif
+
#include "ffts_attributes.h"
#include "types.h"
@@ -42,18 +46,59 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <malloc.h>
#endif
+#ifdef HAVE_MM_ALLOC_H
+#include <mm_malloc.h>
+#ifndef HAVE__MM_MALLOC
+#define HAVE__MM_MALLOC
+#endif
+#endif
+
#include <stddef.h>
-#ifdef HAVE_STDINT_H
+/*
+ * Fixed-width integer types: prefer <inttypes.h>, then <stdint.h>, then the
+ * MSVC built-in sized types, and finally hand-written typedefs.
+ */
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#elif HAVE_STDINT_H
#include <stdint.h>
+#elif _MSC_VER
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+/*
+ * NOTE(review): these typedefs assume `long` is exactly 32 bits wide. On
+ * LP64 targets `long` is presumably 64 bits, which would make int32_t and
+ * uint32_t 8 bytes -- confirm this branch is only reached where long is
+ * 32-bit, or switch the 32-bit typedefs to `int`.
+ */
+typedef signed long int int32_t;
+typedef unsigned long int uint32_t;
+typedef signed long long int int64_t;
+typedef unsigned long long int uint64_t;
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
#include <stdio.h>
+#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN
+extern void *memalign(size_t, size_t);
+#endif
+
+#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN
+extern int posix_memalign(void **, size_t, size_t);
+#endif
+
+#if defined(HAVE_DECL_VALLOC) && !HAVE_DECL_VALLOC
+extern void *valloc(size_t);
+#endif
+
+#ifdef _mm_malloc
+#ifndef HAVE__MM_MALLOC
+#define HAVE__MM_MALLOC
+#endif
+#endif
+
#ifdef ENABLE_LOG
#ifdef __ANDROID__
#include <android/log.h>
@@ -142,11 +187,9 @@ struct _ffts_plan_t {
*/
size_t transform_size;
- /**
- * Points to the cosnant variables used by
- * the Assembly Code
- */
- void *constants;
+ /* pointer to the constant variable used by SSE for sign change */
+ /* TODO: #ifdef HAVE_SSE */
+ const void *constants;
// multi-dimensional stuff:
struct _ffts_plan_t **plans;
@@ -171,44 +214,96 @@ struct _ffts_plan_t {
size_t i2;
};
-static FFTS_INLINE void *ffts_aligned_malloc(size_t size)
+/*
+ * Allocate `size` bytes with the first allocator in the chain below that
+ * the build configuration enables.
+ *
+ * The _mm_malloc, posix_memalign, memalign and _aligned_malloc paths ask
+ * for 32-byte alignment explicitly; vec_malloc, valloc and plain malloc
+ * are given only the size, so the alignment is whatever they provide.
+ *
+ * Returns NULL when posix_memalign reports failure; otherwise returns the
+ * selected allocator's result unchanged. Memory must be released with
+ * ffts_aligned_free(), whose #if chain mirrors this one.
+ */
+static FFTS_INLINE void*
+ffts_aligned_malloc(size_t size)
{
-#if defined(_WIN32)
- return _aligned_malloc(size, 32);
+ void *p = NULL;
+
+ /* various ways to allocate aligned memory in order of preference */
+#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
+ p = (void*) _mm_malloc(size, 32);
+#elif defined(HAVE_POSIX_MEMALIGN)
+ /* a non-zero return is treated as failure; hand back NULL in that case */
+ if (posix_memalign(&p, 32, size))
+ p = NULL;
+#elif defined(HAVE_MEMALIGN)
+ p = memalign(32, size);
+#elif defined(__ALTIVEC__)
+ p = vec_malloc(size);
+#elif defined(_MSC_VER) || defined(WIN32)
+ p = _aligned_malloc(size, 32);
+#elif defined(HAVE_VALLOC)
+ p = valloc(size);
#else
- return valloc(size);
+ /* last resort: no alignment is requested on this path */
+ p = malloc(size);
#endif
+
+ return p;
}
-static FFTS_INLINE void ffts_aligned_free(void *p)
+/*
+ * Release memory obtained from ffts_aligned_malloc().
+ *
+ * The preprocessor conditions are evaluated in the same order as in
+ * ffts_aligned_malloc so that each allocator is paired with its own
+ * deallocator (_mm_free, free, vec_free or _aligned_free).
+ */
+static FFTS_INLINE
+void ffts_aligned_free(void *p)
{
-#if defined(_WIN32)
+ /* order must match with ffts_aligned_malloc */
+#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
+ _mm_free(p);
+#elif defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
+ free(p);
+#elif defined(__ALTIVEC__)
+ vec_free(p);
+#elif defined(_MSC_VER) || defined(WIN32)
_aligned_free(p);
#else
+ /* valloc or malloc */
free(p);
#endif
}
#if GCC_VERSION_AT_LEAST(3,3)
#define ffts_ctzl __builtin_ctzl
+
+/*
+ * Returns the smallest power of two strictly greater than N
+ * (e.g. 8 -> 16, 9 -> 16).
+ *
+ * Returns 1 for N == 0, and wraps to 0 when the result does not fit in
+ * size_t (i.e. when the most significant bit of N is already set).
+ */
+static FFTS_INLINE size_t
+ffts_next_power_of_2(size_t N)
+{
+    /* only the most significant bit set; avoids hard-coding a word width */
+    const unsigned long long top_bit = ~(~0ULL >> 1);
+
+    /* __builtin_clzll is undefined for a zero argument */
+    if (N == 0)
+        return 1;
+
+    /* top_bit >> clz(N) isolates the highest set bit of N; then double it */
+    return (size_t) (top_bit >> __builtin_clzll(N)) << 1;
+}
#elif defined(_MSC_VER)
#include <intrin.h>
#ifdef _M_X64
#pragma intrinsic(_BitScanForward64)
-static __inline unsigned long ffts_ctzl(size_t N)
+/*
+ * MSVC x64 definition of ffts_ctzl (the GCC branch maps it to
+ * __builtin_ctzl): returns the bit index that _BitScanForward64 stores
+ * for N.
+ *
+ * NOTE(review): the intrinsic's return value is ignored and `count` is not
+ * initialised, so the result is presumably indeterminate for N == 0 --
+ * confirm callers never pass zero.
+ */
+static FFTS_INLINE unsigned long
+ffts_ctzl(size_t N)
{
unsigned long count;
_BitScanForward64((unsigned long*) &count, N);
return count;
}
+
+#pragma intrinsic(_BitScanReverse64)
+/*
+ * Returns the smallest power of two strictly greater than N
+ * (e.g. 8 -> 16, 9 -> 16).
+ *
+ * Returns 1 for N == 0, and wraps to 0 when the result does not fit in
+ * size_t (i.e. when bit 63 of N is already set).
+ */
+static FFTS_INLINE size_t
+ffts_next_power_of_2(size_t N)
+{
+    unsigned long log_2;
+
+    /* the intrinsic returns zero, leaving log_2 unset, when N == 0 */
+    if (!_BitScanReverse64(&log_2, N))
+        return 1;
+
+    /* same as 1 << (log_2 + 1), but still a valid shift for log_2 == 63 */
+    return (size_t) 2 << log_2;
+}
#else
#pragma intrinsic(_BitScanForward)
-static __inline unsigned long ffts_ctzl(size_t N)
+/*
+ * MSVC 32-bit definition of ffts_ctzl (the GCC branch maps it to
+ * __builtin_ctzl): returns the bit index that _BitScanForward stores
+ * for N.
+ *
+ * NOTE(review): the intrinsic's return value is ignored and `count` is not
+ * initialised, so the result is presumably indeterminate for N == 0 --
+ * confirm callers never pass zero.
+ */
+static FFTS_INLINE unsigned long
+ffts_ctzl(size_t N)
{
unsigned long count;
_BitScanForward((unsigned long*) &count, N);
return count;
}
+
+#pragma intrinsic(_BitScanReverse)
+/*
+ * Returns the smallest power of two strictly greater than N
+ * (e.g. 8 -> 16, 9 -> 16).
+ *
+ * Returns 1 for N == 0, and wraps to 0 when the result does not fit in
+ * size_t (i.e. when bit 31 of N is already set).
+ */
+static FFTS_INLINE size_t
+ffts_next_power_of_2(size_t N)
+{
+    unsigned long log_2;
+
+    /* the intrinsic returns zero, leaving log_2 unset, when N == 0 */
+    if (!_BitScanReverse(&log_2, N))
+        return 1;
+
+    /*
+     * Shift in size_t rather than int: 1 << (log_2 + 1) overflows a signed
+     * int for log_2 >= 30, while 2 << log_2 stays valid up to log_2 == 31.
+     */
+    return (size_t) 2 << log_2;
+}
#endif /* _M_X64 */
#endif /* _MSC_VER */