diff options
author | Timothy Pearson <tpearson@raptorengineering.com> | 2024-08-24 13:04:45 -0500 |
---|---|---|
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2024-08-24 13:51:05 -0500 |
commit | 2ef6dba8728db2437def9a4fc1d3e20e0aa44c31 (patch) | |
tree | 5211805789c78639d6b96a89bd0a4a96078d0fd9 /lib/ffts/src/ffts_internal.h | |
parent | c40a208abbc778da4271485eba06a89d05c69b5e (diff) | |
download | ulab-2ef6dba8728db2437def9a4fc1d3e20e0aa44c31.tar.gz ulab-2ef6dba8728db2437def9a4fc1d3e20e0aa44c31.zip |
Revup FFTS to latest upstream version
Taken from https://github.com/linkotec/ffts
Fixes ppc64el support and a handful of other bugs
Diffstat (limited to 'lib/ffts/src/ffts_internal.h')
-rw-r--r-- | lib/ffts/src/ffts_internal.h | 123 |
1 files changed, 109 insertions, 14 deletions
diff --git a/lib/ffts/src/ffts_internal.h b/lib/ffts/src/ffts_internal.h
index 157c283..04ebb9c 100644
--- a/lib/ffts/src/ffts_internal.h
+++ b/lib/ffts/src/ffts_internal.h
@@ -2,6 +2,7 @@
 
 This file is part of FFTS -- The Fastest Fourier Transform in the South
 
+Copyright (c) 2015-2016, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
 Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
 Copyright (c) 2012, The University of Waikato
 
@@ -34,7 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef FFTS_INTERNAL_H
 #define FFTS_INTERNAL_H
 
+#ifdef AUTOTOOLS_BUILD
 #include "config.h"
+#endif
+
 #include "ffts_attributes.h"
 #include "types.h"
 
@@ -42,18 +46,59 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <malloc.h>
 #endif
 
+#ifdef HAVE_MM_ALLOC_H
+#include <mm_malloc.h>
+#ifndef HAVE__MM_MALLOC
+#define HAVE__MM_MALLOC
+#endif
+#endif
+
 #include <stddef.h>
 
-#ifdef HAVE_STDINT_H
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#elif HAVE_STDINT_H
 #include <stdint.h>
+#elif _MSC_VER
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+typedef signed long int int32_t;
+typedef unsigned long int uint32_t;
+typedef signed long long int int64_t;
+typedef unsigned long long int uint64_t;
 #endif
 
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
 #include <stdio.h>
 
+#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN
+extern void *memalign(size_t, size_t);
+#endif
+
+#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN
+extern int posix_memalign(void **, size_t, size_t);
+#endif
+
+#if defined(HAVE_DECL_VALLOC) && !HAVE_DECL_VALLOC
+extern void *valloc(size_t);
+#endif
+
+#ifdef _mm_malloc
+#ifndef HAVE__MM_MALLOC
+#define HAVE__MM_MALLOC
+#endif
+#endif
+
 #ifdef ENABLE_LOG
 #ifdef __ANDROID__
 #include <android/log.h>
@@ -142,11 +187,9 @@ struct _ffts_plan_t {
      */
     size_t transform_size;
 
-    /**
-     Points to the cosnant variables used by
-     * the Assembly Code
-     */
-    void *constants;
+    /* pointer to the constant variable used by SSE for sign change */
+    /* TODO: #ifdef HAVE_SSE */
+    const void *constants;
 
     // multi-dimensional stuff:
     struct _ffts_plan_t **plans;
@@ -171,44 +214,96 @@ struct _ffts_plan_t {
     size_t i2;
 };
 
-static FFTS_INLINE void *ffts_aligned_malloc(size_t size)
+static FFTS_INLINE void*
+ffts_aligned_malloc(size_t size)
 {
-#if defined(_WIN32)
-    return _aligned_malloc(size, 32);
+    void *p = NULL;
+
+    /* various ways to allocate aligned memory in order of preferance */
+#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
+    p = (void*) _mm_malloc(size, 32);
+#elif defined(HAVE_POSIX_MEMALIGN)
+    if (posix_memalign(&p, 32, size))
+        p = NULL;
+#elif defined(HAVE_MEMALIGN)
+    p = memalign(32, size);
+#elif defined(__ALTIVEC__)
+    p = vec_malloc(size);
+#elif defined(_MSC_VER) || defined(WIN32)
+    p = _aligned_malloc(size, 32);
+#elif defined(HAVE_VALLOC)
+    p = valloc(size);
 #else
-    return valloc(size);
+    p = malloc(size);
 #endif
+
+    return p;
 }
 
-static FFTS_INLINE void ffts_aligned_free(void *p)
+static FFTS_INLINE
+void ffts_aligned_free(void *p)
 {
-#if defined(_WIN32)
+    /* order must match with ffts_aligned_malloc */
+#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
+    _mm_free(p);
+#elif defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
+    free(p);
+#elif defined(__ALTIVEC__)
+    vec_free(p);
+#elif defined(_MSC_VER) || defined(WIN32)
     _aligned_free(p);
 #else
+    /* valloc or malloc */
     free(p);
 #endif
 }
 
 #if GCC_VERSION_AT_LEAST(3,3)
 #define ffts_ctzl __builtin_ctzl
+
+static FFTS_INLINE size_t
+ffts_next_power_of_2(size_t N)
+{
+    return 1 << (32 - __builtin_clzl(N));
+}
 #elif defined(_MSC_VER)
 #include <intrin.h>
 #ifdef _M_X64
 #pragma intrinsic(_BitScanForward64)
-static __inline unsigned long ffts_ctzl(size_t N)
+static FFTS_INLINE unsigned long
+ffts_ctzl(size_t N)
 {
     unsigned long count;
     _BitScanForward64((unsigned long*) &count, N);
     return count;
 }
+
+#pragma intrinsic(_BitScanReverse64)
+static FFTS_INLINE size_t
+ffts_next_power_of_2(size_t N)
+{
+    unsigned long log_2;
+    _BitScanReverse64((unsigned long*)&log_2, N);
+    return 1ULL << (log_2 + 1);
+}
 #else
 #pragma intrinsic(_BitScanForward)
-static __inline unsigned long ffts_ctzl(size_t N)
+static FFTS_INLINE unsigned long
+ffts_ctzl(size_t N)
 {
     unsigned long count;
     _BitScanForward((unsigned long*) &count, N);
     return count;
 }
+
+#pragma intrinsic(_BitScanReverse)
+static FFTS_INLINE size_t
+ffts_next_power_of_2(size_t N)
+{
+    unsigned long log_2;
+    _BitScanReverse((unsigned long*)&log_2, N);
+    return 1 << (log_2 + 1);
+}
 #endif /* _WIN64 */
 #endif /* _MSC_VER */
 