diff options
| author | Michele Calgaro <michele.calgaro@yahoo.it> | 2020-09-11 14:38:47 +0900 |
|---|---|---|
| committer | Michele Calgaro <michele.calgaro@yahoo.it> | 2020-09-11 14:38:47 +0900 |
| commit | 884c8093d63402a1ad0b502244b791e3c6782be3 (patch) | |
| tree | a600d4ab0d431a2bdfe4c15b70df43c14fbd8dd0 /debian/transcode/transcode-1.1.7/aclib/rescale.c | |
| parent | 14e1aa2006796f147f3f4811fb908a6b01e79253 (diff) | |
| download | extra-dependencies-884c8093d63402a1ad0b502244b791e3c6782be3.tar.gz extra-dependencies-884c8093d63402a1ad0b502244b791e3c6782be3.zip | |
Added debian extra dependency packages.
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'debian/transcode/transcode-1.1.7/aclib/rescale.c')
| -rw-r--r-- | debian/transcode/transcode-1.1.7/aclib/rescale.c | 280 |
1 files changed, 280 insertions, 0 deletions
diff --git a/debian/transcode/transcode-1.1.7/aclib/rescale.c b/debian/transcode/transcode-1.1.7/aclib/rescale.c new file mode 100644 index 00000000..5a619735 --- /dev/null +++ b/debian/transcode/transcode-1.1.7/aclib/rescale.c @@ -0,0 +1,280 @@ +/* + * rescale.c -- take the weighted average of two sets of byte data + * Written by Andrew Church <achurch@achurch.org> + * + * This file is part of transcode, a video stream processing tool. + * transcode is free software, distributable under the terms of the GNU + * General Public License (version 2 or later). See the file COPYING + * for details. + */ + +#include "ac.h" +#include "ac_internal.h" + +static void rescale(const uint8_t *, const uint8_t *, uint8_t *, int, + uint32_t, uint32_t); +static void (*rescale_ptr)(const uint8_t *, const uint8_t *, uint8_t *, int, + uint32_t, uint32_t) = rescale; + +/*************************************************************************/ + +/* External interface */ + +void ac_rescale(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int bytes, uint32_t weight1, uint32_t weight2) +{ + if (weight1 >= 0x10000) + ac_memcpy(dest, src1, bytes); + else if (weight2 >= 0x10000) + ac_memcpy(dest, src2, bytes); + else + (*rescale_ptr)(src1, src2, dest, bytes, weight1, weight2); +} + +/*************************************************************************/ +/*************************************************************************/ + +/* Vanilla C version */ + +static void rescale(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int bytes, + uint32_t weight1, uint32_t weight2) +{ + int i; + for (i = 0; i < bytes; i++) + dest[i] = (src1[i]*weight1 + src2[i]*weight2 + 32768) >> 16; +} + +/*************************************************************************/ + +/* MMX version */ + +#if defined(HAVE_ASM_MMX) && defined(ARCH_X86) /* i.e. not x86_64 */ + +static void rescale_mmx(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int bytes, + uint32_t weight1, uint32_t weight2) +{ + if (bytes >= 8) { + /* First store weights in MM4/MM5 to relieve register pressure; + * save time by making 2 copies ahead of time in the general + * registers. Note that we divide by 2 for MMX due to the lack + * of an unsigned SIMD multiply instruction (PMULHUW). */ + int half1 = weight1 / 2; + int half2 = weight2 / 2; + half2 += weight1 & weight2 & 1; // pick up the lost bit here + asm("movd %%eax, %%mm4; movd %%edx, %%mm5" + : : "a" (half1<<16|half1), "d" (half2<<16|half2)); + asm("\ + movq %%mm4, %%mm6 # MM6: 00 00 W1 W1 \n\ + psllq $32, %%mm4 # MM4: W1 W1 00 00 \n\ + por %%mm6, %%mm4 # MM4: W1 W1 W1 W1 \n\ + movq %%mm5, %%mm7 # MM7: 00 00 W2 W2 \n\ + psllq $32, %%mm5 # MM5: W2 W2 00 00 \n\ + por %%mm7, %%mm5 # MM5: W2 W2 W2 W2 \n\ + pxor %%mm7, %%mm7 # MM7: 00 00 00 00 \n\ + pxor %%mm6, %%mm6 # Put 0x0020*4 in MM6 (rounding)\n\ + pcmpeqw %%mm3, %%mm3 \n\ + psubw %%mm3, %%mm6 \n\ + psllw $5, %%mm6 \n\ + 0: \n\ + movq -8(%%esi,%%ecx), %%mm0 \n\ + movq %%mm0, %%mm1 \n\ + punpcklbw %%mm7, %%mm0 \n\ + psllw $7, %%mm0 # 9.7 fixed point \n\ + pmulhw %%mm4, %%mm0 # Multiply to get 10.6 fixed \n\ + punpckhbw %%mm7, %%mm1 \n\ + psllw $7, %%mm1 \n\ + pmulhw %%mm4, %%mm1 \n\ + movq -8(%%edx,%%ecx), %%mm2 \n\ + movq %%mm2, %%mm3 \n\ + punpcklbw %%mm7, %%mm2 \n\ + psllw $7, %%mm2 \n\ + pmulhw %%mm5, %%mm2 \n\ + punpckhbw %%mm7, %%mm3 \n\ + psllw $7, %%mm3 \n\ + pmulhw %%mm5, %%mm3 \n\ + paddw %%mm2, %%mm0 \n\ + paddw %%mm6, %%mm0 \n\ + psrlw $6, %%mm0 \n\ + paddw %%mm3, %%mm1 \n\ + paddw %%mm6, %%mm1 \n\ + psrlw $6, %%mm1 \n\ + packuswb %%mm1, %%mm0 \n\ + movq %%mm0, -8(%%edi,%%ecx) \n\ + subl $8, %%ecx \n\ + jnz 0b \n\ + emms" + : /* no outputs */ + : "S" (src1), "d" (src2), "D" (dest), "c" (bytes & ~7)); + } + if (UNLIKELY(bytes & 7)) { + rescale(src1+(bytes & ~7), src2+(bytes & ~7), dest+(bytes & ~7), + bytes & 7, weight1, weight2); + } +} + +#endif /* HAVE_ASM_MMX && ARCH_X86 */ + +/*************************************************************************/ + +/* MMXEXT version (also for SSE) */ + +#if (defined(HAVE_ASM_MMXEXT) || defined(HAVE_ASM_SSE)) && defined(ARCH_X86) + +static void rescale_mmxext(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int bytes, + uint32_t weight1, uint32_t weight2) +{ + if (bytes >= 8) { + asm("movd %%eax, %%mm4; movd %%edx, %%mm5" + : : "a" (weight1), "d" (weight2)); + asm("\ + pshufw $0, %%mm4, %%mm4 # MM4: W1 W1 W1 W1 \n\ + pshufw $0, %%mm5, %%mm5 # MM5: W2 W2 W2 W2 \n\ + pxor %%mm6, %%mm6 # Put 0x0080*4 in MM6 (rounding)\n\ + pcmpeqw %%mm7, %%mm7 \n\ + psubw %%mm7, %%mm6 \n\ + psllw $7, %%mm6 \n\ + 0: \n\ + movq -8(%%esi,%%ecx), %%mm7 \n\ + pxor %%mm0, %%mm0 # Load data into high bytes \n\ + punpcklbw %%mm7, %%mm0 # (gives 8.8 fixed point) \n\ + pmulhuw %%mm4, %%mm0 # Result: 0000..FF00 \n\ + pxor %%mm1, %%mm1 \n\ + punpckhbw %%mm7, %%mm1 \n\ + pmulhuw %%mm4, %%mm1 \n\ + movq -8(%%edx,%%ecx), %%mm7 \n\ + pxor %%mm2, %%mm2 \n\ + punpcklbw %%mm7, %%mm2 \n\ + pmulhuw %%mm5, %%mm2 \n\ + pxor %%mm3, %%mm3 \n\ + punpckhbw %%mm7, %%mm3 \n\ + pmulhuw %%mm5, %%mm3 \n\ + paddw %%mm2, %%mm0 \n\ + paddw %%mm6, %%mm0 \n\ + psrlw $8, %%mm0 # Shift back down to 00..FF \n\ + paddw %%mm3, %%mm1 \n\ + paddw %%mm6, %%mm1 \n\ + psrlw $8, %%mm1 \n\ + packuswb %%mm1, %%mm0 \n\ + movq %%mm0, -8(%%edi,%%ecx) \n\ + subl $8, %%ecx \n\ + jnz 0b \n\ + emms" + : /* no outputs */ + : "S" (src1), "d" (src2), "D" (dest), "c" (bytes & ~7)); + } + if (UNLIKELY(bytes & 7)) { + rescale(src1+(bytes & ~7), src2+(bytes & ~7), dest+(bytes & ~7), + bytes & 7, weight1, weight2); + } +} + +#endif /* (HAVE_ASM_MMXEXT || HAVE_ASM_SSE) && ARCH_X86 */ + +/*************************************************************************/ + +/* SSE2 version */ + +#if defined(HAVE_ASM_SSE2) + +#ifdef ARCH_X86_64 +# define ECX "%%rcx" +# define EDX "%%rdx" +# define ESI "%%rsi" +# define EDI "%%rdi" +#else +# define ECX "%%ecx" +# define EDX "%%edx" +# define ESI "%%esi" +# define EDI "%%edi" +#endif + +static void rescale_sse2(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int bytes, + uint32_t weight1, uint32_t weight2) +{ + if (bytes >= 16) { + asm("movd %%eax, %%xmm4; movd %%edx, %%xmm5" + : : "a" (weight1<<16|weight1), "d" (weight2<<16|weight2)); + asm("\ + pshufd $0, %%xmm4, %%xmm4 # XMM4: W1 W1 W1 W1 W1 W1 W1 W1 \n\ + pshufd $0, %%xmm5, %%xmm5 # XMM5: W2 W2 W2 W2 W2 W2 W2 W2 \n\ + pxor %%xmm6, %%xmm6 # Put 0x0080*4 in XMM6 (rounding)\n\ + pcmpeqw %%xmm7, %%xmm7 \n\ + psubw %%xmm7, %%xmm6 \n\ + psllw $7, %%xmm6 \n\ + 0: \n\ + movdqu -16("ESI","ECX"), %%xmm7 \n\ + pxor %%xmm0, %%xmm0 \n\ + punpcklbw %%xmm7, %%xmm0 \n\ + pmulhuw %%xmm4, %%xmm0 \n\ + pxor %%xmm1, %%xmm1 \n\ + punpckhbw %%xmm7, %%xmm1 \n\ + pmulhuw %%xmm4, %%xmm1 \n\ + movdqu -16("EDX","ECX"), %%xmm7 \n\ + pxor %%xmm2, %%xmm2 \n\ + punpcklbw %%xmm7, %%xmm2 \n\ + pmulhuw %%xmm5, %%xmm2 \n\ + pxor %%xmm3, %%xmm3 \n\ + punpckhbw %%xmm7, %%xmm3 \n\ + pmulhuw %%xmm5, %%xmm3 \n\ + paddw %%xmm2, %%xmm0 \n\ + paddw %%xmm6, %%xmm0 \n\ + psrlw $8, %%xmm0 \n\ + paddw %%xmm3, %%xmm1 \n\ + paddw %%xmm6, %%xmm1 \n\ + psrlw $8, %%xmm1 \n\ + packuswb %%xmm1, %%xmm0 \n\ + movdqu %%xmm0, -16("EDI","ECX") \n\ + subl $16, %%ecx \n\ + jnz 0b \n\ + emms" + : /* no outputs */ + : "S" (src1), "d" (src2), "D" (dest), "c" (bytes & ~15)); + } + if (UNLIKELY(bytes & 15)) { + rescale(src1+(bytes & ~15), src2+(bytes & ~15), dest+(bytes & ~15), + bytes & 15, weight1, weight2); + } +} + +#endif /* HAVE_ASM_SSE2 */ + +/*************************************************************************/ +/*************************************************************************/ + +/* Initialization routine. */ + +int ac_rescale_init(int accel) +{ + rescale_ptr = rescale; + +#if defined(HAVE_ASM_MMX) && defined(ARCH_X86) + if (HAS_ACCEL(accel, AC_MMX)) + rescale_ptr = rescale_mmx; +#endif +#if (defined(HAVE_ASM_MMXEXT) || defined(HAVE_ASM_SSE)) && defined(ARCH_X86) + if (HAS_ACCEL(accel, AC_MMXEXT) || HAS_ACCEL(accel, AC_SSE)) + rescale_ptr = rescale_mmxext; +#endif +#if defined(HAVE_ASM_SSE2) + if (HAS_ACCEL(accel, AC_SSE2)) + rescale_ptr = rescale_sse2; +#endif + + return 1; +} + +/*************************************************************************/ + +/* + * Local variables: + * c-file-style: "stroustrup" + * c-file-offsets: ((case-label . *) (statement-case-intro . *)) + * indent-tabs-mode: nil + * End: + * + * vim: expandtab shiftwidth=4: + */ |
