summary refs log tree commit diff stats
path: root/debian/transcode/transcode-1.1.7/aclib/average.c
diff options
context:
space:
mode:
Diffstat (limited to 'debian/transcode/transcode-1.1.7/aclib/average.c')
-rw-r--r-- debian/transcode/transcode-1.1.7/aclib/average.c 243
1 files changed, 243 insertions, 0 deletions
diff --git a/debian/transcode/transcode-1.1.7/aclib/average.c b/debian/transcode/transcode-1.1.7/aclib/average.c
new file mode 100644
index 00000000..517102e6
--- /dev/null
+++ b/debian/transcode/transcode-1.1.7/aclib/average.c
@@ -0,0 +1,243 @@
+/*
+ * average.c -- average two sets of byte data
+ * Written by Andrew Church <achurch@achurch.org>
+ *
+ * This file is part of transcode, a video stream processing tool.
+ * transcode is free software, distributable under the terms of the GNU
+ * General Public License (version 2 or later). See the file COPYING
+ * for details.
+ */
+
+#include "ac.h"
+#include "ac_internal.h"
+#include <stdint.h>  /* intptr_t */
+
+/* Plain C implementation, defined further down in this file. */
+static void average(const uint8_t *src1, const uint8_t *src2,
+                    uint8_t *dest, int bytes);
+
+/* Routine that ac_average() calls; starts out as the plain C version and
+ * is reassigned by ac_average_init(). */
+static void (*average_ptr)(const uint8_t *src1, const uint8_t *src2,
+                           uint8_t *dest, int bytes) = average;
+
+/*************************************************************************/
+
+/* External interface */
+
+/*
+ * ac_average: hand the two source buffers, the destination buffer and the
+ * byte count unchanged to whichever implementation average_ptr currently
+ * refers to.
+ */
+void ac_average(const uint8_t *src1, const uint8_t *src2,
+                uint8_t *dest, int bytes)
+{
+    average_ptr(src1, src2, dest, bytes);
+}
+
+/*************************************************************************/
+/*************************************************************************/
+
+/* Vanilla C version */
+
+/*
+ * average: for each index below `bytes', store in dest the mean of the
+ * src1 and src2 bytes at that index, with a fractional half rounded up.
+ * Nothing is written when bytes is zero or negative.
+ */
+static void average(const uint8_t *src1, const uint8_t *src2,
+                    uint8_t *dest, int bytes)
+{
+    int pos = 0;
+
+    while (pos < bytes) {
+        /* The uint8_t operands are promoted to int, so the sum (at most
+         * 511) cannot wrap before the division. */
+        const int sum = src1[pos] + src2[pos] + 1;
+
+        dest[pos] = sum / 2;
+        pos++;
+    }
+}
+
+/*************************************************************************/
+
+#if defined(HAVE_ASM_MMX) && defined(ARCH_X86) /* i.e. not x86_64 */
+
+/*
+ * average_mmx: MMX implementation of the byte-wise average.
+ *
+ * The inline assembly processes the largest multiple of 8 bytes, working
+ * from the end of that region down to offset 0.  Each pass widens 8 bytes
+ * from each source to 16-bit words, adds them together with 1, shifts
+ * right by one bit and packs the result back to bytes.  The remaining
+ * 1-7 bytes, if any, are handled by average().
+ */
+static void average_mmx(const uint8_t *src1, const uint8_t *src2,
+                        uint8_t *dest, int bytes)
+{
+    /* The plain C version does nothing for a non-positive count.  Without
+     * this check a negative count would fall through to the tail call
+     * below with a negative buffer offset. */
+    if (UNLIKELY(bytes <= 0)) {
+        return;
+    }
+
+    if (bytes >= 8) {
+        /* The loop counts EAX down to zero, so it is declared as a
+         * read-write operand; an input-only operand must not be changed
+         * by the asm. */
+        int count = bytes & ~7;
+
+        /* "memory": the asm reads the sources and writes dest through
+         * pointers the compiler cannot see into.
+         * NOTE(review): MM0-MM7 are modified but not listed as clobbers;
+         * confirm whether the supported compilers accept them. */
+        asm volatile("\
+            pxor %%mm7, %%mm7                                           \n\
+            movq %%mm7, %%mm6                                           \n\
+            pcmpeqw %%mm5, %%mm5                                        \n\
+            psubw %%mm5, %%mm6          # Put 0x0001*4 in MM6           \n\
+            0:                                                          \n\
+            movq -8(%%esi,%%eax), %%mm0                                 \n\
+            movq %%mm0, %%mm1                                           \n\
+            punpcklbw %%mm7, %%mm0                                      \n\
+            punpckhbw %%mm7, %%mm1                                      \n\
+            movq -8(%%edx,%%eax), %%mm2                                 \n\
+            movq %%mm2, %%mm3                                           \n\
+            punpcklbw %%mm7, %%mm2                                      \n\
+            punpckhbw %%mm7, %%mm3                                      \n\
+            paddw %%mm2, %%mm0                                          \n\
+            paddw %%mm6, %%mm0                                          \n\
+            psrlw $1, %%mm0                                             \n\
+            paddw %%mm3, %%mm1                                          \n\
+            paddw %%mm6, %%mm1                                          \n\
+            psrlw $1, %%mm1                                             \n\
+            packuswb %%mm1, %%mm0                                       \n\
+            movq %%mm0, -8(%%edi,%%eax)                                 \n\
+            subl $8, %%eax                                              \n\
+            jnz 0b                                                      \n\
+            emms"
+            : "+a" (count)
+            : "S" (src1), "d" (src2), "D" (dest)
+            : "memory", "cc");
+    }
+    if (UNLIKELY(bytes & 7)) {
+        /* Offset of the first byte the 8-byte loop did not cover. */
+        const int done = bytes & ~7;
+
+        average(src1 + done, src2 + done, dest + done, bytes & 7);
+    }
+}
+
+#endif /* HAVE_ASM_MMX && ARCH_X86 */
+
+/*************************************************************************/
+
+#if defined(HAVE_ASM_SSE) && defined(ARCH_X86)
+
+/* SSE has PAVGB */
+
+/*
+ * average_sse: byte-wise average using PAVGB on the MMX registers.
+ *
+ * The inline assembly processes the largest multiple of 8 bytes, working
+ * from the end of that region down to offset 0: first in passes of 32
+ * bytes while at least 32 remain, then in passes of 8 bytes.  Results are
+ * stored with MOVNTQ, and an SFENCE is issued once the loops finish.  The
+ * remaining 1-7 bytes, if any, are handled by average().
+ */
+static void average_sse(const uint8_t *src1, const uint8_t *src2,
+                        uint8_t *dest, int bytes)
+{
+    /* The plain C version does nothing for a non-positive count.  Without
+     * this check a negative count would fall through to the tail call
+     * below with a negative buffer offset. */
+    if (UNLIKELY(bytes <= 0)) {
+        return;
+    }
+
+    if (bytes >= 8) {
+        /* The loops count EAX down to zero, so it is declared as a
+         * read-write operand; an input-only operand must not be changed
+         * by the asm. */
+        int count = bytes & ~7;
+
+        /* "memory": the asm reads the sources and writes dest through
+         * pointers the compiler cannot see into.
+         * NOTE(review): MM0-MM7 are modified but not listed as clobbers;
+         * confirm whether the supported compilers accept them. */
+        asm volatile("\
+            testl $~0x1F, %%eax                                         \n\
+            jz 1f                                                       \n\
+            0:                                                          \n\
+            movq -32(%%esi,%%eax), %%mm0                                \n\
+            movq -24(%%esi,%%eax), %%mm1                                \n\
+            movq -16(%%esi,%%eax), %%mm2                                \n\
+            movq -8(%%esi,%%eax), %%mm3                                 \n\
+            movq -32(%%edx,%%eax), %%mm4                                \n\
+            pavgb %%mm4, %%mm0                                          \n\
+            movq -24(%%edx,%%eax), %%mm5                                \n\
+            pavgb %%mm5, %%mm1                                          \n\
+            movq -16(%%edx,%%eax), %%mm6                                \n\
+            pavgb %%mm6, %%mm2                                          \n\
+            movq -8(%%edx,%%eax), %%mm7                                 \n\
+            pavgb %%mm7, %%mm3                                          \n\
+            movntq %%mm0, -32(%%edi,%%eax)                              \n\
+            movntq %%mm1, -24(%%edi,%%eax)                              \n\
+            movntq %%mm2, -16(%%edi,%%eax)                              \n\
+            movntq %%mm3, -8(%%edi,%%eax)                               \n\
+            subl $32, %%eax                                             \n\
+            testl $~0x1F, %%eax                                         \n\
+            jnz 0b                                                      \n\
+            testl %%eax, %%eax                                          \n\
+            jz 2f                                                       \n\
+            1:                                                          \n\
+            movq -8(%%esi,%%eax), %%mm0                                 \n\
+            movq -8(%%edx,%%eax), %%mm1                                 \n\
+            pavgb %%mm1, %%mm0                                          \n\
+            movntq %%mm0, -8(%%edi,%%eax)                               \n\
+            subl $8, %%eax                                              \n\
+            jnz 1b                                                      \n\
+            2:                                                          \n\
+            emms                                                        \n\
+            sfence"
+            : "+a" (count)
+            : "S" (src1), "d" (src2), "D" (dest)
+            : "memory", "cc");
+    }
+    if (UNLIKELY(bytes & 7)) {
+        /* Offset of the first byte the 8-byte loop did not cover. */
+        const int done = bytes & ~7;
+
+        average(src1 + done, src2 + done, dest + done, bytes & 7);
+    }
+}
+
+#endif /* HAVE_ASM_SSE && ARCH_X86 */
+
+/*************************************************************************/
+
+#if defined(HAVE_ASM_SSE2)
+
+/*
+ * Register names spliced into the assembly text of average_sse2() for
+ * address operands: the 64-bit names when ARCH_X86_64 is defined, the
+ * 32-bit names otherwise.  The doubled '%' is the escape required inside
+ * an asm template that has operands.
+ */
+#ifdef ARCH_X86_64
+/* Source and destination pointers */
+# define ESI "%%rsi"
+# define EDX "%%rdx"
+# define EDI "%%rdi"
+/* Byte offset */
+# define EAX "%%rax"
+#else
+/* Source and destination pointers */
+# define ESI "%%esi"
+# define EDX "%%edx"
+# define EDI "%%edi"
+/* Byte offset */
+# define EAX "%%eax"
+#endif
+
+/*
+ * average_sse2: byte-wise average using PAVGB on the XMM registers.
+ *
+ * The inline assembly processes the largest multiple of 8 bytes, working
+ * from the end of that region down to offset 0: first in passes of 64
+ * bytes (unaligned MOVDQU loads and stores) while at least 64 remain,
+ * then in passes of 8 bytes using the MMX registers.  The remaining 1-7
+ * bytes, if any, are handled by average().
+ */
+static void average_sse2(const uint8_t *src1, const uint8_t *src2,
+                         uint8_t *dest, int bytes)
+{
+    /* The plain C version does nothing for a non-positive count.  Without
+     * this check a negative count would fall through to the tail call
+     * below with a negative buffer offset. */
+    if (UNLIKELY(bytes <= 0)) {
+        return;
+    }
+
+    if (bytes >= 8) {
+        /* The loops count the register down to zero, so it is declared
+         * as a read-write operand; an input-only operand must not be
+         * changed by the asm.  It is pointer-width because the address
+         * operands use the full register (RAX on x86_64), whose upper
+         * half would be unspecified if a 32-bit int were bound to it. */
+        intptr_t count = bytes & ~7;
+
+        /* "memory": the asm reads the sources and writes dest through
+         * pointers the compiler cannot see into.
+         * NOTE(review): XMM0-XMM7 and MM0-MM1 are modified but not listed
+         * as clobbers; confirm whether the supported compilers accept
+         * them and whether any target ABI treats XMM6/XMM7 as
+         * callee-saved. */
+        asm volatile("\
+            testl $~0x3F, %%eax                                         \n\
+            jz 1f                                                       \n\
+            0:                                                          \n\
+            movdqu -64("ESI","EAX"), %%xmm0                             \n\
+            movdqu -48("ESI","EAX"), %%xmm1                             \n\
+            movdqu -32("ESI","EAX"), %%xmm2                             \n\
+            movdqu -16("ESI","EAX"), %%xmm3                             \n\
+            movdqu -64("EDX","EAX"), %%xmm4                             \n\
+            pavgb %%xmm4, %%xmm0                                        \n\
+            movdqu -48("EDX","EAX"), %%xmm5                             \n\
+            pavgb %%xmm5, %%xmm1                                        \n\
+            movdqu -32("EDX","EAX"), %%xmm6                             \n\
+            pavgb %%xmm6, %%xmm2                                        \n\
+            movdqu -16("EDX","EAX"), %%xmm7                             \n\
+            pavgb %%xmm7, %%xmm3                                        \n\
+            # Note that movntdq requires 16-byte alignment, which we're \n\
+            # not guaranteed                                            \n\
+            movdqu %%xmm0, -64("EDI","EAX")                             \n\
+            movdqu %%xmm1, -48("EDI","EAX")                             \n\
+            movdqu %%xmm2, -32("EDI","EAX")                             \n\
+            movdqu %%xmm3, -16("EDI","EAX")                             \n\
+            subl $64, %%eax                                             \n\
+            testl $~0x3F, %%eax                                         \n\
+            jnz 0b                                                      \n\
+            testl %%eax, %%eax                                          \n\
+            jz 2f                                                       \n\
+            1:                                                          \n\
+            movq -8("ESI","EAX"), %%mm0                                 \n\
+            movq -8("EDX","EAX"), %%mm1                                 \n\
+            pavgb %%mm1, %%mm0                                          \n\
+            movq %%mm0, -8("EDI","EAX")                                 \n\
+            subl $8, %%eax                                              \n\
+            jnz 1b                                                      \n\
+            2:                                                          \n\
+            emms"
+            : "+a" (count)
+            : "S" (src1), "d" (src2), "D" (dest)
+            : "memory", "cc");
+    }
+    if (UNLIKELY(bytes & 7)) {
+        /* Offset of the first byte the 8-byte loop did not cover. */
+        const int done = bytes & ~7;
+
+        average(src1 + done, src2 + done, dest + done, bytes & 7);
+    }
+}
+
+#endif /* HAVE_ASM_SSE2 */
+
+/*************************************************************************/
+/*************************************************************************/
+
+/* Initialization routine. */
+
+/*
+ * ac_average_init: choose the routine that ac_average() will call.
+ *
+ * Starts from the plain C version and then tests each compiled-in
+ * accelerated variant against `accel' in the order MMX, SSE, SSE2; a
+ * later match replaces an earlier one.  Always returns 1.
+ */
+int ac_average_init(int accel)
+{
+    void (*chosen)(const uint8_t *, const uint8_t *, uint8_t *, int) = average;
+
+#if defined(HAVE_ASM_MMX) && defined(ARCH_X86)
+    if (HAS_ACCEL(accel, AC_MMX)) {
+        chosen = average_mmx;
+    }
+#endif
+#if defined(HAVE_ASM_SSE) && defined(ARCH_X86)
+    if (HAS_ACCEL(accel, AC_SSE)) {
+        chosen = average_sse;
+    }
+#endif
+#if defined(HAVE_ASM_SSE2)
+    if (HAS_ACCEL(accel, AC_SSE2)) {
+        chosen = average_sse2;
+    }
+#endif
+
+    average_ptr = chosen;
+    return 1;
+}
+
+/*************************************************************************/
+
+/*
+ * Local variables:
+ * c-file-style: "stroustrup"
+ * c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */