Submitted By: Ryan Oliver <ryan.oliver@pha.com.au>
Date: 2005-07-31
Initial Package Version: 0.9.21
Upstream Status: Unknown
Origin: Gentoo
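Description: Removes the explicit 32-bit "l" suffixes from the MMX inline
             assembly in generic_mmx.h so the assembler can size each
             instruction from its operands on amd64, and makes cpu_accel.c
             report a fixed set of SIMD capabilities on x86_64 (skipping the
             OS SSE-support test there). Intended to fix the SIMD code paths
             on amd64; the submitter reports that it did not work for them.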

diff -urN DirectFB-0.9.21.orig/src/gfx/generic/generic_mmx.h DirectFB-0.9.21/src/gfx/generic/generic_mmx.h
--- DirectFB-0.9.21.orig/src/gfx/generic/generic_mmx.h	2003-06-30 23:26:04.000000000 +0200
+++ DirectFB-0.9.21/src/gfx/generic/generic_mmx.h	2004-01-08 23:46:40.000000000 +0100
@@ -172,15 +172,19 @@
                "movq     %%mm1, (%1)\n\t"
                "dec      %2\n\t"
                "jz       3f\n\t"
-               "addl     $8, %1\n\t"
-               "addl     %4, %0\n\t"
-               "testl    $0xFFFF0000, %0\n\t"
+               "add     $8, %1\n\t"
+               "add     %4, %0\n\t"
+               "test    $0xFFFF0000, %0\n\t"
                "jz       2b\n\t"
-               "movl     %0, %%edx\n\t"
-               "andl     $0xFFFF0000, %%edx\n\t"
-               "shrl     $14, %%edx\n\t"
+               "mov     %0, %%edx\n\t"
+               "and     $0xFFFF0000, %%edx\n\t"
+               "shr     $14, %%edx\n\t"
+#ifdef __x86_64__
+               "add      %%rdx, %3\n\t" /* offset was computed in %edx (zero-extended into %rdx), not %rbx */
+#else
                "add      %%edx, %3\n\t"
-               "andl     $0xFFFF, %0\n\t"
+#endif
+               "and     $0xFFFF, %0\n\t"
                "jmp      1b\n"
                "3:\n\t"
                "emms"
@@ -201,8 +205,8 @@
                "movd     (%2), %%mm1\n\t"
                "punpcklbw %%mm0, %%mm1\n\t"
                "movq     %%mm1, (%0)\n\t"
-               "addl     $4, %2\n\t"
-               "addl     $8, %0\n\t"
+               "add     $4, %2\n\t"
+               "add     $8, %0\n\t"
                "dec      %1\n\t"
                "jnz      1b\n\t"
                "emms"
@@ -238,7 +242,7 @@
                "dec      %1\n\t"
                "jz       2f\n\t"
                "psrlq    $16, %%mm0\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $8, %0\n\t"
                /* 2. Konvertierung nach 24 bit interleaved */
 	       "movq     %%mm0, %%mm3\n\t"
                "punpcklwd %%mm3, %%mm3\n\t"
@@ -252,7 +256,7 @@
                "dec      %1\n\t"
                "jz       2f\n\t"
                "psrlq    $16, %%mm0\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $8, %0\n\t"
                /* 3. Konvertierung nach 24 bit interleaved */
 	       "movq     %%mm0, %%mm3\n\t"
                "punpcklwd %%mm3, %%mm3\n\t"
@@ -266,7 +270,7 @@
                "dec      %1\n\t"
                "jz       2f\n\t"
                "psrlq    $16, %%mm0\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $8, %0\n\t"
                /* 4. Konvertierung nach 24 bit interleaved */
 	       "movq     %%mm0, %%mm3\n\t"
                "punpcklwd %%mm3, %%mm3\n\t"
@@ -279,8 +283,8 @@
                "movq     %%mm3, (%0)\n\t"
                "dec      %1\n\t"
                "jz       2f\n\t"
-	       "addl     $8, %0\n\t"
-	       "addl     $8, %2\n\t"
+	       "add     $8, %0\n\t"
+	       "add     $8, %2\n\t"
                "jmp      1b\n"
                "2:\n\t"
                "emms"
@@ -304,8 +308,8 @@
                "punpcklbw %%mm6, %%mm0\n\t"
                "por      %%mm7, %%mm0\n\t"
                "movq     %%mm0, (%0)\n\t"
-	       "addl     $4, %2\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $4, %2\n\t"
+	       "add     $8, %0\n\t"
                "dec      %1\n\t"
                "jnz      1b\n\t"
                "emms"
@@ -322,7 +326,7 @@
 
      __asm__ __volatile__ (
 	       "movq     %3, %%mm7\n\t"
-               "cmpl     $0, %2\n\t"
+               "cmp     $0, %2\n\t"
                "jne      3f\n\t"
                "movq     %4, %%mm6\n\t"
                "movd     %5, %%mm0\n\t"
@@ -341,7 +345,7 @@
                "psrlw    $8, %%mm0\n\t"
                "movq     %%mm0, (%0)\n"
                "1:\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $8, %0\n\t"
                "dec      %1\n\t"
                "jnz      4b\n\t"
                "jmp      2f\n\t"
@@ -360,8 +364,8 @@
                "psrlw    $8, %%mm0\n\t"
                "movq     %%mm0, (%0)\n"
                "1:\n\t"
-	       "addl     $8, %2\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $8, %2\n\t"
+	       "add     $8, %0\n\t"
                "dec      %1\n\t"
                "jnz      3b\n\t"
                "2:\n\t"
@@ -379,7 +383,7 @@
 
      __asm__ __volatile__ (
 	       "movq     %3, %%mm7\n\t"
-               "cmpl     $0, %2\n\t"
+               "cmp     $0, %2\n\t"
                "jne      3f\n\t"
                "movq     %4, %%mm6\n\t"
                "movd     %5, %%mm0\n\t"
@@ -397,7 +401,7 @@
                "psrlw    $8, %%mm1\n\t"
                "movq     %%mm1, (%0)\n"
                "1:\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $8, %0\n\t"
                "dec      %1\n\t"
                "jnz      4b\n\t"
                "jmp      2f\n\t"
@@ -415,8 +419,8 @@
                "psrlw    $8, %%mm1\n\t"
                "movq     %%mm1, (%0)\n"
                "1:\n\t"
-	       "addl     $8, %2\n\t"
-	       "addl     $8, %0\n\t"
+	       "add     $8, %2\n\t"
+	       "add     $8, %0\n\t"
                "dec      %1\n\t"
                "jnz      3b\n\t"
                "2:\n\t"
diff -urN DirectFB-0.9.21.orig/lib/direct/cpu_accel.c DirectFB-0.9.21/lib/direct/cpu_accel.c
--- DirectFB-0.9.21.orig/lib/direct/cpu_accel.c	2003-08-15 13:32:45.000000000 +0200
+++ DirectFB-0.9.21/lib/direct/cpu_accel.c	2004-01-08 23:48:05.000000000 +0100
@@ -57,6 +57,13 @@
 
 #include "cpu_accel.h"
 
+#ifdef __x86_64__ /* x86-64 baseline: MMX, MMXEXT (part of SSE), SSE and SSE2 are always present */
+static __u32 arch_accel (void)
+{
+	return MM_ACCEL_X86_MMX | MM_ACCEL_X86_MMXEXT | MM_ACCEL_X86_SSE | MM_ACCEL_X86_SSE2; /* no 3DNow!: not guaranteed on x86-64 */
+}
+#endif /* __x86_64__ */
+
 #ifdef ARCH_X86
 static __u32 arch_accel (void)
 {
@@ -175,7 +182,7 @@
 
 __u32 dfb_mm_accel (void)
 {
-#if defined (ARCH_X86) || (defined (ARCH_PPC) && defined (ENABLE_ALTIVEC))
+#if defined(__x86_64__) || defined (ARCH_X86) || (defined (ARCH_PPC) && defined (ENABLE_ALTIVEC))
      static __u32 accel = ~0U;
 
      if (accel != ~0U)
@@ -183,7 +190,7 @@
 
      accel = arch_accel ();
 
-#ifdef USE_SSE
+#if defined(USE_SSE) && !defined(__x86_64__)
 
      /* test OS support for SSE */
      if (accel & MM_ACCEL_X86_SSE) {
