From: Dale Weiler
Date: Sat, 14 Dec 2013 22:30:51 +0000 (-0500)
Subject: Remove SSE hash, it's just too much effort to maintain.
X-Git-Tag: xonotic-v0.8.0~29
X-Git-Url: https://git.xonotic.org/?p=xonotic%2Fgmqcc.git;a=commitdiff_plain;h=f24bdced104d84598b9eb9fa9132a24e67477518

Remove SSE hash, it's just too much effort to maintain.
---

diff --git a/hash.c b/hash.c
index 276817e..6ef3847 100644
--- a/hash.c
+++ b/hash.c
@@ -261,149 +261,8 @@ static GMQCC_FORCEINLINE GMQCC_USED uint32_t hash_native(const void *GMQCC_RESTR
     return hash_native_result(hash, carry, length);
 }
 
-/*
- * Inline assembly optimized SSE version for when SSE is present via CPUID
- * or the host compiler has __SSE__. This is about 16 cycles faster than
- * native at -O2 for GCC and 11 cycles for -O3.
- *
- * Tested with -m32 on a Phenom II X4 with:
- * gcc version 4.8.1 20130725 (prerelease) (GCC)
- */
-#if defined(__GNUC__) && defined(__i386__)
-static GMQCC_FORCEINLINE uint32_t hash_sse(const void *GMQCC_RESTRICT key, size_t length) {
-    uint32_t ret;
-    __asm__ __volatile__ (
-        "    mov %%eax, %%ebx\n"
-        "    mov %2, %%eax\n"
-        "    movd %%eax, %%xmm7\n"
-        "    shufps $0, %%xmm7, %%xmm7\n"
-        "    mov %3, %%eax\n"
-        "    movd %%eax, %%xmm6\n"
-        "    shufps $0, %%xmm6, %%xmm6\n"
-        "    lea (%%esi, %%ecx, 1), %%edi\n"
-        "    jmp 2f\n"
-        "1:\n"
-        "    movaps (%%esi), %%xmm0\n"
-        "    pmulld %%xmm7, %%xmm0\n"
-        "    movaps %%xmm0, %%xmm2\n"
-        "    pslld $15, %%xmm0\n"
-        "    psrld $17, %%xmm2\n"
-        "    orps %%xmm2, %%xmm0\n"
-        "    pmulld %%xmm6, %%xmm0\n"
-        "    movd %%xmm0, %%eax\n"
-        "    xor %%eax, %%ebx\n"
-        "    rol $13, %%ebx\n"
-        "    imul $5, %%ebx\n"
-        "    add $0xE6546B64, %%ebx\n"
-        "    shufps $0x39, %%xmm0, %%xmm0\n"
-        "    movd %%xmm0, %%eax\n"
-        "    xor %%eax, %%ebx\n"
-        "    rol $13, %%ebx\n"
-        "    imul $5, %%ebx\n"
-        "    add $0xE6546B64, %%ebx\n"
-        "    shufps $0x39, %%xmm0, %%xmm0\n"
-        "    movd %%xmm0, %%eax\n"
-        "    xor %%eax, %%ebx\n"
-        "    rol $13, %%ebx\n"
-        "    imul $5, %%ebx\n"
-        "    add $0xE6546B64, %%ebx\n"
-        "    shufps $0x39, %%xmm0, %%xmm0\n"
-        "    movd %%xmm0, %%eax\n"
-        "    xor %%eax, %%ebx\n"
-        "    rol $13, %%ebx\n"
-        "    imul $5, %%ebx\n"
-        "    add $0xE6546B64, %%ebx\n"
-        "    add $16, %%esi\n"
-        "2:\n"
-        "    cmp %%esi, %%edi\n"
-        "    jne 1b\n"
-        "    xor %%ecx, %%ebx\n"
-        "    mov %%ebx, %%eax\n"
-        "    shr $16, %%ebx\n"
-        "    xor %%ebx, %%eax\n"
-        "    imul $0x85EBCA6b, %%eax\n"
-        "    mov %%eax, %%ebx\n"
-        "    shr $13, %%ebx\n"
-        "    xor %%ebx, %%eax\n"
-        "    imul $0xC2B2AE35, %%eax\n"
-        "    mov %%eax, %%ebx\n"
-        "    shr $16, %%ebx\n"
-        "    xor %%ebx, %%eax\n"
-        : "=a" (ret)
-
-        : "a" (HASH_SEED),
-          "i" (HASH_MASK1),
-          "i" (HASH_MASK2),
-          "S" (key),
-          "c" (length)
-
-        : "%ebx",
-          "%edi"
-    );
-    return ret;
-}
-#endif
-
-#if defined (__GNUC__) && defined(__i386__) && !defined(__SSE__)
-/*
- * Emulate MSVC _cpuid intrinsic for GCC/MinGW/Clang, this will be used
- * to determine if we should use the SSE route.
- */
-static GMQCC_FORCEINLINE void hash_cpuid(int *lanes, int entry) {
-    __asm__ __volatile__ (
-        "cpuid"
-        : "=a"(lanes[0]),
-          "=b"(lanes[1]),
-          "=c"(lanes[2]),
-          "=d"(lanes[3])
-
-        : "a" (entry)
-    );
-}
-
-#endif /* !(defined(__GNUC__) && defined(__i386__) */
-
 static uint32_t hash_entry(const void *GMQCC_RESTRICT key, size_t length) {
-/*
- * No host SSE instruction set assumed do runtime test instead. This
- * is for MinGW32 mostly which doesn't define SSE.
- */
-#if defined (__GNUC__) && defined(__i386__) && !defined(__SSE__)
-    static bool memoize = false;
-    static bool sse = false;
-
-    if (GMQCC_UNLIKELY(!memoize)) {
-        /*
-         * Only calculate SSE one time, thus it's unlikely that this branch
-         * is taken more than once.
-         */
-        static int lanes[4];
-        hash_cpuid(lanes, 0);
-        /*
-         * It's very likely that lanes[0] will contain a value unless it
-         * isn't a modern x86.
-         */
-        if (GMQCC_LIKELY(*lanes >= 1))
-            sse = (lanes[3] & ((int)1 << 25)) != 0;
-        memoize = true;
-    }
-
-    return (GMQCC_LIKELY(sse))
-               ? hash_sse(key, length)
-               : hash_native(key, length);
-/*
- * Same as above but this time host compiler was defined with SSE support.
- * This handles MinGW32 builds for i686+
- */
-#elif defined (__GNUC__) && defined(__i386__) && defined(__SSE__)
-    return hash_sse(key, length);
-#else
-    /*
-     * Go the native route which itself is highly optimized as well for
-     * unaligned load/store when dealing with LE.
-     */
     return hash_native(key, length);
-#endif
 }
 
 #define HASH_LEN_ALIGN (sizeof(size_t))
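The runtime dispatch this commit removes amounted to reading CPUID leaf 1 and testing bit 25 of EDX before choosing hash_sse() over hash_native(). A minimal sketch of that same check, assuming GCC/Clang's <cpuid.h> (the __get_cpuid() call and the bit_SSE constant come from that header, not from gmqcc, and have_sse() is just an illustrative name):

    #include <stdbool.h>
    #include <cpuid.h>  /* __get_cpuid(), bit_SSE -- GCC/Clang on x86 only */

    /* Returns true when CPUID leaf 1 reports SSE (EDX bit 25), the same test
     * the removed hash_cpuid()/hash_entry() pair performed at runtime. */
    static bool have_sse(void) {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return false; /* CPUID leaf 1 unavailable: assume no SSE */
        return (edx & bit_SSE) != 0;
    }

With the SSE path gone, hash_entry() no longer needs any detection and simply calls hash_native() unconditionally.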