git.xonotic.org Git - xonotic/darkplaces.git/blobdiff - dpsoftrast.c
theora encoding: simplify; bump default quality to 48 as that is what encoder_example...
[xonotic/darkplaces.git] / dpsoftrast.c
index 954bfcdf5c16e15f3fd8a8cda2cbebcdd56f0156..86d4f7b5cbc7d8a5da85e155a23124358cff7e9e 100644 (file)
@@ -27,6 +27,20 @@ typedef qboolean bool;
                #define ATOMIC_INCREMENT(counter) (OSAtomicIncrement32Barrier(&(counter)))
                #define ATOMIC_DECREMENT(counter) (OSAtomicDecrement32Barrier(&(counter)))
                #define ATOMIC_ADD(counter, val) ((void)OSAtomicAdd32Barrier((val), &(counter)))
+       #elif defined(__GNUC__) && defined(WIN32)
+               #define ALIGN(var) var __attribute__((__aligned__(16)))
+               #define ATOMIC(var) var __attribute__((__aligned__(32)))
+               #define MEMORY_BARRIER (_mm_sfence())
+               //(__sync_synchronize())
+               #define ATOMIC_COUNTER volatile LONG
+               // this LONG * cast works around broken mingw packages on
+               // Ubuntu, which declare the function as taking a plain LONG *
+               // (not a volatile LONG *), causing a compile error here. The
+               // cast appears to be error- and warning-free on platforms that
+               // DO declare InterlockedIncrement correctly, like mingw on Windows.
+               #define ATOMIC_INCREMENT(counter) (InterlockedIncrement((LONG *) &(counter)))
+               #define ATOMIC_DECREMENT(counter) (InterlockedDecrement((LONG *) &(counter)))
+               #define ATOMIC_ADD(counter, val) ((void)InterlockedExchangeAdd((LONG *) &(counter), (val)))
        #elif defined(__GNUC__)
                #define ALIGN(var) var __attribute__((__aligned__(16)))
                #define ATOMIC(var) var __attribute__((__aligned__(32)))
@@ -73,7 +87,7 @@ typedef qboolean bool;
 #ifdef SSE_POSSIBLE
 #include <emmintrin.h>
 
-#if defined(__GNUC__) && (__GNUC < 4 || __GNUC_MINOR__ < 6)
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)) && !defined(__clang__) // fallback macro only for real GCC older than 4.6; clang (which also defines __GNUC__) is excluded
        #define _mm_cvtss_f32(val) (__builtin_ia32_vec_ext_v4sf ((__v4sf)(val), 0))
 #endif
 
@@ -2297,166 +2311,63 @@ void DPSOFTRAST_Draw_Span_FinishBGRA8(DPSOFTRAST_State_Thread *thread, const DPS
 #endif
 }
 
-static void DPSOFTRAST_Texture2D(DPSOFTRAST_Texture *texture, int mip, float x, float y, float c[4])
-       // warning: this is SLOW, only use if the optimized per-span functions won't do
-       // FIXME does this function need flipping of the color order?
-{
-       const unsigned char * RESTRICT pixelbase;
-       const unsigned char * RESTRICT pixel[4];
-       int tciwrapmask[2];
-       tciwrapmask[0] = texture->mipmap[mip][2]-1;
-       tciwrapmask[1] = texture->mipmap[mip][3]-1;
-       pixelbase = (unsigned char *)texture->bytes + texture->mipmap[mip][0];
-       if(texture->filter & DPSOFTRAST_TEXTURE_FILTER_LINEAR)
-       {
-               if (texture->flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
-               {
-                       unsigned int tc[2] = { x * (texture->mipmap[mip][2]<<12) - 2048, y * (texture->mipmap[mip][3]<<12) - 2048};
-                       unsigned int frac[2] = { tc[0]&0xFFF, tc[1]&0xFFF };
-                       unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
-                       unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
-                       int tci[2] = { tc[0]>>12, tc[1]>>12 };
-                       int tci1[2] = { tci[0] + 1, tci[1] + 1 };
-                       tci[0] = tci[0] >= 0 ? (tci[0] <= texture->mipmap[mip][2]-1 ? tci[0] : texture->mipmap[mip][2]-1) : 0;
-                       tci[1] = tci[1] >= 0 ? (tci[1] <= texture->mipmap[mip][3]-1 ? tci[1] : texture->mipmap[mip][3]-1) : 0;
-                       tci1[0] = tci1[0] >= 0 ? (tci1[0] <= texture->mipmap[mip][2]-1 ? tci1[0] : texture->mipmap[mip][2]-1) : 0;
-                       tci1[1] = tci1[1] >= 0 ? (tci1[1] <= texture->mipmap[mip][3]-1 ? tci1[1] : texture->mipmap[mip][3]-1) : 0;
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[1] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci1[0]);
-                       pixel[2] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[3] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci1[0]);
-                       c[0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3]) * (1.0f / 0xFF00000);
-                       c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3]) * (1.0f / 0xFF00000);
-                       c[2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3]) * (1.0f / 0xFF00000);
-                       c[3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3]) * (1.0f / 0xFF00000);
-               }
-               else
-               {
-                       unsigned int tc[2] = { x * (texture->mipmap[mip][2]<<12) - 2048, y * (texture->mipmap[mip][3]<<12) - 2048};
-                       unsigned int frac[2] = { tc[0]&0xFFF, tc[1]&0xFFF };
-                       unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
-                       unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
-                       int tci[2] = { tc[0]>>12, tc[1]>>12 };
-                       int tci1[2] = { tci[0] + 1, tci[1] + 1 };
-                       tci[0] &= tciwrapmask[0];
-                       tci[1] &= tciwrapmask[1];
-                       tci1[0] &= tciwrapmask[0];
-                       tci1[1] &= tciwrapmask[1];
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[1] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci1[0]);
-                       pixel[2] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[3] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci1[0]);
-                       c[0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3]) * (1.0f / 0xFF00000);
-                       c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3]) * (1.0f / 0xFF00000);
-                       c[2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3]) * (1.0f / 0xFF00000);
-                       c[3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3]) * (1.0f / 0xFF00000);
-               }
-       }
-       else
-       {
-               if (texture->flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
-               {
-                       int tci[2] = { x * texture->mipmap[mip][2], y * texture->mipmap[mip][3] };
-                       tci[0] = tci[0] >= 0 ? (tci[0] <= texture->mipmap[mip][2]-1 ? tci[0] : texture->mipmap[mip][2]-1) : 0;
-                       tci[1] = tci[1] >= 0 ? (tci[1] <= texture->mipmap[mip][3]-1 ? tci[1] : texture->mipmap[mip][3]-1) : 0;
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       c[0] = pixel[0][0] * (1.0f / 255.0f);
-                       c[1] = pixel[0][1] * (1.0f / 255.0f);
-                       c[2] = pixel[0][2] * (1.0f / 255.0f);
-                       c[3] = pixel[0][3] * (1.0f / 255.0f);
-               }
-               else
-               {
-                       int tci[2] = { x * texture->mipmap[mip][2], y * texture->mipmap[mip][3] };
-                       tci[0] &= tciwrapmask[0];
-                       tci[1] &= tciwrapmask[1];
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       c[0] = pixel[0][0] * (1.0f / 255.0f);
-                       c[1] = pixel[0][1] * (1.0f / 255.0f);
-                       c[2] = pixel[0][2] * (1.0f / 255.0f);
-                       c[3] = pixel[0][3] * (1.0f / 255.0f);
-               }
-       }
-}
-
 static void DPSOFTRAST_Texture2DBGRA8(DPSOFTRAST_Texture *texture, int mip, float x, float y, unsigned char c[4])
        // warning: this is SLOW, only use if the optimized per-span functions won't do
 {
        const unsigned char * RESTRICT pixelbase;
        const unsigned char * RESTRICT pixel[4];
-       int tciwrapmask[2];
-       tciwrapmask[0] = texture->mipmap[mip][2]-1;
-       tciwrapmask[1] = texture->mipmap[mip][3]-1;
+       int width = texture->mipmap[mip][2], height = texture->mipmap[mip][3]; // size of the selected mip level in texels; x and y are scaled by these before sampling into c[4]
+       int wrapmask[2] = { width-1, height-1 }; // highest texel index per axis: clamp limit in clamp-to-edge mode, AND mask otherwise (NOTE(review): masking presumably relies on power-of-two sizes - confirm)
        pixelbase = (unsigned char *)texture->bytes + texture->mipmap[mip][0];
        if(texture->filter & DPSOFTRAST_TEXTURE_FILTER_LINEAR)
        {
+               unsigned int tc[2] = { x * (width<<12) - 2048, y * (height<<12) - 2048}; // texel coordinates with 12 fractional bits, moved back by half a texel (2048); NOTE(review): converting a negative float to unsigned int is undefined in C - confirm coordinates below half a texel behave as intended
+               unsigned int frac[2] = { tc[0]&0xFFF, tc[1]&0xFFF }; // low 12 bits: blend weight toward the next texel
+               unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] }; // complementary weight (0x1000 represents 1.0)
+               unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] }; // bilinear weights of the four texels; together they sum to 1<<24
+               int tci[2] = { tc[0]>>12, tc[1]>>12 }; // integer texel index of the first sample
+               int tci1[2] = { tci[0] + 1, tci[1] + 1 }; // index of the neighbouring texel on each axis
                if (texture->flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
                {
-                       unsigned int tc[2] = { x * (texture->mipmap[mip][2]<<12) - 2048, y * (texture->mipmap[mip][3]<<12) - 2048};
-                       unsigned int frac[2] = { tc[0]&0xFFF, tc[1]&0xFFF };
-                       unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
-                       unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
-                       int tci[2] = { tc[0]>>12, tc[1]>>12 };
-                       int tci1[2] = { tci[0] + 1, tci[1] + 1 };
-                       tci[0] = tci[0] >= 0 ? (tci[0] <= texture->mipmap[mip][2]-1 ? tci[0] : texture->mipmap[mip][2]-1) : 0;
-                       tci[1] = tci[1] >= 0 ? (tci[1] <= texture->mipmap[mip][3]-1 ? tci[1] : texture->mipmap[mip][3]-1) : 0;
-                       tci1[0] = tci1[0] >= 0 ? (tci1[0] <= texture->mipmap[mip][2]-1 ? tci1[0] : texture->mipmap[mip][2]-1) : 0;
-                       tci1[1] = tci1[1] >= 0 ? (tci1[1] <= texture->mipmap[mip][3]-1 ? tci1[1] : texture->mipmap[mip][3]-1) : 0;
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[1] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci1[0]);
-                       pixel[2] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[3] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci1[0]);
-                       c[0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3])>>24;
-                       c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3])>>24;
-                       c[2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3])>>24;
-                       c[3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3])>>24;
+                       tci[0] = tci[0] >= 0 ? (tci[0] <= wrapmask[0] ? tci[0] : wrapmask[0]) : 0; // clamp-to-edge: pin every index into [0, wrapmask]
+                       tci[1] = tci[1] >= 0 ? (tci[1] <= wrapmask[1] ? tci[1] : wrapmask[1]) : 0;
+                       tci1[0] = tci1[0] >= 0 ? (tci1[0] <= wrapmask[0] ? tci1[0] : wrapmask[0]) : 0;
+                       tci1[1] = tci1[1] >= 0 ? (tci1[1] <= wrapmask[1] ? tci1[1] : wrapmask[1]) : 0;
                }
                else
                {
-                       unsigned int tc[2] = { x * (texture->mipmap[mip][2]<<12) - 2048, y * (texture->mipmap[mip][3]<<12) - 2048};
-                       unsigned int frac[2] = { tc[0]&0xFFF, tc[1]&0xFFF };
-                       unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
-                       unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
-                       int tci[2] = { tc[0]>>12, tc[1]>>12 };
-                       int tci1[2] = { tci[0] + 1, tci[1] + 1 };
-                       tci[0] &= tciwrapmask[0];
-                       tci[1] &= tciwrapmask[1];
-                       tci1[0] &= tciwrapmask[0];
-                       tci1[1] &= tciwrapmask[1];
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[1] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci1[0]);
-                       pixel[2] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci[0]);
-                       pixel[3] = pixelbase + 4 * (tci1[1]*texture->mipmap[mip][2]+tci1[0]);
-                       c[0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3])>>24;
-                       c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3])>>24;
-                       c[2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3])>>24;
-                       c[3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3])>>24;
+                       tci[0] &= wrapmask[0]; // otherwise wrap every index by masking with wrapmask
+                       tci[1] &= wrapmask[1];
+                       tci1[0] &= wrapmask[0];
+                       tci1[1] &= wrapmask[1];
                }
+               pixel[0] = pixelbase + 4 * (tci[1]*width+tci[0]); // addresses of the four texels to blend; each texel occupies 4 bytes
+               pixel[1] = pixelbase + 4 * (tci[1]*width+tci1[0]);
+               pixel[2] = pixelbase + 4 * (tci1[1]*width+tci[0]);
+               pixel[3] = pixelbase + 4 * (tci1[1]*width+tci1[0]);
+               c[0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3])>>24; // weighted sum per byte; >>24 drops the 12+12 fractional weight bits
+               c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3])>>24;
+               c[2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3])>>24;
+               c[3] = (pixel[0][3]*lerp[0]+pixel[1][3]*lerp[1]+pixel[2][3]*lerp[2]+pixel[3][3]*lerp[3])>>24;
        }
        else
        {
+               int tci[2] = { x * width, y * height }; // non-linear filter path: a single texel index from the scaled coordinate converted to int
                if (texture->flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
                {
-                       int tci[2] = { x * texture->mipmap[mip][2], y * texture->mipmap[mip][3] };
-                       tci[0] = tci[0] >= 0 ? (tci[0] <= texture->mipmap[mip][2]-1 ? tci[0] : texture->mipmap[mip][2]-1) : 0;
-                       tci[1] = tci[1] >= 0 ? (tci[1] <= texture->mipmap[mip][3]-1 ? tci[1] : texture->mipmap[mip][3]-1) : 0;
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       c[0] = pixel[0][0];
-                       c[1] = pixel[0][1];
-                       c[2] = pixel[0][2];
-                       c[3] = pixel[0][3];
+                       tci[0] = tci[0] >= 0 ? (tci[0] <= wrapmask[0] ? tci[0] : wrapmask[0]) : 0; // clamp-to-edge: pin the index into [0, wrapmask]
+                       tci[1] = tci[1] >= 0 ? (tci[1] <= wrapmask[1] ? tci[1] : wrapmask[1]) : 0;
                }
                else
                {
-                       int tci[2] = { x * texture->mipmap[mip][2], y * texture->mipmap[mip][3] };
-                       tci[0] &= tciwrapmask[0];
-                       tci[1] &= tciwrapmask[1];
-                       pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[mip][2]+tci[0]);
-                       c[0] = pixel[0][0];
-                       c[1] = pixel[0][1];
-                       c[2] = pixel[0][2];
-                       c[3] = pixel[0][3];
+                       tci[0] &= wrapmask[0]; // otherwise wrap the index by masking with wrapmask
+                       tci[1] &= wrapmask[1];
                }
+               pixel[0] = pixelbase + 4 * (tci[1]*width+tci[0]); // address of the selected texel (4 bytes per texel)
+               c[0] = pixel[0][0]; // copy its four bytes unchanged
+               c[1] = pixel[0][1];
+               c[2] = pixel[0][2];
+               c[3] = pixel[0][3];
        }
 }