X-Git-Url: http://git.xonotic.org/?a=blobdiff_plain;f=dpsoftrast.c;h=b3a00ae417f34004967976d6c7278332b0daa0bf;hb=7818f529ea7fafe4ce34da295f872cb26f24d67d;hp=609f444e70bc0a26816d09581e2c6d8bc6231da9;hpb=f802623d71de8ef53d27ed30969c36583216097d;p=xonotic%2Fdarkplaces.git diff --git a/dpsoftrast.c b/dpsoftrast.c index 609f444e..b3a00ae4 100644 --- a/dpsoftrast.c +++ b/dpsoftrast.c @@ -15,21 +15,35 @@ typedef qboolean bool; #endif #define ALIGN_SIZE 16 -#define ATOMIC_SIZE 32 +#define ATOMIC_SIZE 4 #ifdef SSE_POSSIBLE #if defined(__APPLE__) #include #define ALIGN(var) var __attribute__((__aligned__(16))) - #define ATOMIC(var) var __attribute__((__aligned__(32))) + #define ATOMIC(var) var __attribute__((__aligned__(4))) #define MEMORY_BARRIER (_mm_sfence()) #define ATOMIC_COUNTER volatile int32_t #define ATOMIC_INCREMENT(counter) (OSAtomicIncrement32Barrier(&(counter))) #define ATOMIC_DECREMENT(counter) (OSAtomicDecrement32Barrier(&(counter))) #define ATOMIC_ADD(counter, val) ((void)OSAtomicAdd32Barrier((val), &(counter))) + #elif defined(__GNUC__) && defined(WIN32) + #define ALIGN(var) var __attribute__((__aligned__(16))) + #define ATOMIC(var) var __attribute__((__aligned__(4))) + #define MEMORY_BARRIER (_mm_sfence()) + //(__sync_synchronize()) + #define ATOMIC_COUNTER volatile LONG + // this LONG * cast serves to fix an issue with broken mingw + // packages on Ubuntu; these only declare the function to take + // a LONG *, causing a compile error here. This seems to be + // error- and warn-free on platforms that DO declare + // InterlockedIncrement correctly, like mingw on Windows. + #define ATOMIC_INCREMENT(counter) (InterlockedIncrement((LONG *) &(counter))) + #define ATOMIC_DECREMENT(counter) (InterlockedDecrement((LONG *) &(counter))) + #define ATOMIC_ADD(counter, val) ((void)InterlockedExchangeAdd((LONG *) &(counter), (val))) #elif defined(__GNUC__) #define ALIGN(var) var __attribute__((__aligned__(16))) - #define ATOMIC(var) var __attribute__((__aligned__(32))) + #define ATOMIC(var) var __attribute__((__aligned__(4))) #define MEMORY_BARRIER (_mm_sfence()) //(__sync_synchronize()) #define ATOMIC_COUNTER volatile int @@ -38,7 +52,7 @@ typedef qboolean bool; #define ATOMIC_ADD(counter, val) ((void)__sync_fetch_and_add(&(counter), (val))) #elif defined(_MSC_VER) #define ALIGN(var) __declspec(align(16)) var - #define ATOMIC(var) __declspec(align(32)) var + #define ATOMIC(var) __declspec(align(4)) var #define MEMORY_BARRIER (_mm_sfence()) //(MemoryBarrier()) #define ATOMIC_COUNTER volatile LONG @@ -73,15 +87,15 @@ typedef qboolean bool; #ifdef SSE_POSSIBLE #include -#if defined(__GNUC__) && (__GNUC < 4 || __GNUC_MINOR__ < 6) +#if defined(__GNUC__) && (__GNUC < 4 || __GNUC_MINOR__ < 6) && !defined(__clang__) #define _mm_cvtss_f32(val) (__builtin_ia32_vec_ext_v4sf ((__v4sf)(val), 0)) #endif -#define MM_MALLOC(size) _mm_malloc(size, ATOMIC_SIZE) +#define MM_MALLOC(size) _mm_malloc(size, ALIGN_SIZE) static void *MM_CALLOC(size_t nmemb, size_t size) { - void *ptr = _mm_malloc(nmemb*size, ATOMIC_SIZE); + void *ptr = _mm_malloc(nmemb*size, ALIGN_SIZE); if (ptr != NULL) memset(ptr, 0, nmemb*size); return ptr; } @@ -149,15 +163,15 @@ enum { DPSOFTRAST_OPCODE_Reset = 0 }; #define DPSOFTRAST_DRAW_MAXCOMMANDPOOL 2097152 #define DPSOFTRAST_DRAW_MAXCOMMANDSIZE 16384 -typedef ATOMIC(struct DPSOFTRAST_State_Command_Pool_s +typedef ALIGN(struct DPSOFTRAST_State_Command_Pool_s { int freecommand; int usedcommands; - ATOMIC(unsigned char commands[DPSOFTRAST_DRAW_MAXCOMMANDPOOL]); + ALIGN(unsigned char commands[DPSOFTRAST_DRAW_MAXCOMMANDPOOL]); } DPSOFTRAST_State_Command_Pool); -typedef ATOMIC(struct DPSOFTRAST_State_Triangle_s +typedef ALIGN(struct DPSOFTRAST_State_Triangle_s { unsigned char mip[DPSOFTRAST_MAXTEXTUREUNITS]; // texcoord to screen space density values (for picking mipmap of textures) float w[3]; @@ -222,7 +236,7 @@ typedef enum DPSOFTRAST_BLENDMODE_e } DPSOFTRAST_BLENDMODE; -typedef ATOMIC(struct DPSOFTRAST_State_Thread_s +typedef ALIGN(struct DPSOFTRAST_State_Thread_s { void *thread; int index; @@ -235,9 +249,6 @@ typedef ATOMIC(struct DPSOFTRAST_State_Thread_s int depthtest; int depthfunc; int scissortest; - int alphatest; - int alphafunc; - float alphavalue; int viewport[4]; int scissor[4]; float depthrange[2]; @@ -291,7 +302,7 @@ typedef ATOMIC(struct DPSOFTRAST_State_Thread_s } DPSOFTRAST_State_Thread); -typedef ATOMIC(struct DPSOFTRAST_State_s +typedef ALIGN(struct DPSOFTRAST_State_s { int fb_width; int fb_height; @@ -1154,30 +1165,6 @@ void DPSOFTRAST_CullFace(int mode) command->mode = mode; } -DEFCOMMAND(15, AlphaTest, int enable;) -static void DPSOFTRAST_Interpret_AlphaTest(DPSOFTRAST_State_Thread *thread, DPSOFTRAST_Command_AlphaTest *command) -{ - thread->alphatest = command->enable; -} -void DPSOFTRAST_AlphaTest(int enable) -{ - DPSOFTRAST_Command_AlphaTest *command = DPSOFTRAST_ALLOCATECOMMAND(AlphaTest); - command->enable = enable; -} - -DEFCOMMAND(16, AlphaFunc, int func; float ref;) -static void DPSOFTRAST_Interpret_AlphaFunc(DPSOFTRAST_State_Thread *thread, DPSOFTRAST_Command_AlphaFunc *command) -{ - thread->alphafunc = command->func; - thread->alphavalue = command->ref; -} -void DPSOFTRAST_AlphaFunc(int func, float ref) -{ - DPSOFTRAST_Command_AlphaFunc *command = DPSOFTRAST_ALLOCATECOMMAND(AlphaFunc); - command->func = func; - command->ref = ref; -} - void DPSOFTRAST_Color4f(float r, float g, float b, float a) { dpsoftrast.color[0] = r; @@ -2062,7 +2049,7 @@ void DPSOFTRAST_Draw_Span_FinishBGRA8(DPSOFTRAST_State_Thread *thread, const DPS pixel += (span->y * dpsoftrast.fb_width + span->x) * 4; pixeli += span->y * dpsoftrast.fb_width + span->x; // handle alphatest now (this affects depth writes too) - if (thread->alphatest) + if (thread->shader_permutation & SHADERPERMUTATION_ALPHAKILL) for (x = startx;x < endx;x++) if (in4ub[x*4+3] < 128) pixelmask[x] = false; @@ -3319,7 +3306,7 @@ void DPSOFTRAST_PixelShader_FlatColor(DPSOFTRAST_State_Thread *thread, const DPS unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_colorbgra8, GL20TU_COLOR, 2, buffer_z); - if (thread->alphatest || thread->fb_blendmode != DPSOFTRAST_BLENDMODE_OPAQUE) + if ((thread->shader_permutation & SHADERPERMUTATION_ALPHAKILL) || thread->fb_blendmode != DPSOFTRAST_BLENDMODE_OPAQUE) pixel = buffer_FragColorbgra8; Color_Ambientm = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(&thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4]), _mm_set1_ps(256.0f))), _MM_SHUFFLE(3, 0, 1, 2)); Color_Ambientm = _mm_and_si128(Color_Ambientm, _mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); @@ -3372,7 +3359,7 @@ void DPSOFTRAST_PixelShader_VertexColor(DPSOFTRAST_State_Thread *thread, const D int arrayindex = DPSOFTRAST_ARRAY_COLOR; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_colorbgra8, GL20TU_COLOR, 2, buffer_z); - if (thread->alphatest || thread->fb_blendmode != DPSOFTRAST_BLENDMODE_OPAQUE) + if ((thread->shader_permutation & SHADERPERMUTATION_ALPHAKILL) || thread->fb_blendmode != DPSOFTRAST_BLENDMODE_OPAQUE) pixel = buffer_FragColorbgra8; Color_Ambientm = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(&thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4]), _mm_set1_ps(256.0f))), _MM_SHUFFLE(3, 0, 1, 2)); Color_Ambientm = _mm_and_si128(Color_Ambientm, _mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); @@ -3447,7 +3434,7 @@ void DPSOFTRAST_PixelShader_Lightmap(DPSOFTRAST_State_Thread *thread, const DPSO DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_colorbgra8, GL20TU_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z); - if (thread->alphatest || thread->fb_blendmode != DPSOFTRAST_BLENDMODE_OPAQUE) + if ((thread->shader_permutation & SHADERPERMUTATION_ALPHAKILL) || thread->fb_blendmode != DPSOFTRAST_BLENDMODE_OPAQUE) pixel = buffer_FragColorbgra8; Color_Ambientm = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(&thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4]), _mm_set1_ps(256.0f))), _MM_SHUFFLE(3, 0, 1, 2)); Color_Ambientm = _mm_and_si128(Color_Ambientm, _mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); @@ -3870,8 +3857,8 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f; } + specular = pow(specular, 1.0f + SpecularPower * glosstex[3]); - specular = pow(specular, SpecularPower * glosstex[3]); if (thread->shader_permutation & SHADERPERMUTATION_GLOW) { d[0] = (int)(buffer_texture_glowbgra8[x*4+0] * Color_Glow[0] + diffusetex[0] * Color_Ambient[0] + (diffusetex[0] * Color_Diffuse[0] * diffuse + glosstex[0] * Color_Specular[0] * specular) * LightColor[0]);if (d[0] > 255) d[0] = 255; @@ -4279,7 +4266,7 @@ void DPSOFTRAST_PixelShader_LightSource(DPSOFTRAST_State_Thread *thread, const D specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f; } - specular = pow(specular, SpecularPower * glosstex[3]); + specular = pow(specular, 1.0f + SpecularPower * glosstex[3]); if (thread->shader_permutation & SHADERPERMUTATION_CUBEFILTER) { @@ -4579,9 +4566,9 @@ void DPSOFTRAST_PixelShader_Water(DPSOFTRAST_State_Thread *thread, const DPSOFTR float EyeVectorslope[4]; // uniforms - float ScreenScaleRefractReflect[2]; - float ScreenCenterRefractReflect[2]; - float DistortScaleRefractReflect[2]; + float ScreenScaleRefractReflect[4]; + float ScreenCenterRefractReflect[4]; + float DistortScaleRefractReflect[4]; float RefractColor[4]; float ReflectColor[4]; float ReflectFactor; @@ -4752,6 +4739,8 @@ static const DPSOFTRAST_ShaderModeInfo DPSOFTRAST_ShaderModeTable[SHADERMODE_COU {2, DPSOFTRAST_VertexShader_FakeLight, DPSOFTRAST_PixelShader_FakeLight, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}}, {2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace, DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_LIGHTMAP, GL20TU_DELUXEMAP, ~0}}, {2, DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace, DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_LIGHTMAP, GL20TU_DELUXEMAP, ~0}}, + {2, DPSOFTRAST_VertexShader_Lightmap, DPSOFTRAST_PixelShader_Lightmap, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}}, + {2, DPSOFTRAST_VertexShader_VertexColor, DPSOFTRAST_PixelShader_VertexColor, {DPSOFTRAST_ARRAY_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, ~0}, {GL20TU_COLOR, ~0}}, {2, DPSOFTRAST_VertexShader_LightDirection, DPSOFTRAST_PixelShader_LightDirection, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}}, {2, DPSOFTRAST_VertexShader_LightSource, DPSOFTRAST_PixelShader_LightSource, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_CUBE, ~0}}, {2, DPSOFTRAST_VertexShader_Refraction, DPSOFTRAST_PixelShader_Refraction, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_NORMAL, GL20TU_REFRACTION, ~0}}, @@ -5472,8 +5461,6 @@ static void DPSOFTRAST_Draw_InterpretCommands(DPSOFTRAST_State_Thread *thread, i INTERPCOMMAND(DepthRange) INTERPCOMMAND(PolygonOffset) INTERPCOMMAND(CullFace) - INTERPCOMMAND(AlphaTest) - INTERPCOMMAND(AlphaFunc) INTERPCOMMAND(SetTexture) INTERPCOMMAND(SetShader) INTERPCOMMAND(Uniform4f) @@ -5629,9 +5616,6 @@ int DPSOFTRAST_Init(int width, int height, int numthreads, int interlace, unsign thread->depthtest = true; thread->depthfunc = GL_LEQUAL; thread->scissortest = false; - thread->alphatest = false; - thread->alphafunc = GL_GREATER; - thread->alphavalue = 0.5f; thread->viewport[0] = 0; thread->viewport[1] = 0; thread->viewport[2] = dpsoftrast.fb_width;