X-Git-Url: http://git.xonotic.org/?a=blobdiff_plain;f=dpsoftrast.c;h=e8eb95706a1fbf7fd082d5fc9473d2f628399847;hb=1ce5b955cc1c29b384cc48834ae2b567bcc877fb;hp=9598c63f2a59b4044bfac6f4d9dd8b36be0135ed;hpb=503f6fef27f5b87b9d71aa290f5a0890b159ff15;p=xonotic%2Fdarkplaces.git diff --git a/dpsoftrast.c b/dpsoftrast.c index 9598c63f..e8eb9570 100644 --- a/dpsoftrast.c +++ b/dpsoftrast.c @@ -1246,8 +1246,9 @@ void DPSOFTRAST_CopyRectangleToTexture(int index, int mip, int tx, int ty, int s if (th > sh) th = sh; if (tw < 1 || th < 1) return; + sy1 = sheight - 1 - sy1; for (y = 0;y < th;y++) - memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 + y) * swidth + sx1), tw*4); + memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 - y) * swidth + sx1), tw*4); if (texture->mipmaps > 1) DPSOFTRAST_Texture_CalculateMipmaps(index); } @@ -2992,19 +2993,19 @@ void DPSOFTRAST_Draw_Span_AddBloomBGRA8(const DPSOFTRAST_State_Triangle * RESTRI #ifdef SSE2_PRESENT int x, startx = span->startx, endx = span->endx; __m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(subcolor), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2)); - localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0)); + localcolor = _mm_packs_epi32(localcolor, localcolor); for (x = startx;x+2 <= endx;x+=2) { __m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128()); __m128i pix2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&inb4ub[x*4]), _mm_setzero_si128()); - pix1 = _mm_add_epi16(pix1, _mm_sub_epi16(pix2, localcolor)); + pix1 = _mm_add_epi16(pix1, _mm_subs_epu16(pix2, localcolor)); _mm_storel_epi64((__m128i *)&out4ub[x*4], _mm_packus_epi16(pix1, pix1)); } if (x < endx) { __m128i pix1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)&ina4ub[x*4]), _mm_setzero_si128()); __m128i pix2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)&inb4ub[x*4]), _mm_setzero_si128()); - pix1 = _mm_add_epi16(pix1, _mm_sub_epi16(pix2, localcolor)); + pix1 = _mm_add_epi16(pix1, _mm_subs_epu16(pix2, localcolor)); *(int *)&out4ub[x*4] = _mm_cvtsi128_si32(_mm_packus_epi16(pix1, pix1)); } #endif @@ -3057,7 +3058,7 @@ void DPSOFTRAST_Draw_Span_TintedAddBuffersBGRA8(const DPSOFTRAST_State_Triangle #ifdef SSE2_PRESENT int x, startx = span->startx, endx = span->endx; __m128i tint = _mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(inbtintbgra), _mm_set1_ps(256.0f))); - tint = _mm_shuffle_epi32(_mm_packs_epi32(tint, tint), _MM_SHUFFLE(1, 0, 1, 0)); + tint = _mm_packs_epi32(tint, tint); for (x = startx;x+2 <= endx;x+=2) { __m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128()); @@ -3103,7 +3104,7 @@ void DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(const DPSOFTRAST_State_Triangle * #ifdef SSE2_PRESENT int x, startx = span->startx, endx = span->endx; __m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(color), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2)), blend; - localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0)); + localcolor = _mm_packs_epi32(localcolor, localcolor); blend = _mm_slli_epi16(_mm_shufflehi_epi16(_mm_shufflelo_epi16(localcolor, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)), 4); for (x = startx;x+2 <= endx;x+=2) { @@ -3173,7 +3174,7 @@ void DPSOFTRAST_VertexShader_PostProcess(void) { DPSOFTRAST_Array_TransformProject(DPSOFTRAST_ARRAY_POSITION, DPSOFTRAST_ARRAY_POSITION, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1); DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD0); - DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD1); + DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD4); } void DPSOFTRAST_PixelShader_PostProcess(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span) @@ -3461,15 +3462,17 @@ void DPSOFTRAST_PixelShader_FakeLight(DPSOFTRAST_State_Thread *thread, const DPS +void DPSOFTRAST_VertexShader_LightDirection(void); void DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace(void) { - DPSOFTRAST_VertexShader_Lightmap(); + DPSOFTRAST_VertexShader_LightDirection(); + DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD4); } +void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span); void DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span) { - DPSOFTRAST_PixelShader_Lightmap(thread, triangle, span); - // TODO: IMPLEMENT + DPSOFTRAST_PixelShader_LightDirection(thread, triangle, span); } @@ -3530,20 +3533,20 @@ void DPSOFTRAST_VertexShader_LightDirection(void) LightVector[0] = svector[0] * LightDir[0] + svector[1] * LightDir[1] + svector[2] * LightDir[2]; LightVector[1] = tvector[0] * LightDir[0] + tvector[1] * LightDir[1] + tvector[2] * LightDir[2]; LightVector[2] = normal[0] * LightDir[0] + normal[1] * LightDir[1] + normal[2] * LightDir[2]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0] = LightVector[0]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1] = LightVector[1]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2] = LightVector[2]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+3] = 0.0f; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+0] = LightVector[0]; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+1] = LightVector[1]; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+2] = LightVector[2]; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+3] = 0.0f; EyeVectorModelSpace[0] = EyePosition[0] - position[0]; EyeVectorModelSpace[1] = EyePosition[1] - position[1]; EyeVectorModelSpace[2] = EyePosition[2] - position[2]; EyeVector[0] = svector[0] * EyeVectorModelSpace[0] + svector[1] * EyeVectorModelSpace[1] + svector[2] * EyeVectorModelSpace[2]; EyeVector[1] = tvector[0] * EyeVectorModelSpace[0] + tvector[1] * EyeVectorModelSpace[1] + tvector[2] * EyeVectorModelSpace[2]; EyeVector[2] = normal[0] * EyeVectorModelSpace[0] + normal[1] * EyeVectorModelSpace[1] + normal[2] * EyeVectorModelSpace[2]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0] = EyeVector[0]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1] = EyeVector[1]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2] = EyeVector[2]; - dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+3] = 0.0f; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+0] = EyeVector[0]; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+1] = EyeVector[1]; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+2] = EyeVector[2]; + dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+3] = 0.0f; } DPSOFTRAST_Array_TransformProject(DPSOFTRAST_ARRAY_POSITION, -1, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1); } @@ -3576,6 +3579,8 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons unsigned char buffer_texture_glowbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; unsigned char buffer_texture_pantsbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; unsigned char buffer_texture_shirtbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; + unsigned char buffer_texture_deluxemapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; + unsigned char buffer_texture_lightmapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; int x, startx = span->startx, endx = span->endx; float Color_Ambient[4], Color_Diffuse[4], Color_Specular[4], Color_Glow[4], Color_Pants[4], Color_Shirt[4], LightColor[4]; @@ -3583,11 +3588,18 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons float LightVectorslope[4]; float EyeVectordata[4]; float EyeVectorslope[4]; + float VectorSdata[4]; + float VectorSslope[4]; + float VectorTdata[4]; + float VectorTslope[4]; + float VectorRdata[4]; + float VectorRslope[4]; float z; float diffusetex[4]; float glosstex[4]; float surfacenormal[4]; float lightnormal[4]; + float lightnormal_modelspace[4]; float eyenormal[4]; float specularnormal[4]; float diffuse; @@ -3631,15 +3643,25 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons LightColor[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1]; LightColor[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2]; LightColor[3] = 0.0f; - DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD1); + DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD5); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z); Color_Specular[2] = thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+0]; Color_Specular[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+1]; Color_Specular[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+2]; Color_Specular[3] = 0.0f; SpecularPower = thread->uniform4f[DPSOFTRAST_UNIFORM_SpecularPower*4+0] * (1.0f / 255.0f); - DPSOFTRAST_CALCATTRIB4F(triangle, span, EyeVectordata, EyeVectorslope, DPSOFTRAST_ARRAY_TEXCOORD2); + DPSOFTRAST_CALCATTRIB4F(triangle, span, EyeVectordata, EyeVectorslope, DPSOFTRAST_ARRAY_TEXCOORD6); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_glossbgra8, GL20TU_GLOSS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z); + + if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE) + { + DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorSdata, VectorSslope, DPSOFTRAST_ARRAY_TEXCOORD1); + DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorTdata, VectorTslope, DPSOFTRAST_ARRAY_TEXCOORD2); + DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorRdata, VectorRslope, DPSOFTRAST_ARRAY_TEXCOORD3); + DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z); + DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_deluxemapbgra8, GL20TU_DELUXEMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z); + } + for (x = startx;x < endx;x++) { z = buffer_z[x]; @@ -3663,10 +3685,46 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f; DPSOFTRAST_Vector3Normalize(surfacenormal); - lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z; - lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z; - lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z; - DPSOFTRAST_Vector3Normalize(lightnormal); + if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE) + { + // myhalf3 lightnormal_modelspace = myhalf3(dp_texture2D(Texture_Deluxemap, TexCoordSurfaceLightmap.zw)) * 2.0 + myhalf3(-1.0, -1.0, -1.0);\n"; + lightnormal_modelspace[0] = buffer_texture_deluxemapbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f; + lightnormal_modelspace[1] = buffer_texture_deluxemapbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f; + lightnormal_modelspace[2] = buffer_texture_deluxemapbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f; + + // lightnormal.x = dot(lightnormal_modelspace, myhalf3(VectorS));\n" + lightnormal[0] = lightnormal_modelspace[0] * (VectorSdata[0] + VectorSslope[0] * x) + + lightnormal_modelspace[1] * (VectorSdata[1] + VectorSslope[1] * x) + + lightnormal_modelspace[2] * (VectorSdata[2] + VectorSslope[2] * x); + + // lightnormal.y = dot(lightnormal_modelspace, myhalf3(VectorT));\n" + lightnormal[1] = lightnormal_modelspace[0] * (VectorTdata[0] + VectorTslope[0] * x) + + lightnormal_modelspace[1] * (VectorTdata[1] + VectorTslope[1] * x) + + lightnormal_modelspace[2] * (VectorTdata[2] + VectorTslope[2] * x); + + // lightnormal.z = dot(lightnormal_modelspace, myhalf3(VectorR));\n" + lightnormal[2] = lightnormal_modelspace[0] * (VectorRdata[0] + VectorRslope[0] * x) + + lightnormal_modelspace[1] * (VectorRdata[1] + VectorRslope[1] * x) + + lightnormal_modelspace[2] * (VectorRdata[2] + VectorRslope[2] * x); + + // lightnormal = normalize(lightnormal); // VectorS/T/R are not always perfectly normalized, and EXACTSPECULARMATH is very picky about this\n" + DPSOFTRAST_Vector3Normalize(lightnormal); + + // myhalf3 lightcolor = myhalf3(dp_texture2D(Texture_Lightmap, TexCoordSurfaceLightmap.zw));\n"; + { + float f = 1.0f / (256.0f * max(0.25f, lightnormal[2])); + LightColor[0] = buffer_texture_lightmapbgra8[x*4+0] * f; + LightColor[1] = buffer_texture_lightmapbgra8[x*4+1] * f; + LightColor[2] = buffer_texture_lightmapbgra8[x*4+2] * f; + } + } + else + { + lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z; + lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z; + lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z; + DPSOFTRAST_Vector3Normalize(lightnormal); + } eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z; eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z; @@ -3695,6 +3753,7 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons d[2] = (int)( diffusetex[2] * Color_Ambient[2] + (diffusetex[2] * Color_Diffuse[2] * diffuse + glosstex[2] * Color_Specular[2] * specular) * LightColor[2]);if (d[2] > 255) d[2] = 255; d[3] = (int)( diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255; } + buffer_FragColorbgra8[x*4+0] = d[0]; buffer_FragColorbgra8[x*4+1] = d[1]; buffer_FragColorbgra8[x*4+2] = d[2]; @@ -3711,8 +3770,18 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons LightColor[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1]; LightColor[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2]; LightColor[3] = 0.0f; - DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD1); + DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD5); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z); + + if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE) + { + DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorSdata, VectorSslope, DPSOFTRAST_ARRAY_TEXCOORD1); + DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorTdata, VectorTslope, DPSOFTRAST_ARRAY_TEXCOORD2); + DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorRdata, VectorRslope, DPSOFTRAST_ARRAY_TEXCOORD3); + DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z); + DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_deluxemapbgra8, GL20TU_DELUXEMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z); + } + for (x = startx;x < endx;x++) { z = buffer_z[x]; @@ -3725,10 +3794,46 @@ void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, cons surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f; DPSOFTRAST_Vector3Normalize(surfacenormal); - lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z; - lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z; - lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z; - DPSOFTRAST_Vector3Normalize(lightnormal); + if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE) + { + // myhalf3 lightnormal_modelspace = myhalf3(dp_texture2D(Texture_Deluxemap, TexCoordSurfaceLightmap.zw)) * 2.0 + myhalf3(-1.0, -1.0, -1.0);\n"; + lightnormal_modelspace[0] = buffer_texture_deluxemapbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f; + lightnormal_modelspace[1] = buffer_texture_deluxemapbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f; + lightnormal_modelspace[2] = buffer_texture_deluxemapbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f; + + // lightnormal.x = dot(lightnormal_modelspace, myhalf3(VectorS));\n" + lightnormal[0] = lightnormal_modelspace[0] * (VectorSdata[0] + VectorSslope[0] * x) + + lightnormal_modelspace[1] * (VectorSdata[1] + VectorSslope[1] * x) + + lightnormal_modelspace[2] * (VectorSdata[2] + VectorSslope[2] * x); + + // lightnormal.y = dot(lightnormal_modelspace, myhalf3(VectorT));\n" + lightnormal[1] = lightnormal_modelspace[0] * (VectorTdata[0] + VectorTslope[0] * x) + + lightnormal_modelspace[1] * (VectorTdata[1] + VectorTslope[1] * x) + + lightnormal_modelspace[2] * (VectorTdata[2] + VectorTslope[2] * x); + + // lightnormal.z = dot(lightnormal_modelspace, myhalf3(VectorR));\n" + lightnormal[2] = lightnormal_modelspace[0] * (VectorRdata[0] + VectorRslope[0] * x) + + lightnormal_modelspace[1] * (VectorRdata[1] + VectorRslope[1] * x) + + lightnormal_modelspace[2] * (VectorRdata[2] + VectorRslope[2] * x); + + // lightnormal = normalize(lightnormal); // VectorS/T/R are not always perfectly normalized, and EXACTSPECULARMATH is very picky about this\n" + DPSOFTRAST_Vector3Normalize(lightnormal); + + // myhalf3 lightcolor = myhalf3(dp_texture2D(Texture_Lightmap, TexCoordSurfaceLightmap.zw));\n"; + { + float f = 1.0f / (256.0f * max(0.25f, lightnormal[2])); + LightColor[0] = buffer_texture_lightmapbgra8[x*4+0] * f; + LightColor[1] = buffer_texture_lightmapbgra8[x*4+1] * f; + LightColor[2] = buffer_texture_lightmapbgra8[x*4+2] * f; + } + } + else + { + lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z; + lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z; + lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z; + DPSOFTRAST_Vector3Normalize(lightnormal); + } diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f; if (thread->shader_permutation & SHADERPERMUTATION_GLOW) @@ -4232,9 +4337,9 @@ static const DPSOFTRAST_ShaderModeInfo DPSOFTRAST_ShaderModeTable[SHADERMODE_COU {2, DPSOFTRAST_VertexShader_VertexColor, DPSOFTRAST_PixelShader_VertexColor, {DPSOFTRAST_ARRAY_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, ~0}, {GL20TU_COLOR, ~0}}, {2, DPSOFTRAST_VertexShader_Lightmap, DPSOFTRAST_PixelShader_Lightmap, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}}, {2, DPSOFTRAST_VertexShader_FakeLight, DPSOFTRAST_PixelShader_FakeLight, {~0}, {~0}}, - {2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace, DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}}, + {2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace, DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_LIGHTMAP, GL20TU_DELUXEMAP, ~0}}, {2, DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace, DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}}, - {2, DPSOFTRAST_VertexShader_LightDirection, DPSOFTRAST_PixelShader_LightDirection, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}}, + {2, DPSOFTRAST_VertexShader_LightDirection, DPSOFTRAST_PixelShader_LightDirection, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}}, {2, DPSOFTRAST_VertexShader_LightSource, DPSOFTRAST_PixelShader_LightSource, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_CUBE, ~0}}, {2, DPSOFTRAST_VertexShader_Refraction, DPSOFTRAST_PixelShader_Refraction, {~0}}, {2, DPSOFTRAST_VertexShader_Water, DPSOFTRAST_PixelShader_Water, {~0}}, @@ -4587,9 +4692,9 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS attribxslope = _mm_sub_ps(_mm_mul_ps(attribuxslope, attribedge1), _mm_mul_ps(attribvxslope, attribedge2)); attribyslope = _mm_sub_ps(_mm_mul_ps(attribvyslope, attribedge2), _mm_mul_ps(attribuyslope, attribedge1)); attriborigin = _mm_sub_ps(attriborigin, _mm_add_ps(_mm_mul_ps(attribxslope, x1), _mm_mul_ps(attribyslope, y1))); - _mm_stream_ps(triangle->attribs[k][0], attribxslope); - _mm_stream_ps(triangle->attribs[k][1], attribyslope); - _mm_stream_ps(triangle->attribs[k][2], attriborigin); + _mm_storeu_ps(triangle->attribs[k][0], attribxslope); + _mm_storeu_ps(triangle->attribs[k][1], attribyslope); + _mm_storeu_ps(triangle->attribs[k][2], attriborigin); if (k == DPSOFTRAST_ShaderModeTable[thread->shader_mode].lodarrayindex) { mipedgescale = _mm_movelh_ps(triangleedge1, triangleedge2);