#include "thread.h"
#include "dpsoftrast.h"
+#ifdef _MSC_VER
+#pragma warning(disable : 4324) // MSVC only: silence C4324 (structure was padded due to alignment specifier)
+#endif
+
#ifndef __cplusplus
typedef qboolean bool;
#endif
#define ALIGN_SIZE 16
#define ATOMIC_SIZE 32
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
#if defined(__APPLE__)
#include <libkern/OSAtomic.h>
#define ALIGN(var) var __attribute__((__aligned__(16)))
#define ATOMIC_ADD(counter, val) ((void)((counter) += (val)))
#endif
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
#include <emmintrin.h>
#define MM_MALLOC(size) _mm_malloc(size, ATOMIC_SIZE)
int shader_mode;
int shader_permutation;
+ int shader_exactspecularmath;
DPSOFTRAST_Texture *texbound[DPSOFTRAST_MAXTEXTUREUNITS];
int shader_mode;
int shader_permutation;
+ int shader_exactspecularmath;
int texture_max;
int texture_end;
fb_viewportscale[0] = 1.0f;
}
+static void DPSOFTRAST_RecalcThread(DPSOFTRAST_State_Thread *thread) // derives this thread's row bounds (miny1/maxy1 and miny2/maxy2) from thread->index, dpsoftrast.fb_height and dpsoftrast.numthreads
+{
+ if (dpsoftrast.interlace)
+ {
+ thread->miny1 = (thread->index*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads); // interlaced: fb_height is divided into 2*numthreads slices; the first band is slice number index
+ thread->maxy1 = ((thread->index+1)*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads); // same formula as the next index's miny1, so consecutive slices share a boundary
+ thread->miny2 = ((dpsoftrast.numthreads+thread->index)*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads); // the second band is slice number numthreads+index
+ thread->maxy2 = ((dpsoftrast.numthreads+thread->index+1)*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads);
+ }
+ else
+ {
+ thread->miny1 = thread->miny2 = (thread->index*dpsoftrast.fb_height)/dpsoftrast.numthreads; // not interlaced: one slice out of numthreads, stored identically in both bands
+ thread->maxy1 = thread->maxy2 = ((thread->index+1)*dpsoftrast.fb_height)/dpsoftrast.numthreads;
+ }
+}
+
static void DPSOFTRAST_RecalcFB(DPSOFTRAST_State_Thread *thread)
{
// calculate framebuffer scissor, viewport, viewport clipped by scissor,
thread->fb_scissor[3] = y2 - y1;
DPSOFTRAST_RecalcViewport(thread->viewport, thread->fb_viewportcenter, thread->fb_viewportscale);
+ DPSOFTRAST_RecalcThread(thread);
}
static void DPSOFTRAST_RecalcDepthFunc(DPSOFTRAST_State_Thread *thread)
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
if (texture->binds)
DPSOFTRAST_Flush();
- dst = texture->bytes + (blocky * texture->mipmap[0][2] + blockx) * 4;
- while (blockheight > 0)
+ if (pixels)
{
- memcpy(dst, pixels, blockwidth * 4);
- pixels += blockwidth * 4;
- dst += texture->mipmap[0][2] * 4;
- blockheight--;
+ dst = texture->bytes + (blocky * texture->mipmap[0][2] + blockx) * 4;
+ while (blockheight > 0)
+ {
+ memcpy(dst, pixels, blockwidth * 4);
+ pixels += blockwidth * 4;
+ dst += texture->mipmap[0][2] * 4;
+ blockheight--;
+ }
}
DPSOFTRAST_Texture_CalculateMipmaps(index);
}
texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return;
if (texture->binds)
DPSOFTRAST_Flush();
- memcpy(texture->bytes, pixels, texture->mipmap[0][1]);
+ if (pixels)
+ memcpy(texture->bytes, pixels, texture->mipmap[0][1]);
DPSOFTRAST_Texture_CalculateMipmaps(index);
}
int DPSOFTRAST_Texture_GetWidth(int index, int mip)
texture->filter = filter;
}
-void DPSOFTRAST_SetRenderTargets(int width, int height, unsigned int *depthpixels, unsigned int *colorpixels0, unsigned int *colorpixels1, unsigned int *colorpixels2, unsigned int *colorpixels3)
-{
- if (width != dpsoftrast.fb_width || height != dpsoftrast.fb_height || depthpixels != dpsoftrast.fb_depthpixels ||
- colorpixels0 != dpsoftrast.fb_colorpixels[0] || colorpixels1 != dpsoftrast.fb_colorpixels[1] ||
- colorpixels2 != dpsoftrast.fb_colorpixels[2] || colorpixels3 != dpsoftrast.fb_colorpixels[3])
- DPSOFTRAST_Flush();
- dpsoftrast.fb_width = width;
- dpsoftrast.fb_height = height;
- dpsoftrast.fb_depthpixels = depthpixels;
- dpsoftrast.fb_colorpixels[0] = colorpixels0;
- dpsoftrast.fb_colorpixels[1] = colorpixels1;
- dpsoftrast.fb_colorpixels[2] = colorpixels2;
- dpsoftrast.fb_colorpixels[3] = colorpixels3;
-}
-
static void DPSOFTRAST_Draw_FlushThreads(void);
static void DPSOFTRAST_Draw_SyncCommands(void)
static void DPSOFTRAST_Interpret_ClearColor(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_Command_ClearColor *command)
{
int i, x1, y1, x2, y2, w, h, x, y;
- int miny1 = thread->miny1;
- int maxy1 = thread->maxy1;
- int miny2 = thread->miny2;
- int maxy2 = thread->maxy2;
+ int miny1, maxy1, miny2, maxy2;
int bandy;
unsigned int *p;
unsigned int c;
DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB);
+ miny1 = thread->miny1;
+ maxy1 = thread->maxy1;
+ miny2 = thread->miny2;
+ maxy2 = thread->maxy2;
x1 = thread->fb_scissor[0];
y1 = thread->fb_scissor[1];
x2 = thread->fb_scissor[0] + thread->fb_scissor[2];
static void DPSOFTRAST_Interpret_ClearDepth(DPSOFTRAST_State_Thread *thread, DPSOFTRAST_Command_ClearDepth *command)
{
int x1, y1, x2, y2, w, h, x, y;
- int miny1 = thread->miny1;
- int maxy1 = thread->maxy1;
- int miny2 = thread->miny2;
- int maxy2 = thread->maxy2;
+ int miny1, maxy1, miny2, maxy2;
int bandy;
unsigned int *p;
unsigned int c;
DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB);
+ miny1 = thread->miny1;
+ maxy1 = thread->maxy1;
+ miny2 = thread->miny2;
+ maxy2 = thread->maxy2;
x1 = thread->fb_scissor[0];
y1 = thread->fb_scissor[1];
x2 = thread->fb_scissor[0] + thread->fb_scissor[2];
if (th > sh) th = sh;
if (tw < 1 || th < 1)
return;
+ sy1 = sheight - 1 - sy1;
for (y = 0;y < th;y++)
- memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 + y) * swidth + sx1), tw*4);
+ memcpy(tpixels + ((ty1 + y) * twidth + tx1), spixels + ((sy1 - y) * swidth + sx1), tw*4);
if (texture->mipmaps > 1)
DPSOFTRAST_Texture_CalculateMipmaps(index);
}
dpsoftrast.stride_texcoord[unitnum] = stride;
}
-DEFCOMMAND(18, SetShader, int mode; int permutation;)
+DEFCOMMAND(18, SetShader, int mode; int permutation; int exactspecularmath;) // fields listed here are the ones read from the command in DPSOFTRAST_Interpret_SetShader below
static void DPSOFTRAST_Interpret_SetShader(DPSOFTRAST_State_Thread *thread, DPSOFTRAST_Command_SetShader *command)
{
thread->shader_mode = command->mode;
thread->shader_permutation = command->permutation;
+ thread->shader_exactspecularmath = command->exactspecularmath; // copy the flag from the command into this thread's shader state, alongside mode and permutation
}
-void DPSOFTRAST_SetShader(int mode, int permutation)
+void DPSOFTRAST_SetShader(int mode, int permutation, int exactspecularmath) // allocates a SetShader command, fills in its fields, and mirrors the same values into the global dpsoftrast state
{
DPSOFTRAST_Command_SetShader *command = DPSOFTRAST_ALLOCATECOMMAND(SetShader);
command->mode = mode;
command->permutation = permutation;
+ command->exactspecularmath = exactspecularmath; // DPSOFTRAST_Interpret_SetShader copies this into thread->shader_exactspecularmath
dpsoftrast.shader_mode = mode;
dpsoftrast.shader_permutation = permutation;
+ dpsoftrast.shader_exactspecularmath = exactspecularmath; // global copy, stored next to shader_mode and shader_permutation
}
DEFCOMMAND(19, Uniform4f, DPSOFTRAST_UNIFORM index; float val[4];)
}
void DPSOFTRAST_UniformMatrix4fv(DPSOFTRAST_UNIFORM uniform, int arraysize, int transpose, const float *v)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int i, index;
for (i = 0, index = (int)uniform;i < arraysize;i++, index += 4, v += 16)
{
dpsoftrast.uniform1i[command->index] = i0;
}
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
static void DPSOFTRAST_Load4fTo4f(float *dst, const unsigned char *src, int size, int stride)
{
float *end = dst + size*4;
void DPSOFTRAST_Vertex_Transform(float *out4f, const float *in4f, int numitems, const float *inmatrix16f)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
static const float identitymatrix[4][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}};
__m128 m0, m1, m2, m3;
float *end;
memcpy(out4f, in4f, numitems * sizeof(float[4]));
}
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
#define DPSOFTRAST_PROJECTVERTEX(out, in, viewportcenter, viewportscale) \
{ \
__m128 p = (in), w = _mm_shuffle_ps(p, p, _MM_SHUFFLE(3, 3, 3, 3)); \
_mm_mul_ps(_mm_shuffle_ps(p, p, _MM_SHUFFLE(3, 3, 3, 3)), m3)))); \
}
-static int DPSOFTRAST_Vertex_BoundY(int *starty, int *endy, __m128 minpos, __m128 maxpos, __m128 viewportcenter, __m128 viewportscale, __m128 m0, __m128 m1, __m128 m2, __m128 m3)
+static int DPSOFTRAST_Vertex_BoundY(int *starty, int *endy, const float *minposf, const float *maxposf, const float *inmatrix16f)
{
int clipmask = 0xFF;
+ __m128 viewportcenter = _mm_load_ps(dpsoftrast.fb_viewportcenter), viewportscale = _mm_load_ps(dpsoftrast.fb_viewportscale);
__m128 bb[8], clipdist[8], minproj = _mm_set_ss(2.0f), maxproj = _mm_set_ss(-2.0f);
+ __m128 m0 = _mm_loadu_ps(inmatrix16f), m1 = _mm_loadu_ps(inmatrix16f + 4), m2 = _mm_loadu_ps(inmatrix16f + 8), m3 = _mm_loadu_ps(inmatrix16f + 12);
+ __m128 minpos = _mm_load_ps(minposf), maxpos = _mm_load_ps(maxposf);
m0 = _mm_shuffle_ps(m0, m0, _MM_SHUFFLE(3, 2, 0, 1));
m1 = _mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3, 2, 0, 1));
m2 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3, 2, 0, 1));
static int DPSOFTRAST_Vertex_Project(float *out4f, float *screen4f, int *starty, int *endy, const float *in4f, int numitems)
{
+ static const float identitymatrix[16] = {1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1};
float *end = out4f + numitems*4;
__m128 viewportcenter = _mm_load_ps(dpsoftrast.fb_viewportcenter), viewportscale = _mm_load_ps(dpsoftrast.fb_viewportscale);
__m128 minpos, maxpos;
}
}
if (starty && endy)
- return DPSOFTRAST_Vertex_BoundY(starty, endy, minpos, maxpos, viewportcenter, viewportscale,
- _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f),
- _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f),
- _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f),
- _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));
+ {
+ ALIGN(float minposf[4]);
+ ALIGN(float maxposf[4]);
+ _mm_store_ps(minposf, minpos);
+ _mm_store_ps(maxposf, maxpos);
+ return DPSOFTRAST_Vertex_BoundY(starty, endy, minposf, maxposf, identitymatrix);
+ }
return 0;
}
static int DPSOFTRAST_Vertex_TransformProject(float *out4f, float *screen4f, int *starty, int *endy, const float *in4f, int numitems, const float *inmatrix16f)
{
- static const float identitymatrix[4][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}};
+ static const float identitymatrix[16] = {1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1};
__m128 m0, m1, m2, m3, viewportcenter, viewportscale, minpos, maxpos;
float *end;
if (!memcmp(identitymatrix, inmatrix16f, sizeof(float[16])))
}
}
if (starty && endy)
- return DPSOFTRAST_Vertex_BoundY(starty, endy, minpos, maxpos, viewportcenter, viewportscale, m0, m1, m2, m3);
+ {
+ ALIGN(float minposf[4]);
+ ALIGN(float maxposf[4]);
+ _mm_store_ps(minposf, minpos);
+ _mm_store_ps(maxposf, maxpos);
+ return DPSOFTRAST_Vertex_BoundY(starty, endy, minposf, maxposf, inmatrix16f);
+ }
return 0;
}
#endif
static float *DPSOFTRAST_Array_Load(int outarray, int inarray)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
float *outf = dpsoftrast.post_array4f[outarray];
const unsigned char *inb;
int firstvertex = dpsoftrast.firstvertex;
#if 0
static float *DPSOFTRAST_Array_Project(int outarray, int inarray)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
float *data = inarray >= 0 ? DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray];
dpsoftrast.drawclipped = DPSOFTRAST_Vertex_Project(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices);
return data;
static float *DPSOFTRAST_Array_TransformProject(int outarray, int inarray, const float *inmatrix16f)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
float *data = inarray >= 0 ? DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray];
dpsoftrast.drawclipped = DPSOFTRAST_Vertex_TransformProject(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices, inmatrix16f);
return data;
void DPSOFTRAST_Draw_Span_FinishBGRA8(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, const unsigned char* RESTRICT in4ub)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
tcimax[1] = texture->mipmap[mip][3]-1;
tciwrapmask[0] = texture->mipmap[mip][2]-1;
tciwrapmask[1] = texture->mipmap[mip][3]-1;
- endtc[0] = (data[0] + slope[0]*startx) * zf[startx] * tcscale[0] - 0.5f;
- endtc[1] = (data[1] + slope[1]*startx) * zf[startx] * tcscale[1] - 0.5f;
+ endtc[0] = (data[0] + slope[0]*startx) * zf[startx] * tcscale[0];
+ endtc[1] = (data[1] + slope[1]*startx) * zf[startx] * tcscale[1];
+ if (filter)
+ {
+ endtc[0] -= 0.5f;
+ endtc[1] -= 0.5f;
+ }
for (x = startx;x < endx;)
{
unsigned int subtc[2];
unsigned int substep[2];
- float subscale = 65536.0f/DPSOFTRAST_DRAW_MAXSUBSPAN;
+ float subscale = 4096.0f/DPSOFTRAST_DRAW_MAXSUBSPAN;
int nextsub = x + DPSOFTRAST_DRAW_MAXSUBSPAN, endsub = nextsub - 1;
if (nextsub >= endx)
{
nextsub = endsub = endx-1;
- if (x < nextsub) subscale = 65536.0f / (nextsub - x);
+ if (x < nextsub) subscale = 4096.0f / (nextsub - x);
}
tc[0] = endtc[0];
tc[1] = endtc[1];
- endtc[0] = (data[0] + slope[0]*nextsub) * zf[nextsub] * tcscale[0] - 0.5f;
- endtc[1] = (data[1] + slope[1]*nextsub) * zf[nextsub] * tcscale[1] - 0.5f;
+ endtc[0] = (data[0] + slope[0]*nextsub) * zf[nextsub] * tcscale[0];
+ endtc[1] = (data[1] + slope[1]*nextsub) * zf[nextsub] * tcscale[1];
+ if (filter)
+ {
+ endtc[0] -= 0.5f;
+ endtc[1] -= 0.5f;
+ }
substep[0] = (endtc[0] - tc[0]) * subscale;
substep[1] = (endtc[1] - tc[1]) * subscale;
- subtc[0] = tc[0] * (1<<16);
- subtc[1] = tc[1] * (1<<16);
+ subtc[0] = tc[0] * (1<<12);
+ subtc[1] = tc[1] * (1<<12);
if (filter)
{
if (flags & DPSOFTRAST_TEXTURE_FLAG_CLAMPTOEDGE)
unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
- tci[0] = subtc[0]>>16;
- tci[1] = subtc[1]>>16;
+ tci[0] = subtc[0]>>12;
+ tci[1] = subtc[1]>>12;
tci1[0] = tci[0] + 1;
tci1[1] = tci[1] + 1;
tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
unsigned int frac[2] = { subtc[0]&0xFFF, subtc[1]&0xFFF };
unsigned int ifrac[2] = { 0x1000 - frac[0], 0x1000 - frac[1] };
unsigned int lerp[4] = { ifrac[0]*ifrac[1], frac[0]*ifrac[1], ifrac[0]*frac[1], frac[0]*frac[1] };
- tci[0] = subtc[0]>>16;
- tci[1] = subtc[1]>>16;
+ tci[0] = subtc[0]>>12;
+ tci[1] = subtc[1]>>12;
tci1[0] = tci[0] + 1;
tci1[1] = tci[1] + 1;
tci[0] &= tciwrapmask[0];
{
for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
{
- tci[0] = subtc[0]>>16;
- tci[1] = subtc[1]>>16;
+ tci[0] = subtc[0]>>12;
+ tci[1] = subtc[1]>>12;
tci[0] = tci[0] >= tcimin[0] ? (tci[0] <= tcimax[0] ? tci[0] : tcimax[0]) : tcimin[0];
tci[1] = tci[1] >= tcimin[1] ? (tci[1] <= tcimax[1] ? tci[1] : tcimax[1]) : tcimin[1];
pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
{
for (; x <= endsub; x++, subtc[0] += substep[0], subtc[1] += substep[1])
{
- tci[0] = subtc[0]>>16;
- tci[1] = subtc[1]>>16;
+ tci[0] = subtc[0]>>12;
+ tci[1] = subtc[1]>>12;
tci[0] &= tciwrapmask[0];
tci[1] &= tciwrapmask[1];
pixel[0] = pixelbase + 4 * (tci[1]*tciwidth+tci[0]);
void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
tcscale = _mm_cvtepi32_ps(tcsize);
data = _mm_mul_ps(_mm_movelh_ps(data, data), tcscale);
slope = _mm_mul_ps(_mm_movelh_ps(slope, slope), tcscale);
- endtc = _mm_sub_ps(_mm_mul_ps(_mm_add_ps(data, _mm_mul_ps(slope, _mm_set1_ps(startx))), _mm_load1_ps(&zf[startx])), _mm_set1_ps(0.5f));
+ endtc = _mm_mul_ps(_mm_add_ps(data, _mm_mul_ps(slope, _mm_set1_ps(startx))), _mm_load1_ps(&zf[startx]));
+ if (filter)
+ endtc = _mm_sub_ps(endtc, _mm_set1_ps(0.5f));
endsubtc = _mm_cvtps_epi32(_mm_mul_ps(endtc, _mm_set1_ps(65536.0f)));
tcoffset = _mm_add_epi32(_mm_slli_epi32(_mm_shuffle_epi32(tcsize, _MM_SHUFFLE(0, 0, 0, 0)), 18), _mm_set1_epi32(4));
tcmax = _mm_packs_epi32(tcmask, tcmask);
}
tc = endtc;
subtc = endsubtc;
- endtc = _mm_sub_ps(_mm_mul_ps(_mm_add_ps(data, _mm_mul_ps(slope, _mm_set1_ps(nextsub))), _mm_load1_ps(&zf[nextsub])), _mm_set1_ps(0.5f));
+ endtc = _mm_mul_ps(_mm_add_ps(data, _mm_mul_ps(slope, _mm_set1_ps(nextsub))), _mm_load1_ps(&zf[nextsub]));
+ if (filter)
+ endtc = _mm_sub_ps(endtc, _mm_set1_ps(0.5f));
substep = _mm_cvtps_epi32(_mm_mul_ps(_mm_sub_ps(endtc, tc), subscale));
endsubtc = _mm_cvtps_epi32(_mm_mul_ps(endtc, _mm_set1_ps(65536.0f)));
subtc = _mm_unpacklo_epi64(subtc, _mm_add_epi32(subtc, substep));
void DPSOFTRAST_Draw_Span_MultiplyVaryingBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, int arrayindex, const float *zf)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
void DPSOFTRAST_Draw_Span_VaryingBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, int arrayindex, const float *zf)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x;
int startx = span->startx;
int endx = span->endx;
void DPSOFTRAST_Draw_Span_AddBloomBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *subcolor)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x, startx = span->startx, endx = span->endx;
__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(subcolor), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2));
- localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+ localcolor = _mm_packs_epi32(localcolor, localcolor);
for (x = startx;x+2 <= endx;x+=2)
{
__m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
__m128i pix2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&inb4ub[x*4]), _mm_setzero_si128());
- pix1 = _mm_add_epi16(pix1, _mm_sub_epi16(pix2, localcolor));
+ pix1 = _mm_add_epi16(pix1, _mm_subs_epu16(pix2, localcolor));
_mm_storel_epi64((__m128i *)&out4ub[x*4], _mm_packus_epi16(pix1, pix1));
}
if (x < endx)
{
__m128i pix1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)&ina4ub[x*4]), _mm_setzero_si128());
__m128i pix2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int *)&inb4ub[x*4]), _mm_setzero_si128());
- pix1 = _mm_add_epi16(pix1, _mm_sub_epi16(pix2, localcolor));
+ pix1 = _mm_add_epi16(pix1, _mm_subs_epu16(pix2, localcolor));
*(int *)&out4ub[x*4] = _mm_cvtsi128_si32(_mm_packus_epi16(pix1, pix1));
}
#endif
void DPSOFTRAST_Draw_Span_MultiplyBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x, startx = span->startx, endx = span->endx;
for (x = startx;x+2 <= endx;x+=2)
{
void DPSOFTRAST_Draw_Span_AddBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x, startx = span->startx, endx = span->endx;
for (x = startx;x+2 <= endx;x+=2)
{
void DPSOFTRAST_Draw_Span_TintedAddBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub, const float *inbtintbgra)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x, startx = span->startx, endx = span->endx;
__m128i tint = _mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(inbtintbgra), _mm_set1_ps(256.0f)));
- tint = _mm_shuffle_epi32(_mm_packs_epi32(tint, tint), _MM_SHUFFLE(1, 0, 1, 0));
+ tint = _mm_packs_epi32(tint, tint);
for (x = startx;x+2 <= endx;x+=2)
{
__m128i pix1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&ina4ub[x*4]), _mm_setzero_si128());
void DPSOFTRAST_Draw_Span_MixBuffersBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *ina4ub, const unsigned char *inb4ub)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x, startx = span->startx, endx = span->endx;
for (x = startx;x+2 <= endx;x+=2)
{
void DPSOFTRAST_Draw_Span_MixUniformColorBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char *out4ub, const unsigned char *in4ub, const float *color)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int x, startx = span->startx, endx = span->endx;
__m128i localcolor = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_loadu_ps(color), _mm_set1_ps(255.0f))), _MM_SHUFFLE(3, 0, 1, 2)), blend;
- localcolor = _mm_shuffle_epi32(_mm_packs_epi32(localcolor, localcolor), _MM_SHUFFLE(1, 0, 1, 0));
+ localcolor = _mm_packs_epi32(localcolor, localcolor);
blend = _mm_slli_epi16(_mm_shufflehi_epi16(_mm_shufflelo_epi16(localcolor, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)), 4);
for (x = startx;x+2 <= endx;x+=2)
{
{
DPSOFTRAST_Array_TransformProject(DPSOFTRAST_ARRAY_POSITION, DPSOFTRAST_ARRAY_POSITION, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD0);
- DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD1);
+ DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD4);
}
void DPSOFTRAST_PixelShader_PostProcess(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
void DPSOFTRAST_PixelShader_FlatColor(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
unsigned char * RESTRICT pixelmask = span->pixelmask;
unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;
int x, startx = span->startx, endx = span->endx;
void DPSOFTRAST_PixelShader_VertexColor(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
unsigned char * RESTRICT pixelmask = span->pixelmask;
unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;
int x, startx = span->startx, endx = span->endx;
void DPSOFTRAST_PixelShader_Lightmap(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
unsigned char * RESTRICT pixelmask = span->pixelmask;
unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;
int x, startx = span->startx, endx = span->endx;
}
+void DPSOFTRAST_VertexShader_LightDirection(void); // forward declaration so the FakeLight / LightDirectionMap wrappers below can call it; definition is not in this part of the file (presumably further down)
+void DPSOFTRAST_PixelShader_LightDirection(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span); // forward declaration, same reason as above
void DPSOFTRAST_VertexShader_FakeLight(void)
{
- DPSOFTRAST_Array_TransformProject(DPSOFTRAST_ARRAY_POSITION, DPSOFTRAST_ARRAY_POSITION, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
+ DPSOFTRAST_VertexShader_LightDirection(); // FakeLight has no vertex work of its own; it delegates entirely to the LightDirection vertex shader
}
void DPSOFTRAST_PixelShader_FakeLight(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
- // TODO: IMPLEMENT
- float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
- unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
- DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
- DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
+ DPSOFTRAST_PixelShader_LightDirection(thread, triangle, span); // forwards the span unchanged to the LightDirection pixel shader instead of writing an all-zero span
}
void DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace(void)
{
- DPSOFTRAST_VertexShader_Lightmap();
+ DPSOFTRAST_VertexShader_LightDirection(); // shared LightDirection vertex setup
+ DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD4); // additionally load TEXCOORD4, the array the SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE pixel path samples the lightmap and deluxemap with
}
void DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
- DPSOFTRAST_PixelShader_Lightmap(thread, triangle, span);
- // TODO: IMPLEMENT
+ DPSOFTRAST_PixelShader_LightDirection(thread, triangle, span); // thin wrapper: all per-pixel work is done by the LightDirection pixel shader
}
void DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace(void)
{
- DPSOFTRAST_VertexShader_Lightmap();
+ DPSOFTRAST_VertexShader_LightDirection(); // shared LightDirection vertex setup
+ DPSOFTRAST_Array_Load(DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD4); // additionally load TEXCOORD4, the array the SHADERMODE_LIGHTDIRECTIONMAP_TANGENTSPACE pixel path samples the lightmap with
}
void DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
- DPSOFTRAST_PixelShader_Lightmap(thread, triangle, span);
- // TODO: IMPLEMENT
+ DPSOFTRAST_PixelShader_LightDirection(thread, triangle, span); // thin wrapper: all per-pixel work is done by the LightDirection pixel shader
}
LightVector[0] = svector[0] * LightDir[0] + svector[1] * LightDir[1] + svector[2] * LightDir[2];
LightVector[1] = tvector[0] * LightDir[0] + tvector[1] * LightDir[1] + tvector[2] * LightDir[2];
LightVector[2] = normal[0] * LightDir[0] + normal[1] * LightDir[1] + normal[2] * LightDir[2];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+0] = LightVector[0];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+1] = LightVector[1];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+2] = LightVector[2];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD1][i*4+3] = 0.0f;
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+0] = LightVector[0];
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+1] = LightVector[1];
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+2] = LightVector[2];
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD5][i*4+3] = 0.0f;
EyeVectorModelSpace[0] = EyePosition[0] - position[0];
EyeVectorModelSpace[1] = EyePosition[1] - position[1];
EyeVectorModelSpace[2] = EyePosition[2] - position[2];
EyeVector[0] = svector[0] * EyeVectorModelSpace[0] + svector[1] * EyeVectorModelSpace[1] + svector[2] * EyeVectorModelSpace[2];
EyeVector[1] = tvector[0] * EyeVectorModelSpace[0] + tvector[1] * EyeVectorModelSpace[1] + tvector[2] * EyeVectorModelSpace[2];
EyeVector[2] = normal[0] * EyeVectorModelSpace[0] + normal[1] * EyeVectorModelSpace[1] + normal[2] * EyeVectorModelSpace[2];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+0] = EyeVector[0];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+1] = EyeVector[1];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+2] = EyeVector[2];
- dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD2][i*4+3] = 0.0f;
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+0] = EyeVector[0];
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+1] = EyeVector[1];
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+2] = EyeVector[2];
+ dpsoftrast.post_array4f[DPSOFTRAST_ARRAY_TEXCOORD6][i*4+3] = 0.0f;
}
DPSOFTRAST_Array_TransformProject(DPSOFTRAST_ARRAY_POSITION, -1, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
}
unsigned char buffer_texture_glowbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
unsigned char buffer_texture_pantsbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
unsigned char buffer_texture_shirtbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_deluxemapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+ unsigned char buffer_texture_lightmapbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
int x, startx = span->startx, endx = span->endx;
float Color_Ambient[4], Color_Diffuse[4], Color_Specular[4], Color_Glow[4], Color_Pants[4], Color_Shirt[4], LightColor[4];
float LightVectorslope[4];
float EyeVectordata[4];
float EyeVectorslope[4];
+ float VectorSdata[4];
+ float VectorSslope[4];
+ float VectorTdata[4];
+ float VectorTslope[4];
+ float VectorRdata[4];
+ float VectorRslope[4];
float z;
float diffusetex[4];
float glosstex[4];
float surfacenormal[4];
float lightnormal[4];
+ float lightnormal_modelspace[4];
float eyenormal[4];
float specularnormal[4];
float diffuse;
LightColor[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
LightColor[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
LightColor[3] = 0.0f;
- DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD1);
DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
Color_Specular[2] = thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+0];
Color_Specular[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+1];
Color_Specular[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Specular*4+2];
Color_Specular[3] = 0.0f;
SpecularPower = thread->uniform4f[DPSOFTRAST_UNIFORM_SpecularPower*4+0] * (1.0f / 255.0f);
- DPSOFTRAST_CALCATTRIB4F(triangle, span, EyeVectordata, EyeVectorslope, DPSOFTRAST_ARRAY_TEXCOORD2);
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, EyeVectordata, EyeVectorslope, DPSOFTRAST_ARRAY_TEXCOORD6);
DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_glossbgra8, GL20TU_GLOSS, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+
+ if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE)
+ {
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorSdata, VectorSslope, DPSOFTRAST_ARRAY_TEXCOORD1);
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorTdata, VectorTslope, DPSOFTRAST_ARRAY_TEXCOORD2);
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorRdata, VectorRslope, DPSOFTRAST_ARRAY_TEXCOORD3);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_deluxemapbgra8, GL20TU_DELUXEMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ }
+ else if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_TANGENTSPACE)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_deluxemapbgra8, GL20TU_DELUXEMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ }
+ else if(thread->shader_mode == SHADERMODE_FAKELIGHT)
+ {
+ // nothing of this needed
+ }
+ else
+ {
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD5);
+ }
+
for (x = startx;x < endx;x++)
{
z = buffer_z[x];
surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
DPSOFTRAST_Vector3Normalize(surfacenormal);
- lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
- lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
- lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
- DPSOFTRAST_Vector3Normalize(lightnormal);
+ if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE)
+ {
+ // myhalf3 lightnormal_modelspace = myhalf3(dp_texture2D(Texture_Deluxemap, TexCoordSurfaceLightmap.zw)) * 2.0 + myhalf3(-1.0, -1.0, -1.0);\n";
+ lightnormal_modelspace[0] = buffer_texture_deluxemapbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ lightnormal_modelspace[1] = buffer_texture_deluxemapbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ lightnormal_modelspace[2] = buffer_texture_deluxemapbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+
+ // lightnormal.x = dot(lightnormal_modelspace, myhalf3(VectorS));\n"
+ lightnormal[0] = lightnormal_modelspace[0] * (VectorSdata[0] + VectorSslope[0] * x)
+ + lightnormal_modelspace[1] * (VectorSdata[1] + VectorSslope[1] * x)
+ + lightnormal_modelspace[2] * (VectorSdata[2] + VectorSslope[2] * x);
+
+ // lightnormal.y = dot(lightnormal_modelspace, myhalf3(VectorT));\n"
+ lightnormal[1] = lightnormal_modelspace[0] * (VectorTdata[0] + VectorTslope[0] * x)
+ + lightnormal_modelspace[1] * (VectorTdata[1] + VectorTslope[1] * x)
+ + lightnormal_modelspace[2] * (VectorTdata[2] + VectorTslope[2] * x);
+
+ // lightnormal.z = dot(lightnormal_modelspace, myhalf3(VectorR));\n"
+ lightnormal[2] = lightnormal_modelspace[0] * (VectorRdata[0] + VectorRslope[0] * x)
+ + lightnormal_modelspace[1] * (VectorRdata[1] + VectorRslope[1] * x)
+ + lightnormal_modelspace[2] * (VectorRdata[2] + VectorRslope[2] * x);
+
+ // lightnormal = normalize(lightnormal); // VectorS/T/R are not always perfectly normalized, and EXACTSPECULARMATH is very picky about this\n"
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ // myhalf3 lightcolor = myhalf3(dp_texture2D(Texture_Lightmap, TexCoordSurfaceLightmap.zw));\n";
+ {
+ float f = 1.0f / (256.0f * max(0.25f, lightnormal[2]));
+ LightColor[0] = buffer_texture_lightmapbgra8[x*4+0] * f;
+ LightColor[1] = buffer_texture_lightmapbgra8[x*4+1] * f;
+ LightColor[2] = buffer_texture_lightmapbgra8[x*4+2] * f;
+ }
+ }
+ else if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_TANGENTSPACE)
+ {
+ lightnormal[0] = buffer_texture_deluxemapbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ lightnormal[1] = buffer_texture_deluxemapbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ lightnormal[2] = buffer_texture_deluxemapbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+ {
+ float f = 1.0f / 256.0f;
+ LightColor[0] = buffer_texture_lightmapbgra8[x*4+0] * f;
+ LightColor[1] = buffer_texture_lightmapbgra8[x*4+1] * f;
+ LightColor[2] = buffer_texture_lightmapbgra8[x*4+2] * f;
+ }
+ }
+ else if(thread->shader_mode == SHADERMODE_FAKELIGHT)
+ {
+ lightnormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ lightnormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ lightnormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ LightColor[0] = 1.0;
+ LightColor[1] = 1.0;
+ LightColor[2] = 1.0;
+ }
+ else
+ {
+ lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
+ lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
+ lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+ }
- eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
- eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
- eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
- DPSOFTRAST_Vector3Normalize(eyenormal);
+ diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
- specularnormal[0] = lightnormal[0] + eyenormal[0];
- specularnormal[1] = lightnormal[1] + eyenormal[1];
- specularnormal[2] = lightnormal[2] + eyenormal[2];
- DPSOFTRAST_Vector3Normalize(specularnormal);
+ if(thread->shader_exactspecularmath)
+ {
+ // reflect lightnormal at surfacenormal, take the negative of that
+ // i.e. we want (2*dot(N, i) * N - I) for N=surfacenormal, I=lightnormal
+ float f;
+ f = DPSOFTRAST_Vector3Dot(lightnormal, surfacenormal);
+ specularnormal[0] = 2*f*surfacenormal[0] - lightnormal[0];
+ specularnormal[1] = 2*f*surfacenormal[1] - lightnormal[1];
+ specularnormal[2] = 2*f*surfacenormal[2] - lightnormal[2];
+
+ // dot of this and normalize(EyeVectorFogDepth.xyz)
+ eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(eyenormal);
+
+ specular = DPSOFTRAST_Vector3Dot(eyenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
+ }
+ else
+ {
+ eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(eyenormal);
+
+ specularnormal[0] = lightnormal[0] + eyenormal[0];
+ specularnormal[1] = lightnormal[1] + eyenormal[1];
+ specularnormal[2] = lightnormal[2] + eyenormal[2];
+ DPSOFTRAST_Vector3Normalize(specularnormal);
+
+ specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
+ }
- diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
- specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
specular = pow(specular, SpecularPower * glosstex[3]);
if (thread->shader_permutation & SHADERPERMUTATION_GLOW)
{
d[2] = (int)( diffusetex[2] * Color_Ambient[2] + (diffusetex[2] * Color_Diffuse[2] * diffuse + glosstex[2] * Color_Specular[2] * specular) * LightColor[2]);if (d[2] > 255) d[2] = 255;
d[3] = (int)( diffusetex[3] * Color_Ambient[3]);if (d[3] > 255) d[3] = 255;
}
+
buffer_FragColorbgra8[x*4+0] = d[0];
buffer_FragColorbgra8[x*4+1] = d[1];
buffer_FragColorbgra8[x*4+2] = d[2];
LightColor[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+1];
LightColor[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_LightColor*4+2];
LightColor[3] = 0.0f;
- DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD1);
DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+
+ if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE)
+ {
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorSdata, VectorSslope, DPSOFTRAST_ARRAY_TEXCOORD1);
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorTdata, VectorTslope, DPSOFTRAST_ARRAY_TEXCOORD2);
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, VectorRdata, VectorRslope, DPSOFTRAST_ARRAY_TEXCOORD3);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_deluxemapbgra8, GL20TU_DELUXEMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ }
+ else if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_TANGENTSPACE)
+ {
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_lightmapbgra8, GL20TU_LIGHTMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_deluxemapbgra8, GL20TU_DELUXEMAP, DPSOFTRAST_ARRAY_TEXCOORD4, buffer_z);
+ }
+ else if(thread->shader_mode == SHADERMODE_FAKELIGHT)
+ {
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, EyeVectordata, EyeVectorslope, DPSOFTRAST_ARRAY_TEXCOORD6);
+ }
+ else
+ {
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, LightVectordata, LightVectorslope, DPSOFTRAST_ARRAY_TEXCOORD5);
+ }
+
for (x = startx;x < endx;x++)
{
z = buffer_z[x];
surfacenormal[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
DPSOFTRAST_Vector3Normalize(surfacenormal);
- lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
- lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
- lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
- DPSOFTRAST_Vector3Normalize(lightnormal);
+ if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_MODELSPACE)
+ {
+ // myhalf3 lightnormal_modelspace = myhalf3(dp_texture2D(Texture_Deluxemap, TexCoordSurfaceLightmap.zw)) * 2.0 + myhalf3(-1.0, -1.0, -1.0);\n";
+ lightnormal_modelspace[0] = buffer_texture_deluxemapbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ lightnormal_modelspace[1] = buffer_texture_deluxemapbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ lightnormal_modelspace[2] = buffer_texture_deluxemapbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+
+ // lightnormal.x = dot(lightnormal_modelspace, myhalf3(VectorS));\n"
+ lightnormal[0] = lightnormal_modelspace[0] * (VectorSdata[0] + VectorSslope[0] * x)
+ + lightnormal_modelspace[1] * (VectorSdata[1] + VectorSslope[1] * x)
+ + lightnormal_modelspace[2] * (VectorSdata[2] + VectorSslope[2] * x);
+
+ // lightnormal.y = dot(lightnormal_modelspace, myhalf3(VectorT));\n"
+ lightnormal[1] = lightnormal_modelspace[0] * (VectorTdata[0] + VectorTslope[0] * x)
+ + lightnormal_modelspace[1] * (VectorTdata[1] + VectorTslope[1] * x)
+ + lightnormal_modelspace[2] * (VectorTdata[2] + VectorTslope[2] * x);
+
+ // lightnormal.z = dot(lightnormal_modelspace, myhalf3(VectorR));\n"
+ lightnormal[2] = lightnormal_modelspace[0] * (VectorRdata[0] + VectorRslope[0] * x)
+ + lightnormal_modelspace[1] * (VectorRdata[1] + VectorRslope[1] * x)
+ + lightnormal_modelspace[2] * (VectorRdata[2] + VectorRslope[2] * x);
+
+ // lightnormal = normalize(lightnormal); // VectorS/T/R are not always perfectly normalized, and EXACTSPECULARMATH is very picky about this\n"
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ // myhalf3 lightcolor = myhalf3(dp_texture2D(Texture_Lightmap, TexCoordSurfaceLightmap.zw));\n";
+ {
+ float f = 1.0f / (256.0f * max(0.25f, lightnormal[2]));
+ LightColor[0] = buffer_texture_lightmapbgra8[x*4+0] * f;
+ LightColor[1] = buffer_texture_lightmapbgra8[x*4+1] * f;
+ LightColor[2] = buffer_texture_lightmapbgra8[x*4+2] * f;
+ }
+ }
+ else if(thread->shader_mode == SHADERMODE_LIGHTDIRECTIONMAP_TANGENTSPACE)
+ {
+ lightnormal[0] = buffer_texture_deluxemapbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ lightnormal[1] = buffer_texture_deluxemapbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ lightnormal[2] = buffer_texture_deluxemapbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+ {
+ float f = 1.0f / 256.0f;
+ LightColor[0] = buffer_texture_lightmapbgra8[x*4+0] * f;
+ LightColor[1] = buffer_texture_lightmapbgra8[x*4+1] * f;
+ LightColor[2] = buffer_texture_lightmapbgra8[x*4+2] * f;
+ }
+ }
+ else if(thread->shader_mode == SHADERMODE_FAKELIGHT)
+ {
+ lightnormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ lightnormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ lightnormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+
+ LightColor[0] = 1.0;
+ LightColor[1] = 1.0;
+ LightColor[2] = 1.0;
+ }
+ else
+ {
+ lightnormal[0] = (LightVectordata[0] + LightVectorslope[0]*x) * z;
+ lightnormal[1] = (LightVectordata[1] + LightVectorslope[1]*x) * z;
+ lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(lightnormal);
+ }
diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
if (thread->shader_permutation & SHADERPERMUTATION_GLOW)
void DPSOFTRAST_PixelShader_LightSource(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
unsigned char buffer_texture_normalbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
lightnormal[2] = (LightVectordata[2] + LightVectorslope[2]*x) * z;
DPSOFTRAST_Vector3Normalize(lightnormal);
- eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
- eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
- eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
- DPSOFTRAST_Vector3Normalize(eyenormal);
+ diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
- specularnormal[0] = lightnormal[0] + eyenormal[0];
- specularnormal[1] = lightnormal[1] + eyenormal[1];
- specularnormal[2] = lightnormal[2] + eyenormal[2];
- DPSOFTRAST_Vector3Normalize(specularnormal);
+ if(thread->shader_exactspecularmath)
+ {
+ // reflect lightnormal at surfacenormal, take the negative of that
+ // i.e. we want (2*dot(N, i) * N - I) for N=surfacenormal, I=lightnormal
+ float f;
+ f = DPSOFTRAST_Vector3Dot(lightnormal, surfacenormal);
+ specularnormal[0] = 2*f*surfacenormal[0] - lightnormal[0];
+ specularnormal[1] = 2*f*surfacenormal[1] - lightnormal[1];
+ specularnormal[2] = 2*f*surfacenormal[2] - lightnormal[2];
+
+ // dot of this and normalize(EyeVectorFogDepth.xyz)
+ eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(eyenormal);
+
+ specular = DPSOFTRAST_Vector3Dot(eyenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
+ }
+ else
+ {
+ eyenormal[0] = (EyeVectordata[0] + EyeVectorslope[0]*x) * z;
+ eyenormal[1] = (EyeVectordata[1] + EyeVectorslope[1]*x) * z;
+ eyenormal[2] = (EyeVectordata[2] + EyeVectorslope[2]*x) * z;
+ DPSOFTRAST_Vector3Normalize(eyenormal);
- diffuse = DPSOFTRAST_Vector3Dot(surfacenormal, lightnormal);if (diffuse < 0.0f) diffuse = 0.0f;
- specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
+ specularnormal[0] = lightnormal[0] + eyenormal[0];
+ specularnormal[1] = lightnormal[1] + eyenormal[1];
+ specularnormal[2] = lightnormal[2] + eyenormal[2];
+ DPSOFTRAST_Vector3Normalize(specularnormal);
+
+ specular = DPSOFTRAST_Vector3Dot(surfacenormal, specularnormal);if (specular < 0.0f) specular = 0.0f;
+ }
specular = pow(specular, SpecularPower * glosstex[3]);
+
if (thread->shader_permutation & SHADERPERMUTATION_CUBEFILTER)
{
// scale down the attenuation to account for the cubefilter multiplying everything by 255
void DPSOFTRAST_VertexShader_Refraction(void)
{
+ DPSOFTRAST_Array_Transform(DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_POSITION, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1); // TEXCOORD1 <- POSITION run through the model-view-projection matrix (presumably without the perspective divide, first argument presumably the destination - confirm); the Refraction pixel shader reads TEXCOORD1 as ModelViewProjectionPosition
+ DPSOFTRAST_Array_Transform(DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD0, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_TexMatrixM1); // TEXCOORD0 run through the texture matrix, same array as source and destination; used by the pixel shader for the normalmap lookup
DPSOFTRAST_Array_TransformProject(DPSOFTRAST_ARRAY_POSITION, DPSOFTRAST_ARRAY_POSITION, dpsoftrast.uniform4f + 4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1);
}
void DPSOFTRAST_PixelShader_Refraction(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span)
{
- // TODO: IMPLEMENT
+ // Refraction pixel shader for one span: samples the texture bound to GL20TU_REFRACTION at the
+ // fragment's screen position, displaced by the x/y of the normalmap (GL20TU_NORMAL), and
+ // multiplies the result by the RefractColor uniform.
+ // Returns without drawing anything when no refraction texture is bound.
+ // DIRTY TRICK: only do sideways displacement. Not correct, but cheaper and thus better for SW.
+
float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH];
+ int x, startx = span->startx, endx = span->endx;
+
+ // texture reads
+ unsigned char buffer_texture_normalbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4];
+
+ // varyings
+ float ModelViewProjectionPositiondata[4];
+ float ModelViewProjectionPositionslope[4];
+
+ // uniforms
+ float ScreenScaleRefractReflect[2];
+ float ScreenCenterRefractReflect[2];
+ float DistortScaleRefractReflect[2];
+ float RefractColor[4];
+
+ const unsigned char * RESTRICT pixelbase;
+ const unsigned char * RESTRICT pixel[4];
+ DPSOFTRAST_Texture *texture = thread->texbound[GL20TU_REFRACTION];
+ if(!texture) return;
+ // only mip level 0 of the refraction texture is ever sampled
+ pixelbase = (unsigned char *)texture->bytes + texture->mipmap[0][0];
+
+ // read textures
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z);
- memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4);
+ DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_normalbgra8, GL20TU_NORMAL, DPSOFTRAST_ARRAY_TEXCOORD0, buffer_z);
+
+ // read varyings
+ DPSOFTRAST_CALCATTRIB4F(triangle, span, ModelViewProjectionPositiondata, ModelViewProjectionPositionslope, DPSOFTRAST_ARRAY_TEXCOORD1); // or POSITION?
+
+ // read uniforms
+ ScreenScaleRefractReflect[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_ScreenScaleRefractReflect*4+0];
+ ScreenScaleRefractReflect[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_ScreenScaleRefractReflect*4+1];
+ ScreenCenterRefractReflect[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_ScreenCenterRefractReflect*4+0];
+ ScreenCenterRefractReflect[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_ScreenCenterRefractReflect*4+1];
+ DistortScaleRefractReflect[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_DistortScaleRefractReflect*4+0];
+ DistortScaleRefractReflect[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_DistortScaleRefractReflect*4+1];
+ // the first three components are read swizzled (2,1,0) so RefractColor[k] lines up with byte k of the BGRA buffers
+ RefractColor[0] = thread->uniform4f[DPSOFTRAST_UNIFORM_RefractColor*4+2];
+ RefractColor[1] = thread->uniform4f[DPSOFTRAST_UNIFORM_RefractColor*4+1];
+ RefractColor[2] = thread->uniform4f[DPSOFTRAST_UNIFORM_RefractColor*4+0];
+ RefractColor[3] = thread->uniform4f[DPSOFTRAST_UNIFORM_RefractColor*4+3];
+
+ // do stuff
+ for (x = startx;x < endx;x++)
+ {
+ float SafeScreenTexCoord[2];
+ float ScreenTexCoord[2];
+ float v[3];
+ float iw;
+ float f;
+ unsigned char c[4];
+ int k;
+
+ // " vec2 ScreenScaleRefractReflectIW = ScreenScaleRefractReflect.xy * (1.0 / ModelViewProjectionPosition.w);\n"
+ iw = 1.0f / (ModelViewProjectionPositiondata[3] + ModelViewProjectionPositionslope[3]*x); // / z
+
+ // " vec2 SafeScreenTexCoord = ModelViewProjectionPosition.xy * ScreenScaleRefractReflectIW + ScreenCenterRefractReflect.xy;\n"
+ SafeScreenTexCoord[0] = (ModelViewProjectionPositiondata[0] + ModelViewProjectionPositionslope[0]*x) * iw * ScreenScaleRefractReflect[0] + ScreenCenterRefractReflect[0]; // * z (disappears)
+ SafeScreenTexCoord[1] = (ModelViewProjectionPositiondata[1] + ModelViewProjectionPositionslope[1]*x) * iw * ScreenScaleRefractReflect[1] + ScreenCenterRefractReflect[1]; // * z (disappears)
+
+ // " vec2 ScreenTexCoord = SafeScreenTexCoord + vec3(normalize(myhalf3(dp_texture2D(Texture_Normal, TexCoord)) - myhalf3(0.5))).xy * DistortScaleRefractReflect.zw;\n"
+ // NOTE(review): the quoted GLSL says .zw but components 0/1 of the uniform are used here - confirm which pair refraction wants
+ v[0] = buffer_texture_normalbgra8[x*4+2] * (1.0f / 128.0f) - 1.0f;
+ v[1] = buffer_texture_normalbgra8[x*4+1] * (1.0f / 128.0f) - 1.0f;
+ v[2] = buffer_texture_normalbgra8[x*4+0] * (1.0f / 128.0f) - 1.0f;
+ DPSOFTRAST_Vector3Normalize(v);
+ ScreenTexCoord[0] = SafeScreenTexCoord[0] + v[0] * DistortScaleRefractReflect[0];
+ ScreenTexCoord[1] = SafeScreenTexCoord[1] + v[1] * DistortScaleRefractReflect[1];
+
+ // Bound the coordinate before any float->int conversion: converting an out-of-range or NaN float
+ // is undefined behaviour. Lookups clamp to the texture edge, so every value outside [-1, 2] picks
+ // the same texel as the bound it is replaced with. The !(a >= b) form also catches NaN (e.g. w == 0).
+ for (k = 0;k < 2;k++)
+ {
+ if (!(ScreenTexCoord[k] >= -1.0f))
+ ScreenTexCoord[k] = -1.0f;
+ else if (ScreenTexCoord[k] > 2.0f)
+ ScreenTexCoord[k] = 2.0f;
+ }
+
+ // " dp_FragColor = vec4(dp_texture2D(Texture_Refraction, ScreenTexCoord).rgb, 1.0) * RefractColor;\n"
+ if(texture->filter & DPSOFTRAST_TEXTURE_FILTER_LINEAR)
+ {
+ // bilinear filter in 20.12 fixed point; the -2048 moves the sample point by half a texel
+ int tc[2];
+ int tci[2];
+ int tci1[2];
+ unsigned int frac[2];
+ unsigned int ifrac[2];
+ unsigned int lerp[4];
+ tc[0] = (int)(ScreenTexCoord[0] * (texture->mipmap[0][2]<<12)) - 2048;
+ tc[1] = (int)(ScreenTexCoord[1] * (texture->mipmap[0][3]<<12)) - 2048;
+ // signed math: a coordinate left of / above the first texel centre must clamp to texel 0
+ // (the old unsigned conversion wrapped it around to the opposite edge)
+ if (tc[0] < 0) tc[0] = 0;
+ if (tc[1] < 0) tc[1] = 0;
+ frac[0] = tc[0] & 0xFFF;
+ frac[1] = tc[1] & 0xFFF;
+ ifrac[0] = 0x1000 - frac[0];
+ ifrac[1] = 0x1000 - frac[1];
+ // weights of the four neighbours; they always sum to 1<<24
+ lerp[0] = ifrac[0]*ifrac[1];
+ lerp[1] = frac[0]*ifrac[1];
+ lerp[2] = ifrac[0]*frac[1];
+ lerp[3] = frac[0]*frac[1];
+ tci[0] = tc[0] >> 12;
+ tci[1] = tc[1] >> 12;
+ tci1[0] = tci[0] + 1;
+ tci1[1] = tci[1] + 1;
+ // clamp to edge (the lower bound is already guaranteed by the tc clamp above)
+ if (tci[0] > texture->mipmap[0][2]-1) tci[0] = texture->mipmap[0][2]-1;
+ if (tci[1] > texture->mipmap[0][3]-1) tci[1] = texture->mipmap[0][3]-1;
+ if (tci1[0] > texture->mipmap[0][2]-1) tci1[0] = texture->mipmap[0][2]-1;
+ if (tci1[1] > texture->mipmap[0][3]-1) tci1[1] = texture->mipmap[0][3]-1;
+ pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[0][2]+tci[0]);
+ pixel[1] = pixelbase + 4 * (tci[1]*texture->mipmap[0][2]+tci1[0]);
+ pixel[2] = pixelbase + 4 * (tci1[1]*texture->mipmap[0][2]+tci[0]);
+ pixel[3] = pixelbase + 4 * (tci1[1]*texture->mipmap[0][2]+tci1[0]);
+ c[0] = (pixel[0][0]*lerp[0]+pixel[1][0]*lerp[1]+pixel[2][0]*lerp[2]+pixel[3][0]*lerp[3])>>24;
+ c[1] = (pixel[0][1]*lerp[0]+pixel[1][1]*lerp[1]+pixel[2][1]*lerp[2]+pixel[3][1]*lerp[3])>>24;
+ c[2] = (pixel[0][2]*lerp[0]+pixel[1][2]*lerp[1]+pixel[2][2]*lerp[2]+pixel[3][2]*lerp[3])>>24;
+ }
+ else
+ {
+ // nearest texel, clamped to the texture edge
+ int tci[2];
+ tci[0] = (int)(ScreenTexCoord[0] * texture->mipmap[0][2]);
+ tci[1] = (int)(ScreenTexCoord[1] * texture->mipmap[0][3]);
+ tci[0] = tci[0] >= 0 ? (tci[0] <= texture->mipmap[0][2]-1 ? tci[0] : texture->mipmap[0][2]-1) : 0;
+ tci[1] = tci[1] >= 0 ? (tci[1] <= texture->mipmap[0][3]-1 ? tci[1] : texture->mipmap[0][3]-1) : 0;
+ pixel[0] = pixelbase + 4 * (tci[1]*texture->mipmap[0][2]+tci[0]);
+ c[0] = pixel[0][0];
+ c[1] = pixel[0][1];
+ c[2] = pixel[0][2];
+ }
+
+ // tint and store; saturate to 0..255 first because a float that does not fit in an unsigned char
+ // (RefractColor component above 1, or negative) must not be converted directly
+ for (k = 0;k < 3;k++)
+ {
+ f = c[k] * RefractColor[k];
+ buffer_FragColorbgra8[x*4+k] = (unsigned char)(f >= 255.0f ? 255.0f : (f > 0.0f ? f : 0.0f));
+ }
+ f = RefractColor[3] * 256;
+ buffer_FragColorbgra8[x*4+3] = (unsigned char)(f >= 255.0f ? 255.0f : (f > 0.0f ? f : 0.0f));
+ }
+
DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8);
}
{2, DPSOFTRAST_VertexShader_FlatColor, DPSOFTRAST_PixelShader_FlatColor, {DPSOFTRAST_ARRAY_TEXCOORD0, ~0}, {GL20TU_COLOR, ~0}},
{2, DPSOFTRAST_VertexShader_VertexColor, DPSOFTRAST_PixelShader_VertexColor, {DPSOFTRAST_ARRAY_COLOR, DPSOFTRAST_ARRAY_TEXCOORD0, ~0}, {GL20TU_COLOR, ~0}},
{2, DPSOFTRAST_VertexShader_Lightmap, DPSOFTRAST_PixelShader_Lightmap, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}},
- {2, DPSOFTRAST_VertexShader_FakeLight, DPSOFTRAST_PixelShader_FakeLight, {~0}, {~0}},
- {2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace, DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}},
- {2, DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace, DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_LIGHTMAP, GL20TU_GLOW, ~0}},
- {2, DPSOFTRAST_VertexShader_LightDirection, DPSOFTRAST_PixelShader_LightDirection, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}},
+ {2, DPSOFTRAST_VertexShader_FakeLight, DPSOFTRAST_PixelShader_FakeLight, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}},
+ {2, DPSOFTRAST_VertexShader_LightDirectionMap_ModelSpace, DPSOFTRAST_PixelShader_LightDirectionMap_ModelSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_LIGHTMAP, GL20TU_DELUXEMAP, ~0}},
+ {2, DPSOFTRAST_VertexShader_LightDirectionMap_TangentSpace, DPSOFTRAST_PixelShader_LightDirectionMap_TangentSpace, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_LIGHTMAP, GL20TU_DELUXEMAP, ~0}},
+ {2, DPSOFTRAST_VertexShader_LightDirection, DPSOFTRAST_PixelShader_LightDirection, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD5, DPSOFTRAST_ARRAY_TEXCOORD6, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, ~0}},
{2, DPSOFTRAST_VertexShader_LightSource, DPSOFTRAST_PixelShader_LightSource, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, DPSOFTRAST_ARRAY_TEXCOORD2, DPSOFTRAST_ARRAY_TEXCOORD3, DPSOFTRAST_ARRAY_TEXCOORD4, ~0}, {GL20TU_COLOR, GL20TU_PANTS, GL20TU_SHIRT, GL20TU_GLOW, GL20TU_NORMAL, GL20TU_GLOSS, GL20TU_CUBE, ~0}},
- {2, DPSOFTRAST_VertexShader_Refraction, DPSOFTRAST_PixelShader_Refraction, {~0}},
+ {2, DPSOFTRAST_VertexShader_Refraction, DPSOFTRAST_PixelShader_Refraction, {DPSOFTRAST_ARRAY_TEXCOORD0, DPSOFTRAST_ARRAY_TEXCOORD1, ~0}, {GL20TU_NORMAL, GL20TU_REFRACTION, ~0}},
{2, DPSOFTRAST_VertexShader_Water, DPSOFTRAST_PixelShader_Water, {~0}},
{2, DPSOFTRAST_VertexShader_ShowDepth, DPSOFTRAST_PixelShader_ShowDepth, {~0}},
{2, DPSOFTRAST_VertexShader_DeferredGeometry, DPSOFTRAST_PixelShader_DeferredGeometry, {~0}},
static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAST_Command_Draw *command)
{
-#ifdef SSE2_PRESENT
+#ifdef SSE_POSSIBLE
int cullface = thread->cullface;
int minx, maxx, miny, maxy;
int miny1, maxy1, miny2, maxy2;
attribxslope = _mm_sub_ps(_mm_mul_ps(attribuxslope, attribedge1), _mm_mul_ps(attribvxslope, attribedge2));
attribyslope = _mm_sub_ps(_mm_mul_ps(attribvyslope, attribedge2), _mm_mul_ps(attribuyslope, attribedge1));
attriborigin = _mm_sub_ps(attriborigin, _mm_add_ps(_mm_mul_ps(attribxslope, x1), _mm_mul_ps(attribyslope, y1)));
- _mm_stream_ps(triangle->attribs[k][0], attribxslope);
- _mm_stream_ps(triangle->attribs[k][1], attribyslope);
- _mm_stream_ps(triangle->attribs[k][2], attriborigin);
+ _mm_storeu_ps(triangle->attribs[k][0], attribxslope);
+ _mm_storeu_ps(triangle->attribs[k][1], attribyslope);
+ _mm_storeu_ps(triangle->attribs[k][2], attriborigin);
if (k == DPSOFTRAST_ShaderModeTable[thread->shader_mode].lodarrayindex)
{
mipedgescale = _mm_movelh_ps(triangleedge1, triangleedge2);
DPSOFTRAST_Draw_FlushThreads();
}
}
+
+DEFCOMMAND(23, SetRenderTargets, int width; int height;); // declares the SetRenderTargets command with width/height fields (23 is presumably its opcode - confirm against DEFCOMMAND)
+static void DPSOFTRAST_Interpret_SetRenderTargets(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_Command_SetRenderTargets *command) // handler run for a queued SetRenderTargets command on the given thread state
+{
+ thread->validate |= DPSOFTRAST_VALIDATE_FB; // only flags this thread's framebuffer state for revalidation; command->width/height are not read here
+}
+void DPSOFTRAST_SetRenderTargets(int width, int height, unsigned int *depthpixels, unsigned int *colorpixels0, unsigned int *colorpixels1, unsigned int *colorpixels2, unsigned int *colorpixels3)
+{
+ // Switches the global framebuffer description to the given size and pixel buffers, then queues a
+ // SetRenderTargets command carrying the new size. Pending work is flushed first if anything differs.
+ DPSOFTRAST_Command_SetRenderTargets *cmd;
+ unsigned int *newcolor[4];
+ int unchanged;
+ int i;
+
+ newcolor[0] = colorpixels0;
+ newcolor[1] = colorpixels1;
+ newcolor[2] = colorpixels2;
+ newcolor[3] = colorpixels3;
+
+ // compare the requested targets against the current ones
+ unchanged = (width == dpsoftrast.fb_width && height == dpsoftrast.fb_height && depthpixels == dpsoftrast.fb_depthpixels);
+ for (i = 0;i < 4;i++)
+ {
+ if (newcolor[i] != dpsoftrast.fb_colorpixels[i])
+ {
+ unchanged = 0;
+ }
+ }
+ if (!unchanged)
+ {
+ // finish everything queued so far before the targets are replaced
+ DPSOFTRAST_Flush();
+ }
+
+ // install the new targets
+ dpsoftrast.fb_depthpixels = depthpixels;
+ for (i = 0;i < 4;i++)
+ {
+ dpsoftrast.fb_colorpixels[i] = newcolor[i];
+ }
+ dpsoftrast.fb_width = width;
+ dpsoftrast.fb_height = height;
+ DPSOFTRAST_RecalcViewport(dpsoftrast.viewport, dpsoftrast.fb_viewportcenter, dpsoftrast.fb_viewportscale);
+
+ // queue the command for the interpreter
+ cmd = DPSOFTRAST_ALLOCATECOMMAND(SetRenderTargets);
+ cmd->width = width;
+ cmd->height = height;
+}
static void DPSOFTRAST_Draw_InterpretCommands(DPSOFTRAST_State_Thread *thread, int endoffset)
{
INTERPCOMMAND(Uniform4f)
INTERPCOMMAND(UniformMatrix4f)
INTERPCOMMAND(Uniform1i)
+ INTERPCOMMAND(SetRenderTargets)
case DPSOFTRAST_OPCODE_Draw:
DPSOFTRAST_Interpret_Draw(thread, (DPSOFTRAST_Command_Draw *)command);
thread->polygonoffset[0] = 0;
thread->polygonoffset[1] = 0;
- if (dpsoftrast.interlace)
- {
- thread->miny1 = (i*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads);
- thread->maxy1 = ((i+1)*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads);
- thread->miny2 = ((dpsoftrast.numthreads+i)*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads);
- thread->maxy2 = ((dpsoftrast.numthreads+i+1)*dpsoftrast.fb_height)/(2*dpsoftrast.numthreads);
- }
- else
- {
- thread->miny1 = thread->miny2 = (i*dpsoftrast.fb_height)/dpsoftrast.numthreads;
- thread->maxy1 = thread->maxy2 = ((i+1)*dpsoftrast.fb_height)/dpsoftrast.numthreads;
- }
-
+ DPSOFTRAST_RecalcThread(thread);
+
thread->numspans = 0;
thread->numtriangles = 0;
thread->commandoffset = 0;