X-Git-Url: http://git.xonotic.org/?a=blobdiff_plain;f=dpsoftrast.c;h=216d628cecd5199159c2da71a06d4fc9899675e4;hb=ac7539a1050dfcec9acc5e702c1d1ece4d2b94c6;hp=072fd564466bc209f876be0aa463e75132c07cce;hpb=d4f23b4163ffd60907a657149c1fde37c0fe5bc1;p=xonotic%2Fdarkplaces.git diff --git a/dpsoftrast.c b/dpsoftrast.c index 072fd564..216d628c 100644 --- a/dpsoftrast.c +++ b/dpsoftrast.c @@ -3,12 +3,9 @@ #define _USE_MATH_DEFINES #include <math.h> #include "quakedef.h" +#include "thread.h" #include "dpsoftrast.h" -#ifdef USE_SDL -#define USE_THREADS -#endif - #ifndef __cplusplus typedef qboolean bool; #endif @@ -17,51 +14,56 @@ typedef qboolean bool; #define ATOMIC_SIZE 32 #ifdef SSE2_PRESENT - #if defined(__GNUC__) + #if defined(__APPLE__) + #include <libkern/OSAtomic.h> #define ALIGN(var) var __attribute__((__aligned__(16))) #define ATOMIC(var) var __attribute__((__aligned__(32))) - #ifdef USE_THREADS - #define MEMORY_BARRIER (_mm_sfence()) - //(__sync_synchronize()) - #define ATOMIC_COUNTER volatile int - #define ATOMIC_INCREMENT(counter) (__sync_add_and_fetch(&(counter), 1)) - #define ATOMIC_DECREMENT(counter) (__sync_add_and_fetch(&(counter), -1)) - #define ATOMIC_ADD(counter, val) ((void)__sync_fetch_and_add(&(counter), (val))) - #endif + #define MEMORY_BARRIER (_mm_sfence()) + #define ATOMIC_COUNTER volatile int32_t + #define ATOMIC_INCREMENT(counter) (OSAtomicIncrement32Barrier(&(counter))) + #define ATOMIC_DECREMENT(counter) (OSAtomicDecrement32Barrier(&(counter))) + #define ATOMIC_ADD(counter, val) ((void)OSAtomicAdd32Barrier((val), &(counter))) + #elif defined(__GNUC__) + #define ALIGN(var) var __attribute__((__aligned__(16))) + #define ATOMIC(var) var __attribute__((__aligned__(32))) + #define MEMORY_BARRIER (_mm_sfence()) + //(__sync_synchronize()) + #define ATOMIC_COUNTER volatile int + #define ATOMIC_INCREMENT(counter) (__sync_add_and_fetch(&(counter), 1)) + #define ATOMIC_DECREMENT(counter) (__sync_add_and_fetch(&(counter), -1)) + #define ATOMIC_ADD(counter, val) 
((void)__sync_fetch_and_add(&(counter), (val))) #elif defined(_MSC_VER) #define ALIGN(var) __declspec(align(16)) var #define ATOMIC(var) __declspec(align(32)) var - #ifdef USE_THREADS - #define MEMORY_BARRIER (_mm_sfence()) - //(MemoryBarrier()) - #define ATOMIC_COUNTER volatile LONG - #define ATOMIC_INCREMENT(counter) (InterlockedIncrement(&(counter))) - #define ATOMIC_DECREMENT(counter) (InterlockedDecrement(&(counter))) - #define ATOMIC_ADD(counter, val) (InterlockedExchangeAdd(&(counter), (val))) - #endif - #else - #undef USE_THREADS - #undef SSE2_PRESENT + #define MEMORY_BARRIER (_mm_sfence()) + //(MemoryBarrier()) + #define ATOMIC_COUNTER volatile LONG + #define ATOMIC_INCREMENT(counter) (InterlockedIncrement(&(counter))) + #define ATOMIC_DECREMENT(counter) (InterlockedDecrement(&(counter))) + #define ATOMIC_ADD(counter, val) ((void)InterlockedExchangeAdd(&(counter), (val))) #endif #endif -#ifndef SSE2_PRESENT - #define ALIGN(var) var - #define ATOMIC(var) var +#ifndef ALIGN +#define ALIGN(var) var #endif - -#ifdef USE_THREADS -#include -#include -#else - #define MEMORY_BARRIER ((void)0) - #define ATOMIC_COUNTER int - #define ATOMIC_INCREMENT(counter) (++(counter)) - #define ATOMIC_DECREMENT(counter) (--(counter)) - #define ATOMIC_ADD(counter, val) ((void)((counter) += (val))) - typedef void SDL_Thread; - typedef void SDL_cond; - typedef void SDL_mutex; +#ifndef ATOMIC +#define ATOMIC(var) var +#endif +#ifndef MEMORY_BARRIER +#define MEMORY_BARRIER ((void)0) +#endif +#ifndef ATOMIC_COUNTER +#define ATOMIC_COUNTER int +#endif +#ifndef ATOMIC_INCREMENT +#define ATOMIC_INCREMENT(counter) (++(counter)) +#endif +#ifndef ATOMIC_DECREMENT +#define ATOMIC_DECREMENT(counter) (--(counter)) +#endif +#ifndef ATOMIC_ADD +#define ATOMIC_ADD(counter, val) ((void)((counter) += (val))) #endif #ifdef SSE2_PRESENT @@ -179,7 +181,6 @@ typedef ALIGN(struct DPSOFTRAST_State_Span_s int triangle; // triangle this span was generated by int x; // framebuffer x coord int y; // 
framebuffer y coord - int length; // pixel count int startx; // usable range (according to pixelmask) int endx; // usable range (according to pixelmask) unsigned char *pixelmask; // true for pixels that passed depth test, false for others @@ -205,13 +206,14 @@ typedef enum DPSOFTRAST_BLENDMODE_e DPSOFTRAST_BLENDMODE_MUL2, DPSOFTRAST_BLENDMODE_SUBALPHA, DPSOFTRAST_BLENDMODE_PSEUDOALPHA, + DPSOFTRAST_BLENDMODE_INVADD, DPSOFTRAST_BLENDMODE_TOTAL } DPSOFTRAST_BLENDMODE; typedef ATOMIC(struct DPSOFTRAST_State_Thread_s { - SDL_Thread *thread; + void *thread; int index; int cullface; @@ -243,7 +245,7 @@ typedef ATOMIC(struct DPSOFTRAST_State_Thread_s // derived values (DPSOFTRAST_VALIDATE_FB) int fb_colormask; - int fb_clearscissor[4]; + int fb_scissor[4]; ALIGN(float fb_viewportcenter[4]); ALIGN(float fb_viewportscale[4]); @@ -263,9 +265,9 @@ typedef ATOMIC(struct DPSOFTRAST_State_Thread_s volatile bool waiting; volatile bool starving; - SDL_cond *waitcond; - SDL_cond *drawcond; - SDL_mutex *drawmutex; + void *waitcond; + void *drawcond; + void *drawmutex; int numspans; int numtriangles; @@ -320,6 +322,7 @@ typedef ATOMIC(struct DPSOFTRAST_State_s // error reporting const char *errorstring; + bool usethreads; int interlace; int numthreads; DPSOFTRAST_State_Thread *threads; @@ -365,10 +368,10 @@ static void DPSOFTRAST_RecalcFB(DPSOFTRAST_State_Thread *thread) if (x2 > dpsoftrast.fb_width) x2 = dpsoftrast.fb_width; if (y1 < 0) y1 = 0; if (y2 > dpsoftrast.fb_height) y2 = dpsoftrast.fb_height; - thread->fb_clearscissor[0] = x1; - thread->fb_clearscissor[1] = y1; - thread->fb_clearscissor[2] = x2 - x1; - thread->fb_clearscissor[3] = y2 - y1; + thread->fb_scissor[0] = x1; + thread->fb_scissor[1] = y1; + thread->fb_scissor[2] = x2 - x1; + thread->fb_scissor[3] = y2 - y1; DPSOFTRAST_RecalcViewport(thread->viewport, thread->fb_viewportcenter, thread->fb_viewportscale); } @@ -386,7 +389,7 @@ static void DPSOFTRAST_RecalcBlendFunc(DPSOFTRAST_State_Thread *thread) { #define 
BLENDFUNC(sfactor, dfactor, blendmode) \ case (sfactor<<16)|dfactor: thread->fb_blendmode = blendmode; break; - BLENDFUNC(GL_SRC_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA) + BLENDFUNC(GL_SRC_ALPHA, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA) default: thread->fb_blendmode = DPSOFTRAST_BLENDMODE_OPAQUE; break; } } @@ -403,7 +406,7 @@ static void DPSOFTRAST_RecalcBlendFunc(DPSOFTRAST_State_Thread *thread) BLENDFUNC(GL_DST_COLOR, GL_ZERO, DPSOFTRAST_BLENDMODE_MUL) BLENDFUNC(GL_DST_COLOR, GL_SRC_COLOR, DPSOFTRAST_BLENDMODE_MUL2) BLENDFUNC(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, DPSOFTRAST_BLENDMODE_PSEUDOALPHA) - BLENDFUNC(GL_SRC_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_SUBALPHA) + BLENDFUNC(GL_ONE_MINUS_DST_COLOR, GL_ONE, DPSOFTRAST_BLENDMODE_INVADD) default: thread->fb_blendmode = DPSOFTRAST_BLENDMODE_OPAQUE; break; } } @@ -765,13 +768,12 @@ static void DPSOFTRAST_Draw_FlushThreads(void); static void DPSOFTRAST_Draw_SyncCommands(void) { - MEMORY_BARRIER; + if(dpsoftrast.usethreads) MEMORY_BARRIER; dpsoftrast.drawcommand = dpsoftrast.commandpool.freecommand; } static void DPSOFTRAST_Draw_FreeCommandPool(int space) { -#ifdef USE_THREADS DPSOFTRAST_State_Thread *thread; int i; int freecommand = dpsoftrast.commandpool.freecommand; @@ -799,20 +801,17 @@ static void DPSOFTRAST_Draw_FreeCommandPool(int space) if (usedcommands <= DPSOFTRAST_DRAW_MAXCOMMANDPOOL-space || waitindex < 0) break; thread = &dpsoftrast.threads[waitindex]; - SDL_LockMutex(thread->drawmutex); + Thread_LockMutex(thread->drawmutex); if (thread->commandoffset != dpsoftrast.drawcommand) { thread->waiting = true; - if (thread->starving) SDL_CondSignal(thread->drawcond); - SDL_CondWait(thread->waitcond, thread->drawmutex); + if (thread->starving) Thread_CondSignal(thread->drawcond); + Thread_CondWait(thread->waitcond, thread->drawmutex); thread->waiting = false; } - SDL_UnlockMutex(thread->drawmutex); + Thread_UnlockMutex(thread->drawmutex); } dpsoftrast.commandpool.usedcommands = usedcommands; -#else - 
DPSOFTRAST_Draw_FlushThreads(); -#endif } #define DPSOFTRAST_ALIGNCOMMAND(size) \ @@ -830,7 +829,10 @@ static void *DPSOFTRAST_AllocateCommand(int opcode, int size) extra += DPSOFTRAST_DRAW_MAXCOMMANDPOOL - freecommand; if (usedcommands > DPSOFTRAST_DRAW_MAXCOMMANDPOOL - (size + extra)) { - DPSOFTRAST_Draw_FreeCommandPool(size + extra); + if (dpsoftrast.usethreads) + DPSOFTRAST_Draw_FreeCommandPool(size + extra); + else + DPSOFTRAST_Draw_FlushThreads(); freecommand = dpsoftrast.commandpool.freecommand; usedcommands = dpsoftrast.commandpool.usedcommands; } @@ -900,10 +902,10 @@ static void DPSOFTRAST_Interpret_ClearColor(DPSOFTRAST_State_Thread *thread, con unsigned int *p; unsigned int c; DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB); - x1 = thread->fb_clearscissor[0]; - y1 = thread->fb_clearscissor[1]; - x2 = thread->fb_clearscissor[0] + thread->fb_clearscissor[2]; - y2 = thread->fb_clearscissor[1] + thread->fb_clearscissor[3]; + x1 = thread->fb_scissor[0]; + y1 = thread->fb_scissor[1]; + x2 = thread->fb_scissor[0] + thread->fb_scissor[2]; + y2 = thread->fb_scissor[1] + thread->fb_scissor[3]; if (y1 < miny1) y1 = miny1; if (y2 > maxy2) y2 = maxy2; w = x2 - x1; @@ -946,10 +948,10 @@ static void DPSOFTRAST_Interpret_ClearDepth(DPSOFTRAST_State_Thread *thread, DPS unsigned int *p; unsigned int c; DPSOFTRAST_Validate(thread, DPSOFTRAST_VALIDATE_FB); - x1 = thread->fb_clearscissor[0]; - y1 = thread->fb_clearscissor[1]; - x2 = thread->fb_clearscissor[0] + thread->fb_clearscissor[2]; - y2 = thread->fb_clearscissor[1] + thread->fb_clearscissor[3]; + x1 = thread->fb_scissor[0]; + y1 = thread->fb_scissor[1]; + x2 = thread->fb_scissor[0] + thread->fb_scissor[2]; + y2 = thread->fb_scissor[1] + thread->fb_scissor[3]; if (y1 < miny1) y1 = miny1; if (y2 > maxy2) y2 = maxy2; w = x2 - x1; @@ -1157,7 +1159,6 @@ void DPSOFTRAST_GetPixelsBGRA(int blockx, int blocky, int blockwidth, int blockh int bx2 = blockx + blockwidth; int by2 = blocky + blockheight; int bw; - int bh; int 
x; int y; unsigned char *inpixels; @@ -1169,7 +1170,6 @@ void DPSOFTRAST_GetPixelsBGRA(int blockx, int blocky, int blockwidth, int blockh if (bx2 > dpsoftrast.fb_width) bx2 = dpsoftrast.fb_width; if (by2 > dpsoftrast.fb_height) by2 = dpsoftrast.fb_height; bw = bx2 - bx1; - bh = by2 - by1; inpixels = (unsigned char *)dpsoftrast.fb_colorpixels[0]; if (dpsoftrast.bigendian) { @@ -1223,8 +1223,7 @@ void DPSOFTRAST_CopyRectangleToTexture(int index, int mip, int tx, int ty, int s DPSOFTRAST_Texture *texture; texture = DPSOFTRAST_Texture_GetByIndex(index);if (!texture) return; if (mip < 0 || mip >= texture->mipmaps) return; - if (texture->binds) - DPSOFTRAST_Flush(); + DPSOFTRAST_Flush(); spixels = dpsoftrast.fb_colorpixels[0]; swidth = dpsoftrast.fb_width; sheight = dpsoftrast.fb_height; @@ -1365,7 +1364,7 @@ void DPSOFTRAST_UniformMatrix4fv(DPSOFTRAST_UNIFORM uniform, int arraysize, int { __m128 m0, m1, m2, m3; DPSOFTRAST_Command_UniformMatrix4f *command = DPSOFTRAST_ALLOCATECOMMAND(UniformMatrix4f); - command->index = index; + command->index = (DPSOFTRAST_UNIFORM)index; if (((size_t)v)&(ALIGN_SIZE-1)) { m0 = _mm_loadu_ps(v); @@ -1761,11 +1760,9 @@ static int DPSOFTRAST_Vertex_BoundY(int *starty, int *endy, __m128 minpos, __m12 *endy = _mm_cvttss_si32(minproj)+1; return clipmask; } -#endif static int DPSOFTRAST_Vertex_Project(float *out4f, float *screen4f, int *starty, int *endy, const float *in4f, int numitems) { -#ifdef SSE2_PRESENT float *end = out4f + numitems*4; __m128 viewportcenter = _mm_load_ps(dpsoftrast.fb_viewportcenter), viewportscale = _mm_load_ps(dpsoftrast.fb_viewportscale); __m128 minpos, maxpos; @@ -1808,12 +1805,10 @@ static int DPSOFTRAST_Vertex_Project(float *out4f, float *screen4f, int *starty, _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f), _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f)); return 0; -#endif } static int DPSOFTRAST_Vertex_TransformProject(float *out4f, float *screen4f, int *starty, int *endy, const float *in4f, int numitems, const float *inmatrix16f) { 
-#ifdef SSE2_PRESENT static const float identitymatrix[4][4] = {{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}}; __m128 m0, m1, m2, m3, viewportcenter, viewportscale, minpos, maxpos; float *end; @@ -1863,11 +1858,12 @@ static int DPSOFTRAST_Vertex_TransformProject(float *out4f, float *screen4f, int if (starty && endy) return DPSOFTRAST_Vertex_BoundY(starty, endy, minpos, maxpos, viewportcenter, viewportscale, m0, m1, m2, m3); return 0; -#endif } +#endif static float *DPSOFTRAST_Array_Load(int outarray, int inarray) { +#ifdef SSE2_PRESENT float *outf = dpsoftrast.post_array4f[outarray]; const unsigned char *inb; int firstvertex = dpsoftrast.firstvertex; @@ -1919,6 +1915,9 @@ static float *DPSOFTRAST_Array_Load(int outarray, int inarray) break; } return outf; +#else + return NULL; +#endif } static float *DPSOFTRAST_Array_Transform(int outarray, int inarray, const float *inmatrix16f) @@ -1931,17 +1930,25 @@ static float *DPSOFTRAST_Array_Transform(int outarray, int inarray, const float #if 0 static float *DPSOFTRAST_Array_Project(int outarray, int inarray) { +#ifdef SSE2_PRESENT float *data = inarray >= 0 ? DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray]; dpsoftrast.drawclipped = DPSOFTRAST_Vertex_Project(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices); return data; +#else + return NULL; +#endif } #endif static float *DPSOFTRAST_Array_TransformProject(int outarray, int inarray, const float *inmatrix16f) { +#ifdef SSE2_PRESENT float *data = inarray >= 0 ? 
DPSOFTRAST_Array_Load(outarray, inarray) : dpsoftrast.post_array4f[outarray]; dpsoftrast.drawclipped = DPSOFTRAST_Vertex_TransformProject(data, dpsoftrast.screencoord4f, &dpsoftrast.drawstarty, &dpsoftrast.drawendy, data, dpsoftrast.numvertices, inmatrix16f); return data; +#else + return NULL; +#endif } void DPSOFTRAST_Draw_Span_Begin(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, float *zf) @@ -2125,6 +2132,21 @@ void DPSOFTRAST_Draw_Span_Finish(DPSOFTRAST_State_Thread *thread, const DPSOFTRA pixel[x*4+3] = d[3]; } break; + case DPSOFTRAST_BLENDMODE_INVADD: + for (x = startx;x < endx;x++) + { + if (!pixelmask[x]) + continue; + d[0] = (int)((255.0f-pixel[x*4+2])*in4f[x*4+0] + pixel[x*4+2]);if (d[0] > 255) d[0] = 255; + d[1] = (int)((255.0f-pixel[x*4+1])*in4f[x*4+1] + pixel[x*4+1]);if (d[1] > 255) d[1] = 255; + d[2] = (int)((255.0f-pixel[x*4+0])*in4f[x*4+2] + pixel[x*4+0]);if (d[2] > 255) d[2] = 255; + d[3] = (int)((255.0f-pixel[x*4+3])*in4f[x*4+3] + pixel[x*4+3]);if (d[3] > 255) d[3] = 255; + pixel[x*4+0] = d[0]; + pixel[x*4+1] = d[1]; + pixel[x*4+2] = d[2]; + pixel[x*4+3] = d[3]; + } + break; } } @@ -2171,7 +2193,7 @@ void DPSOFTRAST_Draw_Span_FinishBGRA8(DPSOFTRAST_State_Thread *thread, const DPS break; case DPSOFTRAST_BLENDMODE_ALPHA: #define FINISHBLEND(blend2, blend1) \ - for (x = startx;x + 2 <= endx;x += 2) \ + for (x = startx;x + 1 < endx;x += 2) \ { \ __m128i src, dst; \ switch (*(const unsigned short*)&pixelmask[x]) \ @@ -2259,6 +2281,13 @@ void DPSOFTRAST_Draw_Span_FinishBGRA8(DPSOFTRAST_State_Thread *thread, const DPS dst = _mm_add_epi16(src, _mm_sub_epi16(dst, _mm_srli_epi16(_mm_mullo_epi16(dst, blend), 8))); }); break; + case DPSOFTRAST_BLENDMODE_INVADD: + FINISHBLEND({ + dst = _mm_add_epi16(dst, _mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(_mm_set1_epi16(255), dst), 4), _mm_slli_epi16(src, 4))); + }, { + dst = _mm_add_epi16(dst, 
_mm_mulhi_epi16(_mm_slli_epi16(_mm_sub_epi16(_mm_set1_epi16(255), dst), 4), _mm_slli_epi16(src, 4))); + }); + break; } #endif } @@ -2469,7 +2498,7 @@ void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(DPSOFTRAST_State_Thread *thread, // if no texture is bound, just fill it with white if (!texture) { - memset(out4ub + startx*4, 255, span->length*4); + memset(out4ub + startx*4, 255, (span->endx - span->startx)*4); return; } mip = triangle->mip[texunitindex]; @@ -2728,7 +2757,7 @@ void DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(DPSOFTRAST_State_Thread *thread, void DPSOFTRAST_Draw_Span_TextureCubeVaryingBGRA8(const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span, unsigned char * RESTRICT out4ub, int texunitindex, int arrayindex, const float * RESTRICT zf) { // TODO: IMPLEMENT - memset(out4ub, 255, span->length*4); + memset(out4ub + span->startx*4, 255, (span->endx - span->startx)*4); } float DPSOFTRAST_SampleShadowmap(const float *vector) @@ -3185,7 +3214,7 @@ void DPSOFTRAST_PixelShader_Depth_Or_Shadow(DPSOFTRAST_State_Thread *thread, con float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -3199,25 +3228,44 @@ void DPSOFTRAST_VertexShader_FlatColor(void) void DPSOFTRAST_PixelShader_FlatColor(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span) { +#ifdef SSE2_PRESENT + unsigned char * RESTRICT pixelmask = span->pixelmask; + unsigned char * RESTRICT pixel = (unsigned char *)dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4; int x, startx = span->startx, endx = span->endx; - int 
Color_Ambienti[4]; + __m128i Color_Ambientm; float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_texture_colorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; - Color_Ambienti[2] = (int)(thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+0]*256.0f); - Color_Ambienti[1] = (int)(thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+1]*256.0f); - Color_Ambienti[0] = (int)(thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4+2]*256.0f); - Color_Ambienti[3] = (int)(thread->uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0] *256.0f); DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); DPSOFTRAST_Draw_Span_Texture2DVaryingBGRA8(thread, triangle, span, buffer_texture_colorbgra8, GL20TU_COLOR, 2, buffer_z); + if (thread->alphatest || thread->fb_blendmode != DPSOFTRAST_BLENDMODE_OPAQUE) + pixel = buffer_FragColorbgra8; + Color_Ambientm = _mm_shuffle_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_load_ps(&thread->uniform4f[DPSOFTRAST_UNIFORM_Color_Ambient*4]), _mm_set1_ps(256.0f))), _MM_SHUFFLE(3, 0, 1, 2)); + Color_Ambientm = _mm_and_si128(Color_Ambientm, _mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); + Color_Ambientm = _mm_or_si128(Color_Ambientm, _mm_setr_epi32(0, 0, 0, (int)(thread->uniform4f[DPSOFTRAST_UNIFORM_Alpha*4+0]*255.0f))); + Color_Ambientm = _mm_packs_epi32(Color_Ambientm, Color_Ambientm); for (x = startx;x < endx;x++) { - buffer_FragColorbgra8[x*4+0] = (buffer_texture_colorbgra8[x*4+0] * Color_Ambienti[0])>>8; - buffer_FragColorbgra8[x*4+1] = (buffer_texture_colorbgra8[x*4+1] * Color_Ambienti[1])>>8; - buffer_FragColorbgra8[x*4+2] = (buffer_texture_colorbgra8[x*4+2] * Color_Ambienti[2])>>8; - buffer_FragColorbgra8[x*4+3] = (buffer_texture_colorbgra8[x*4+3] * Color_Ambienti[3])>>8; + __m128i color, pix; + if (x + 4 <= endx && *(const unsigned int *)&pixelmask[x] == 0x01010101) + { + __m128i pix2; + color = _mm_loadu_si128((const __m128i *)&buffer_texture_colorbgra8[x*4]); + pix = 
_mm_mulhi_epu16(Color_Ambientm, _mm_unpacklo_epi8(_mm_setzero_si128(), color)); + pix2 = _mm_mulhi_epu16(Color_Ambientm, _mm_unpackhi_epi8(_mm_setzero_si128(), color)); + _mm_storeu_si128((__m128i *)&pixel[x*4], _mm_packus_epi16(pix, pix2)); + x += 3; + continue; + } + if (!pixelmask[x]) + continue; + color = _mm_unpacklo_epi8(_mm_setzero_si128(), _mm_cvtsi32_si128(*(const int *)&buffer_texture_colorbgra8[x*4])); + pix = _mm_mulhi_epu16(Color_Ambientm, color); + *(int *)&pixel[x*4] = _mm_cvtsi128_si32(_mm_packus_epi16(pix, pix)); } - DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); + if (pixel == buffer_FragColorbgra8) + DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); +#endif } @@ -3407,7 +3455,7 @@ void DPSOFTRAST_PixelShader_FakeLight(DPSOFTRAST_State_Thread *thread, const DPS float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4090,7 +4138,7 @@ void DPSOFTRAST_PixelShader_Refraction(DPSOFTRAST_State_Thread *thread, const DP float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4108,7 +4156,7 @@ void DPSOFTRAST_PixelShader_Water(DPSOFTRAST_State_Thread *thread, const DPSOFTR float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; 
DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4125,7 +4173,7 @@ void DPSOFTRAST_PixelShader_ShowDepth(DPSOFTRAST_State_Thread *thread, const DPS float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4142,7 +4190,7 @@ void DPSOFTRAST_PixelShader_DeferredGeometry(DPSOFTRAST_State_Thread *thread, co float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4159,7 +4207,24 @@ void DPSOFTRAST_PixelShader_DeferredLightSource(DPSOFTRAST_State_Thread *thread, float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); - memset(buffer_FragColorbgra8, 0, span->length*4); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); + DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); +} + + + +void DPSOFTRAST_VertexShader_DeferredBounceLight(void) +{ + DPSOFTRAST_Array_TransformProject(DPSOFTRAST_ARRAY_POSITION, DPSOFTRAST_ARRAY_POSITION, dpsoftrast.uniform4f + 
4*DPSOFTRAST_UNIFORM_ModelViewProjectionMatrixM1); +} + +void DPSOFTRAST_PixelShader_DeferredBounceLight(DPSOFTRAST_State_Thread *thread, const DPSOFTRAST_State_Triangle * RESTRICT triangle, const DPSOFTRAST_State_Span * RESTRICT span) +{ + // TODO: IMPLEMENT + float buffer_z[DPSOFTRAST_DRAW_MAXSPANLENGTH]; + unsigned char buffer_FragColorbgra8[DPSOFTRAST_DRAW_MAXSPANLENGTH*4]; + DPSOFTRAST_Draw_Span_Begin(thread, triangle, span, buffer_z); + memset(buffer_FragColorbgra8 + span->startx*4, 0, (span->endx - span->startx)*4); DPSOFTRAST_Draw_Span_FinishBGRA8(thread, triangle, span, buffer_FragColorbgra8); } @@ -4192,7 +4257,8 @@ static const DPSOFTRAST_ShaderModeInfo DPSOFTRAST_ShaderModeTable[SHADERMODE_COU {2, DPSOFTRAST_VertexShader_Water, DPSOFTRAST_PixelShader_Water, {~0}}, {2, DPSOFTRAST_VertexShader_ShowDepth, DPSOFTRAST_PixelShader_ShowDepth, {~0}}, {2, DPSOFTRAST_VertexShader_DeferredGeometry, DPSOFTRAST_PixelShader_DeferredGeometry, {~0}}, - {2, DPSOFTRAST_VertexShader_DeferredLightSource, DPSOFTRAST_PixelShader_DeferredLightSource, {~0}} + {2, DPSOFTRAST_VertexShader_DeferredLightSource, DPSOFTRAST_PixelShader_DeferredLightSource, {~0}}, + {2, DPSOFTRAST_VertexShader_DeferredBounceLight, DPSOFTRAST_PixelShader_DeferredBounceLight, {~0}} }; void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread) @@ -4223,23 +4289,23 @@ void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread) depthslope = (int)(wslope*DPSOFTRAST_DEPTHSCALE); depth = (int)(w*DPSOFTRAST_DEPTHSCALE - DPSOFTRAST_DEPTHOFFSET*(thread->polygonoffset[1] + fabs(wslope)*thread->polygonoffset[0])); depthpixel = dpsoftrast.fb_depthpixels + span->y * dpsoftrast.fb_width + span->x; + startx = span->startx; + endx = span->endx; switch(thread->fb_depthfunc) { default: - case GL_ALWAYS: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = true; break; - case GL_LESS: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] < d; 
break; - case GL_LEQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] <= d; break; - case GL_EQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] == d; break; - case GL_GEQUAL: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] >= d; break; - case GL_GREATER: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = depthpixel[x] > d; break; - case GL_NEVER: for (x = 0, d = depth;x < span->length;x++, d += depthslope) pixelmask[x] = false; break; + case GL_ALWAYS: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = true; break; + case GL_LESS: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] < d; break; + case GL_LEQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] <= d; break; + case GL_EQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] == d; break; + case GL_GEQUAL: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] >= d; break; + case GL_GREATER: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = depthpixel[x] > d; break; + case GL_NEVER: for (x = startx, d = depth + depthslope*startx;x < endx;x++, d += depthslope) pixelmask[x] = false; break; } //colorpixel = dpsoftrast.fb_colorpixels[0] + (span->y * dpsoftrast.fb_width + span->x) * 4;; - //for (x = 0;x < span->length;x++) + //for (x = startx;x < endx;x++) // colorpixel[x] = (depthpixel[x] & 0xFF000000) ? 
(0x00FF0000) : (depthpixel[x] & 0x00FF0000); // if there is no color buffer, skip pixel shader - startx = 0; - endx = span->length; while (startx < endx && !pixelmask[startx]) startx++; while (endx > startx && !pixelmask[endx-1]) @@ -4265,10 +4331,8 @@ void DPSOFTRAST_Draw_ProcessSpans(DPSOFTRAST_State_Thread *thread) // if there is no color buffer, skip pixel shader if (dpsoftrast.fb_colorpixels[0] && thread->fb_colormask) { - memset(pixelmask, 1, span->length); + memset(pixelmask + span->startx, 1, span->endx - span->startx); span->pixelmask = pixelmask; - span->startx = 0; - span->endx = span->length; DPSOFTRAST_ShaderModeTable[thread->shader_mode].Span(thread, triangle, span); } } @@ -4282,11 +4346,8 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS { #ifdef SSE2_PRESENT int cullface = thread->cullface; - int width = dpsoftrast.fb_width; - int miny1 = thread->miny1; - int maxy1 = thread->maxy1; - int miny2 = thread->miny2; - int maxy2 = thread->maxy2; + int minx, maxx, miny, maxy; + int miny1, maxy1, miny2, maxy2; __m128i fbmin, fbmax; __m128 viewportcenter, viewportscale; int firstvertex = command->firstvertex; @@ -4310,6 +4371,13 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS __m128 screen[4]; DPSOFTRAST_State_Triangle *triangle; DPSOFTRAST_Texture *texture; + DPSOFTRAST_ValidateQuick(thread, DPSOFTRAST_VALIDATE_DRAW); + miny = thread->fb_scissor[1]; + maxy = thread->fb_scissor[1] + thread->fb_scissor[3]; + miny1 = bound(miny, thread->miny1, maxy); + maxy1 = bound(miny, thread->maxy1, maxy); + miny2 = bound(miny, thread->miny2, maxy); + maxy2 = bound(miny, thread->maxy2, maxy); if ((command->starty >= maxy1 || command->endy <= miny1) && (command->starty >= maxy2 || command->endy <= miny2)) { if (!ATOMIC_DECREMENT(command->refcount)) @@ -4319,9 +4387,10 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS } return; } - DPSOFTRAST_ValidateQuick(thread, 
DPSOFTRAST_VALIDATE_DRAW); - fbmin = _mm_setr_epi16(0, miny1, 0, miny1, 0, miny1, 0, miny1); - fbmax = _mm_sub_epi16(_mm_setr_epi16(width, maxy2, width, maxy2, width, maxy2, width, maxy2), _mm_set1_epi16(1)); + minx = thread->fb_scissor[0]; + maxx = thread->fb_scissor[0] + thread->fb_scissor[2]; + fbmin = _mm_setr_epi16(minx, miny1, minx, miny1, minx, miny1, minx, miny1); + fbmax = _mm_sub_epi16(_mm_setr_epi16(maxx, maxy2, maxx, maxy2, maxx, maxy2, maxx, maxy2), _mm_set1_epi16(1)); viewportcenter = _mm_load_ps(thread->fb_viewportcenter); viewportscale = _mm_load_ps(thread->fb_viewportscale); screen[3] = _mm_setzero_ps(); @@ -4625,38 +4694,38 @@ static void DPSOFTRAST_Interpret_Draw(DPSOFTRAST_State_Thread *thread, DPSOFTRAS ycc = _mm_min_epi16(ycc, _mm_shuffle_epi32(ycc, _MM_SHUFFLE(2, 3, 0, 1))); nexty = _mm_extract_epi16(ycc, 0); if (nexty >= bandy) nexty = bandy-1; - if (_mm_ucomigt_ss(_mm_max_ss(screen[edge0n], screen[edge0p]), _mm_min_ss(screen[edge1n], screen[edge1p]))) - { - int tmp = edge0n; - edge0n = edge1n; - edge1n = tmp; - tmp = edge0p; - edge0p = edge1p; - edge1p = tmp; - } xslope = _mm_sub_ps(_mm_movelh_ps(screen[edge0n], screen[edge1n]), _mm_movelh_ps(screen[edge0p], screen[edge1p])); xslope = _mm_div_ps(xslope, _mm_shuffle_ps(xslope, xslope, _MM_SHUFFLE(3, 3, 1, 1))); xcoords = _mm_add_ps(_mm_movelh_ps(screen[edge0p], screen[edge1p]), _mm_mul_ps(xslope, _mm_sub_ps(_mm_set1_ps(y), _mm_shuffle_ps(screen[edge0p], screen[edge1p], _MM_SHUFFLE(1, 1, 1, 1))))); xcoords = _mm_add_ps(xcoords, _mm_set1_ps(0.5f)); + if (_mm_ucomigt_ss(xcoords, _mm_shuffle_ps(xcoords, xcoords, _MM_SHUFFLE(1, 0, 3, 2)))) + { + xcoords = _mm_shuffle_ps(xcoords, xcoords, _MM_SHUFFLE(1, 0, 3, 2)); + xslope = _mm_shuffle_ps(xslope, xslope, _MM_SHUFFLE(1, 0, 3, 2)); + } for(; y <= nexty; y++, xcoords = _mm_add_ps(xcoords, xslope)) { int startx, endx, offset; startx = _mm_cvtss_si32(xcoords); endx = _mm_cvtss_si32(_mm_movehl_ps(xcoords, xcoords)); - if (startx < 0) startx = 0; - if 
(endx > dpsoftrast.fb_width) endx = dpsoftrast.fb_width; + if (startx < minx) + { + if (startx < 0) startx = 0; + startx += (minx-startx)&~(DPSOFTRAST_DRAW_MAXSPANLENGTH-1); + } + if (endx > maxx) endx = maxx; if (startx >= endx) continue; - for (offset = startx; offset < endx;) + for (offset = startx; offset < endx;offset += DPSOFTRAST_DRAW_MAXSPANLENGTH) { DPSOFTRAST_State_Span *span = &thread->spans[thread->numspans]; span->triangle = thread->numtriangles; span->x = offset; span->y = y; - span->length = endx - offset; - if (span -> length > DPSOFTRAST_DRAW_MAXSPANLENGTH) - span -> length = DPSOFTRAST_DRAW_MAXSPANLENGTH; - offset += span->length; + span->startx = max(minx - offset, 0); + span->endx = min(endx - offset, DPSOFTRAST_DRAW_MAXSPANLENGTH); + if (span->startx >= span->endx) + continue; if (++thread->numspans >= DPSOFTRAST_DRAW_MAXSPANS) DPSOFTRAST_Draw_ProcessSpans(thread); } @@ -4764,20 +4833,21 @@ void DPSOFTRAST_DrawTriangles(int firstvertex, int numvertices, int numtriangles command->clipped = dpsoftrast.drawclipped; command->refcount = dpsoftrast.numthreads; -#ifdef USE_THREADS - DPSOFTRAST_Draw_SyncCommands(); + if (dpsoftrast.usethreads) { int i; + DPSOFTRAST_Draw_SyncCommands(); for (i = 0; i < dpsoftrast.numthreads; i++) { DPSOFTRAST_State_Thread *thread = &dpsoftrast.threads[i]; if (((command->starty < thread->maxy1 && command->endy > thread->miny1) || (command->starty < thread->maxy2 && command->endy > thread->miny2)) && thread->starving) - SDL_CondSignal(thread->drawcond); + Thread_CondSignal(thread->drawcond); } } -#else - DPSOFTRAST_Draw_FlushThreads(); -#endif + else + { + DPSOFTRAST_Draw_FlushThreads(); + } } static void DPSOFTRAST_Draw_InterpretCommands(DPSOFTRAST_State_Thread *thread, int endoffset) @@ -4833,7 +4903,6 @@ static void DPSOFTRAST_Draw_InterpretCommands(DPSOFTRAST_State_Thread *thread, i thread->commandoffset = commandoffset; } -#ifdef USE_THREADS static int DPSOFTRAST_Draw_Thread(void *data) { DPSOFTRAST_State_Thread 
*thread = (DPSOFTRAST_State_Thread *)data; @@ -4845,58 +4914,62 @@ static int DPSOFTRAST_Draw_Thread(void *data) } else { - SDL_LockMutex(thread->drawmutex); + Thread_LockMutex(thread->drawmutex); if (thread->commandoffset == dpsoftrast.drawcommand && thread->index >= 0) { - if (thread->waiting) SDL_CondSignal(thread->waitcond); + if (thread->waiting) Thread_CondSignal(thread->waitcond); thread->starving = true; - SDL_CondWait(thread->drawcond, thread->drawmutex); + Thread_CondWait(thread->drawcond, thread->drawmutex); thread->starving = false; } - SDL_UnlockMutex(thread->drawmutex); + Thread_UnlockMutex(thread->drawmutex); } } return 0; } -#endif static void DPSOFTRAST_Draw_FlushThreads(void) { DPSOFTRAST_State_Thread *thread; int i; DPSOFTRAST_Draw_SyncCommands(); -#ifdef USE_THREADS - for (i = 0; i < dpsoftrast.numthreads; i++) + if (dpsoftrast.usethreads) { - thread = &dpsoftrast.threads[i]; - if (thread->commandoffset != dpsoftrast.drawcommand) + for (i = 0; i < dpsoftrast.numthreads; i++) { - SDL_LockMutex(thread->drawmutex); - if (thread->commandoffset != dpsoftrast.drawcommand && thread->starving) - SDL_CondSignal(thread->drawcond); - SDL_UnlockMutex(thread->drawmutex); + thread = &dpsoftrast.threads[i]; + if (thread->commandoffset != dpsoftrast.drawcommand) + { + Thread_LockMutex(thread->drawmutex); + if (thread->commandoffset != dpsoftrast.drawcommand && thread->starving) + Thread_CondSignal(thread->drawcond); + Thread_UnlockMutex(thread->drawmutex); + } } - } -#endif - for (i = 0; i < dpsoftrast.numthreads; i++) - { - thread = &dpsoftrast.threads[i]; -#ifdef USE_THREADS - if (thread->commandoffset != dpsoftrast.drawcommand) + for (i = 0; i < dpsoftrast.numthreads; i++) { - SDL_LockMutex(thread->drawmutex); + thread = &dpsoftrast.threads[i]; if (thread->commandoffset != dpsoftrast.drawcommand) { - thread->waiting = true; - SDL_CondWait(thread->waitcond, thread->drawmutex); - thread->waiting = false; + Thread_LockMutex(thread->drawmutex); + if 
(thread->commandoffset != dpsoftrast.drawcommand) + { + thread->waiting = true; + Thread_CondWait(thread->waitcond, thread->drawmutex); + thread->waiting = false; + } + Thread_UnlockMutex(thread->drawmutex); } - SDL_UnlockMutex(thread->drawmutex); } -#else - if (thread->commandoffset != dpsoftrast.drawcommand) - DPSOFTRAST_Draw_InterpretCommands(thread, dpsoftrast.drawcommand); -#endif + } + else + { + for (i = 0; i < dpsoftrast.numthreads; i++) + { + thread = &dpsoftrast.threads[i]; + if (thread->commandoffset != dpsoftrast.drawcommand) + DPSOFTRAST_Draw_InterpretCommands(thread, dpsoftrast.drawcommand); + } } dpsoftrast.commandpool.usedcommands = 0; } @@ -4911,7 +4984,7 @@ void DPSOFTRAST_Finish(void) DPSOFTRAST_Flush(); } -void DPSOFTRAST_Init(int width, int height, int numthreads, int interlace, unsigned int *colorpixels, unsigned int *depthpixels) +int DPSOFTRAST_Init(int width, int height, int numthreads, int interlace, unsigned int *colorpixels, unsigned int *depthpixels) { int i; union @@ -4942,12 +5015,9 @@ void DPSOFTRAST_Init(int width, int height, int numthreads, int interlace, unsig dpsoftrast.color[1] = 1; dpsoftrast.color[2] = 1; dpsoftrast.color[3] = 1; - dpsoftrast.interlace = bound(0, interlace, 1); -#ifdef USE_THREADS - dpsoftrast.numthreads = bound(1, numthreads, 64); -#else - dpsoftrast.numthreads = 1; -#endif + dpsoftrast.usethreads = numthreads > 0 && Thread_HasThreads(); + dpsoftrast.interlace = dpsoftrast.usethreads ? bound(0, interlace, 1) : 0; + dpsoftrast.numthreads = dpsoftrast.usethreads ? 
bound(1, numthreads, 64) : 1; dpsoftrast.threads = (DPSOFTRAST_State_Thread *)MM_CALLOC(dpsoftrast.numthreads, sizeof(DPSOFTRAST_State_Thread)); for (i = 0; i < dpsoftrast.numthreads; i++) { @@ -4997,41 +5067,40 @@ void DPSOFTRAST_Init(int width, int height, int numthreads, int interlace, unsig thread->commandoffset = 0; thread->waiting = false; thread->starving = false; -#ifdef USE_THREADS - thread->waitcond = SDL_CreateCond(); - thread->drawcond = SDL_CreateCond(); - thread->drawmutex = SDL_CreateMutex(); -#endif - + thread->validate = -1; DPSOFTRAST_Validate(thread, -1); -#ifdef USE_THREADS - thread->thread = SDL_CreateThread(DPSOFTRAST_Draw_Thread, thread); -#endif + + if (dpsoftrast.usethreads) + { + thread->waitcond = Thread_CreateCond(); + thread->drawcond = Thread_CreateCond(); + thread->drawmutex = Thread_CreateMutex(); + thread->thread = Thread_CreateThread(DPSOFTRAST_Draw_Thread, thread); + } } + return 0; } void DPSOFTRAST_Shutdown(void) { int i; -#ifdef USE_THREADS - if (dpsoftrast.numthreads > 0) + if (dpsoftrast.usethreads && dpsoftrast.numthreads > 0) { DPSOFTRAST_State_Thread *thread; for (i = 0; i < dpsoftrast.numthreads; i++) { thread = &dpsoftrast.threads[i]; - SDL_LockMutex(thread->drawmutex); + Thread_LockMutex(thread->drawmutex); thread->index = -1; - SDL_CondSignal(thread->drawcond); - SDL_UnlockMutex(thread->drawmutex); - SDL_WaitThread(thread->thread, NULL); - SDL_DestroyCond(thread->waitcond); - SDL_DestroyCond(thread->drawcond); - SDL_DestroyMutex(thread->drawmutex); + Thread_CondSignal(thread->drawcond); + Thread_UnlockMutex(thread->drawmutex); + Thread_WaitThread(thread->thread, 0); + Thread_DestroyCond(thread->waitcond); + Thread_DestroyCond(thread->drawcond); + Thread_DestroyMutex(thread->drawmutex); } } -#endif for (i = 0;i < dpsoftrast.texture_end;i++) if (dpsoftrast.texture[i].bytes) MM_FREE(dpsoftrast.texture[i].bytes);