]> git.xonotic.org Git - xonotic/darkplaces.git/blob - thread_sdl.c
Overhauled r_shadow_bouncegrid, it performs much faster, makes use of as many threads...
[xonotic/darkplaces.git] / thread_sdl.c
1 #include <SDL.h>
2 #include <SDL_thread.h>
3 #include "quakedef.h"
4 #include "thread.h"
5
// upper bound on worker threads; clamped to the threads[] array size in TaskQueue_Frame
cvar_t taskqueue_maxthreads = {CVAR_SAVE, "taskqueue_maxthreads", "32", "how many threads to use for executing tasks"};
// selects the pending-task backing store: 1 = doubly linked list, 0 = growable pointer array
cvar_t taskqueue_linkedlist = {CVAR_SAVE, "taskqueue_linkedlist", "1", "whether to use a doubly linked list or an array for the FIFO queue"};
8
// per-worker bookkeeping: just the thread handle returned by Thread_CreateThread
typedef struct taskqueue_state_thread_s
{
	void *handle;
}
taskqueue_state_thread_t;

// global state for the task queue (single instance: taskqueue_state below)
typedef struct taskqueue_state_s
{
	// number of worker threads currently running (0 = tasks run on the calling thread)
	int numthreads;
	taskqueue_state_thread_t threads[1024];

	// we can enqueue this many tasks before execution of them must proceed
	int queue_used;
	int queue_max; // size of queue array
	taskqueue_task_t **queue_tasks;

	// spinlock guarding all queue state in this struct
	Thread_SpinLock command_lock;

	// set nonzero (under command_lock) to ask workers to exit once the queue drains
	volatile uint64_t threads_quit;

	// doubly linked list - enqueue pushes to list.prev, dequeue pops from list.next
	taskqueue_task_t list;
}
taskqueue_state_t;

static taskqueue_state_t taskqueue_state;
36
37 int Thread_Init(void)
38 {
39         Cvar_RegisterVariable(&taskqueue_maxthreads);
40         Cvar_RegisterVariable(&taskqueue_linkedlist);
41 #ifdef THREADDISABLE
42         Con_Printf("Threading disabled in this build\n");
43 #endif
44         // initialize the doubly-linked list header
45         taskqueue_state.list.next = &taskqueue_state.list;
46         taskqueue_state.list.prev = &taskqueue_state.list;
47         return 0;
48 }
49
50 void Thread_Shutdown(void)
51 {
52         if (taskqueue_state.numthreads)
53                 TaskQueue_Frame(true);
54         if (taskqueue_state.queue_tasks)
55                 Mem_Free(taskqueue_state.queue_tasks);
56         taskqueue_state.queue_tasks = NULL;
57 }
58
59 qboolean Thread_HasThreads(void)
60 {
61 #ifdef THREADDISABLE
62         return false;
63 #else
64         return true;
65 #endif
66 }
67
68 void *_Thread_CreateMutex(const char *filename, int fileline)
69 {
70         void *mutex = SDL_CreateMutex();
71 #ifdef THREADDEBUG
72         Sys_PrintfToTerminal("%p mutex create %s:%i\n" , mutex, filename, fileline);
73 #endif
74         return mutex;
75 }
76
77 void _Thread_DestroyMutex(void *mutex, const char *filename, int fileline)
78 {
79 #ifdef THREADDEBUG
80         Sys_PrintfToTerminal("%p mutex destroy %s:%i\n", mutex, filename, fileline);
81 #endif
82         SDL_DestroyMutex((SDL_mutex *)mutex);
83 }
84
85 int _Thread_LockMutex(void *mutex, const char *filename, int fileline)
86 {
87 #ifdef THREADDEBUG
88         Sys_PrintfToTerminal("%p mutex lock %s:%i\n"   , mutex, filename, fileline);
89 #endif
90         return SDL_LockMutex((SDL_mutex *)mutex);
91 }
92
93 int _Thread_UnlockMutex(void *mutex, const char *filename, int fileline)
94 {
95 #ifdef THREADDEBUG
96         Sys_PrintfToTerminal("%p mutex unlock %s:%i\n" , mutex, filename, fileline);
97 #endif
98         return SDL_UnlockMutex((SDL_mutex *)mutex);
99 }
100
101 void *_Thread_CreateCond(const char *filename, int fileline)
102 {
103         void *cond = (void *)SDL_CreateCond();
104 #ifdef THREADDEBUG
105         Sys_PrintfToTerminal("%p cond create %s:%i\n"   , cond, filename, fileline);
106 #endif
107         return cond;
108 }
109
110 void _Thread_DestroyCond(void *cond, const char *filename, int fileline)
111 {
112 #ifdef THREADDEBUG
113         Sys_PrintfToTerminal("%p cond destroy %s:%i\n"   , cond, filename, fileline);
114 #endif
115         SDL_DestroyCond((SDL_cond *)cond);
116 }
117
118 int _Thread_CondSignal(void *cond, const char *filename, int fileline)
119 {
120 #ifdef THREADDEBUG
121         Sys_PrintfToTerminal("%p cond signal %s:%i\n"   , cond, filename, fileline);
122 #endif
123         return SDL_CondSignal((SDL_cond *)cond);
124 }
125
126 int _Thread_CondBroadcast(void *cond, const char *filename, int fileline)
127 {
128 #ifdef THREADDEBUG
129         Sys_PrintfToTerminal("%p cond broadcast %s:%i\n"   , cond, filename, fileline);
130 #endif
131         return SDL_CondBroadcast((SDL_cond *)cond);
132 }
133
134 int _Thread_CondWait(void *cond, void *mutex, const char *filename, int fileline)
135 {
136 #ifdef THREADDEBUG
137         Sys_PrintfToTerminal("%p cond wait %s:%i\n"   , cond, filename, fileline);
138 #endif
139         return SDL_CondWait((SDL_cond *)cond, (SDL_mutex *)mutex);
140 }
141
142 void *_Thread_CreateThread(int (*fn)(void *), void *data, const char *filename, int fileline)
143 {
144         void *thread = (void *)SDL_CreateThread(fn, filename, data);
145 #ifdef THREADDEBUG
146         Sys_PrintfToTerminal("%p thread create %s:%i\n"   , thread, filename, fileline);
147 #endif
148         return thread;
149 }
150
151 int _Thread_WaitThread(void *thread, int retval, const char *filename, int fileline)
152 {
153         int status = retval;
154 #ifdef THREADDEBUG
155         Sys_PrintfToTerminal("%p thread wait %s:%i\n"   , thread, filename, fileline);
156 #endif
157         SDL_WaitThread((SDL_Thread *)thread, &status);
158         return status;
159 }
160
// standard barrier implementation using conds and mutexes
// see: http://www.howforge.com/implementing-barrier-in-pthreads
typedef struct {
	unsigned int needed; // number of threads that must arrive before the barrier opens
	unsigned int called; // how many threads are currently waiting at the barrier
	void *mutex;         // protects needed/called
	void *cond;          // broadcast by the last arrival
} barrier_t;
169
170 void *_Thread_CreateBarrier(unsigned int count, const char *filename, int fileline)
171 {
172         volatile barrier_t *b = (volatile barrier_t *) Z_Malloc(sizeof(barrier_t));
173 #ifdef THREADDEBUG
174         Sys_PrintfToTerminal("%p barrier create(%d) %s:%i\n", b, count, filename, fileline);
175 #endif
176         b->needed = count;
177         b->called = 0;
178         b->mutex = Thread_CreateMutex();
179         b->cond = Thread_CreateCond();
180         return (void *) b;
181 }
182
183 void _Thread_DestroyBarrier(void *barrier, const char *filename, int fileline)
184 {
185         volatile barrier_t *b = (volatile barrier_t *) barrier;
186 #ifdef THREADDEBUG
187         Sys_PrintfToTerminal("%p barrier destroy %s:%i\n", b, filename, fileline);
188 #endif
189         Thread_DestroyMutex(b->mutex);
190         Thread_DestroyCond(b->cond);
191 }
192
// blocks until b->needed threads have called this; the last arrival resets
// the barrier (so it can be reused) and wakes all waiters.
void _Thread_WaitBarrier(void *barrier, const char *filename, int fileline)
{
	volatile barrier_t *b = (volatile barrier_t *) barrier;
#ifdef THREADDEBUG
	Sys_PrintfToTerminal("%p barrier wait %s:%i\n", b, filename, fileline);
#endif
	Thread_LockMutex(b->mutex);
	b->called++;
	if (b->called == b->needed) {
		// last thread to arrive: reset the count and release everyone
		b->called = 0;
		Thread_CondBroadcast(b->cond);
	} else {
		// re-check called after each wakeup to guard against spurious wakeups
		do {
			Thread_CondWait(b->cond, b->mutex);
		} while(b->called);
	}
	Thread_UnlockMutex(b->mutex);
}
211
212 int _Thread_AtomicGet(Thread_Atomic *a, const char *filename, int fileline)
213 {
214 #ifdef THREADDEBUG
215         Sys_PrintfToTerminal("%p atomic get at %s:%i\n", a, v, filename, fileline);
216 #endif
217         return SDL_AtomicGet((SDL_atomic_t *)a);
218 }
219
220 int _Thread_AtomicSet(Thread_Atomic *a, int v, const char *filename, int fileline)
221 {
222 #ifdef THREADDEBUG
223         Sys_PrintfToTerminal("%p atomic set %v at %s:%i\n", a, v, filename, fileline);
224 #endif
225         return SDL_AtomicSet((SDL_atomic_t *)a, v);
226 }
227
228 int _Thread_AtomicAdd(Thread_Atomic *a, int v, const char *filename, int fileline)
229 {
230 #ifdef THREADDEBUG
231         Sys_PrintfToTerminal("%p atomic add %v at %s:%i\n", a, v, filename, fileline);
232 #endif
233         return SDL_AtomicAdd((SDL_atomic_t *)a, v);
234 }
235
236 void _Thread_AtomicIncRef(Thread_Atomic *a, const char *filename, int fileline)
237 {
238 #ifdef THREADDEBUG
239         Sys_PrintfToTerminal("%p atomic incref %s:%i\n", lock, filename, fileline);
240 #endif
241         SDL_AtomicIncRef((SDL_atomic_t *)a);
242 }
243
244 qboolean _Thread_AtomicDecRef(Thread_Atomic *a, const char *filename, int fileline)
245 {
246 #ifdef THREADDEBUG
247         Sys_PrintfToTerminal("%p atomic decref %s:%i\n", lock, filename, fileline);
248 #endif
249         return SDL_AtomicDecRef((SDL_atomic_t *)a) != SDL_FALSE;
250 }
251
252 qboolean _Thread_AtomicTryLock(Thread_SpinLock *lock, const char *filename, int fileline)
253 {
254 #ifdef THREADDEBUG
255         Sys_PrintfToTerminal("%p atomic try lock %s:%i\n", lock, filename, fileline);
256 #endif
257         return SDL_AtomicTryLock(lock) != SDL_FALSE;
258 }
259
// spins until the lock is acquired (SDL spinlock; not recursive)
void _Thread_AtomicLock(Thread_SpinLock *lock, const char *filename, int fileline)
{
#ifdef THREADDEBUG
	Sys_PrintfToTerminal("%p atomic lock %s:%i\n", lock, filename, fileline);
#endif
	SDL_AtomicLock(lock);
}
267
// releases a spinlock taken by _Thread_AtomicLock/_Thread_AtomicTryLock
void _Thread_AtomicUnlock(Thread_SpinLock *lock, const char *filename, int fileline)
{
#ifdef THREADDEBUG
	Sys_PrintfToTerminal("%p atomic unlock %s:%i\n", lock, filename, fileline);
#endif
	SDL_AtomicUnlock(lock);
}
275
// removes and returns the next pending task, preferring the linked list over
// the array queue; returns NULL when both are empty.
// caller must hold taskqueue_state.command_lock.
static taskqueue_task_t *TaskQueue_GetPending(void)
{
	taskqueue_task_t *t = NULL;
	if (taskqueue_state.list.next != &taskqueue_state.list)
	{
		// pop from list.next
		t = taskqueue_state.list.next;
		t->next->prev = t->prev;
		t->prev->next = t->next;
		t->prev = t->next = NULL;
	}
	if (t == NULL)
	{
		if (taskqueue_state.queue_used > 0)
		{
			// array mode: take the first pointer, then shift the rest down (O(n))
			t = taskqueue_state.queue_tasks[0];
			taskqueue_state.queue_used--;
			memmove(taskqueue_state.queue_tasks, taskqueue_state.queue_tasks + 1, taskqueue_state.queue_used * sizeof(taskqueue_task_t *));
			taskqueue_state.queue_tasks[taskqueue_state.queue_used] = NULL;
		}
	}
	return t;
}
299
300 static void TaskQueue_ExecuteTask(taskqueue_task_t *t)
301 {
302         // see if t is waiting on something
303         if (t->preceding && t->preceding->done == 0)
304                 TaskQueue_Yield(t);
305         else
306                 t->func(t);
307 }
308
309 // FIXME: don't use mutex
310 // FIXME: this is basically fibers but less featureful - context switching for yield is not implemented
311 static int TaskQueue_ThreadFunc(void *d)
312 {
313         for (;;)
314         {
315                 qboolean quit;
316                 taskqueue_task_t *t = NULL;
317                 Thread_AtomicLock(&taskqueue_state.command_lock);
318                 quit = taskqueue_state.threads_quit != 0;
319                 t = TaskQueue_GetPending();
320                 Thread_AtomicUnlock(&taskqueue_state.command_lock);
321                 if (t)
322                         TaskQueue_ExecuteTask(t);
323                 else if (quit)
324                         break;
325         }
326         return 0;
327 }
328
329 void TaskQueue_Execute(qboolean force)
330 {
331         // if we have no threads to run the tasks, just start executing them now
332         if (taskqueue_state.numthreads == 0 || force)
333         {
334                 for (;;)
335                 {
336                         taskqueue_task_t *t = NULL;
337                         Thread_AtomicLock(&taskqueue_state.command_lock);
338                         t = TaskQueue_GetPending();
339                         Thread_AtomicUnlock(&taskqueue_state.command_lock);
340                         if (!t)
341                                 break;
342                         TaskQueue_ExecuteTask(t);
343                 }
344         }
345 }
346
// appends numtasks tasks to the pending queue; the backing store (linked
// list vs array) is chosen per call by the taskqueue_linkedlist cvar.
void TaskQueue_Enqueue(int numtasks, taskqueue_task_t *tasks)
{
	int i;
	// try not to spinlock for a long time by breaking up large enqueues
	while (numtasks > 64)
	{
		TaskQueue_Enqueue(64, tasks);
		tasks += 64;
		numtasks -= 64;
	}
	Thread_AtomicLock(&taskqueue_state.command_lock);
	for (i = 0; i < numtasks; i++)
	{
		taskqueue_task_t *t = &tasks[i];
		if (taskqueue_linkedlist.integer)
		{
			// push to list.prev
			t->next = &taskqueue_state.list;
			t->prev = taskqueue_state.list.prev;
			t->next->prev = t;
			t->prev->next = t;
		}
		else
		{
			// array mode: grow the pointer array (doubling, minimum 1024) as needed
			if (taskqueue_state.queue_used >= taskqueue_state.queue_max)
			{
				taskqueue_state.queue_max *= 2;
				if (taskqueue_state.queue_max < 1024)
					taskqueue_state.queue_max = 1024;
				taskqueue_state.queue_tasks = (taskqueue_task_t **)Mem_Realloc(cls.permanentmempool, taskqueue_state.queue_tasks, taskqueue_state.queue_max * sizeof(taskqueue_task_t *));
			}
			taskqueue_state.queue_tasks[taskqueue_state.queue_used++] = t;
		}
	}
	Thread_AtomicUnlock(&taskqueue_state.command_lock);
}
383
// if the task can not yet be completed due to preconditions, just enqueue it again...
void TaskQueue_Yield(taskqueue_task_t *t)
{
	// count requeues - useful when diagnosing tasks that never make progress
	t->yieldcount++;
	TaskQueue_Enqueue(1, t);
}
390
391 void TaskQueue_WaitForTaskDone(taskqueue_task_t *t)
392 {
393         qboolean done = false;
394         while (!done)
395         {
396                 Thread_AtomicLock(&taskqueue_state.command_lock);
397                 done = t->done != 0;
398                 Thread_AtomicUnlock(&taskqueue_state.command_lock);
399                 // if there are no threads, just execute the tasks immediately
400                 if (!done && taskqueue_state.numthreads == 0)
401                         TaskQueue_Execute(true);
402         }
403 }
404
// resizes the worker thread pool to match taskqueue_maxthreads (or to zero
// when shutdown is true); any resize tears down all workers and respawns.
void TaskQueue_Frame(qboolean shutdown)
{
	int numthreads = shutdown ? 0 : bound(0, taskqueue_maxthreads.integer, sizeof(taskqueue_state.threads)/sizeof(taskqueue_state.threads[0]));
#ifdef THREADDISABLE
	numthreads = 0;
#endif
	if (taskqueue_state.numthreads != numthreads)
	{
		int i;
		// ask the existing workers to exit once the queue drains...
		Thread_AtomicLock(&taskqueue_state.command_lock);
		taskqueue_state.threads_quit = 1;
		Thread_AtomicUnlock(&taskqueue_state.command_lock);
		// ...then join each of them
		for (i = 0; i < taskqueue_state.numthreads; i++)
		{
			if (taskqueue_state.threads[i].handle)
				Thread_WaitThread(taskqueue_state.threads[i].handle, 0);
			taskqueue_state.threads[i].handle = NULL;
		}
		// clear the quit flag before spawning the new pool
		Thread_AtomicLock(&taskqueue_state.command_lock);
		taskqueue_state.threads_quit = 0;
		Thread_AtomicUnlock(&taskqueue_state.command_lock);
		taskqueue_state.numthreads = numthreads;
		for (i = 0; i < taskqueue_state.numthreads; i++)
			taskqueue_state.threads[i].handle = Thread_CreateThread(TaskQueue_ThreadFunc, &taskqueue_state.threads[i]);
		// if there are still pending tasks (e.g. no threads), execute them on main thread now
		TaskQueue_Execute(true);
	}
}
433
434 void TaskQueue_Setup(taskqueue_task_t *t, taskqueue_task_t *preceding, void(*func)(taskqueue_task_t *), size_t i0, size_t i1, void *p0, void *p1)
435 {
436         memset(t, 0, sizeof(*t));
437         t->preceding = preceding;
438         t->func = func;
439         t->i[0] = i0;
440         t->i[1] = i1;
441         t->p[0] = p0;
442         t->p[1] = p1;
443 }
444
// task function that completes only when every task in the array at p[0]
// (count in i[0]) is done; otherwise it records its progress and requeues itself.
void TaskQueue_Task_CheckTasksDone(taskqueue_task_t *t)
{
	size_t numtasks = t->i[0];
	taskqueue_task_t *tasks = t->p[0];
	while (numtasks > 0)
	{
		// check the last task first as it's usually going to be the last to finish, so we do the least work by checking it first
		if (!tasks[numtasks - 1].done)
		{
			// update our partial progress, then yield to another pending task.
			t->i[0] = numtasks;
			TaskQueue_Yield(t);
			return;
		}
		numtasks--;
	}
	// all watched tasks finished; mark this task complete
	t->started = 1;
	t->done = 1;
}