git.xonotic.org Git - xonotic/darkplaces.git/blobdiff - gl_rmain.c
changed r_refdef.stats struct to be an array indexed by r_stat_* enums
[xonotic/darkplaces.git] / gl_rmain.c
index 17dc8e5e489cb17a36cfff177e7724e6e78c038d..3b44beeb927026b874c3e019a881d3ea1493875a 100644 (file)
@@ -162,6 +162,7 @@ cvar_t r_viewscale_fpsscaling_stepsize = {CVAR_SAVE, "r_viewscale_fpsscaling_ste
 cvar_t r_viewscale_fpsscaling_stepmax = {CVAR_SAVE, "r_viewscale_fpsscaling_stepmax", "1.00", "largest adjustment to hit the target framerate (this value prevents wild overshooting of the estimate)"};
 cvar_t r_viewscale_fpsscaling_target = {CVAR_SAVE, "r_viewscale_fpsscaling_target", "70", "desired framerate"};
 
+cvar_t r_glsl_skeletal = {CVAR_SAVE, "r_glsl_skeletal", "1", "render skeletal models faster using a gpu-skinning technique"};
 cvar_t r_glsl_deluxemapping = {CVAR_SAVE, "r_glsl_deluxemapping", "1", "use per pixel lighting on deluxemap-compiled q3bsp maps (or a value of 2 forces deluxemap shading even without deluxemaps)"};
 cvar_t r_glsl_offsetmapping = {CVAR_SAVE, "r_glsl_offsetmapping", "0", "offset mapping effect (also known as parallax mapping or virtual displacement mapping)"};
 cvar_t r_glsl_offsetmapping_steps = {CVAR_SAVE, "r_glsl_offsetmapping_steps", "2", "offset mapping steps (note: too high values may be not supported by your GPU)"};
@@ -227,6 +228,7 @@ cvar_t r_test = {0, "r_test", "0", "internal development use only, leave it alon
 
 cvar_t r_batch_multidraw = {CVAR_SAVE, "r_batch_multidraw", "1", "issue multiple glDrawElements calls when rendering a batch of surfaces with the same texture (otherwise the index data is copied to make it one draw)"};
 cvar_t r_batch_multidraw_mintriangles = {CVAR_SAVE, "r_batch_multidraw_mintriangles", "0", "minimum number of triangles to activate multidraw path (copying small groups of triangles may be faster)"};
+cvar_t r_batch_debugdynamicvertexpath = {CVAR_SAVE, "r_batch_debugdynamicvertexpath", "0", "force the dynamic batching code path for debugging purposes"};
 
 cvar_t r_glsl_saturation = {CVAR_SAVE, "r_glsl_saturation", "1", "saturation multiplier (only working in glsl!)"};
 cvar_t r_glsl_saturation_redcompensate = {CVAR_SAVE, "r_glsl_saturation_redcompensate", "0", "a 'vampire sight' addition to desaturation effect, does compensation for red color, r_glsl_restart is required"};
@@ -600,13 +602,17 @@ static void R_BuildFogHeightTexture(void)
 
 //=======================================================================================================================================================
 
-static const char *builtinshaderstring =
+static const char *builtinshaderstrings[] =
+{
 #include "shader_glsl.h"
-;
+0
+};
 
-const char *builtinhlslshaderstring =
+const char *builtinhlslshaderstrings[] =
+{
 #include "shader_hlsl.h"
-;
+0
+};
 
 char *glslshaderstring = NULL;
 char *hlslshaderstring = NULL;
@@ -622,9 +628,7 @@ shaderpermutationinfo_t;
 
 typedef struct shadermodeinfo_s
 {
-       const char *vertexfilename;
-       const char *geometryfilename;
-       const char *fragmentfilename;
+       const char *filename;
        const char *pretext;
        const char *name;
 }
@@ -662,52 +666,51 @@ shaderpermutationinfo_t shaderpermutationinfo[SHADERPERMUTATION_COUNT] =
        {"#define USEBOUNCEGRIDDIRECTIONAL\n", " bouncegriddirectional"}, // TODO make this a static parm
        {"#define USETRIPPY\n", " trippy"},
        {"#define USEDEPTHRGB\n", " depthrgb"},
-       {"#define USEALPHAGENVERTEX\n", "alphagenvertex"}
+       {"#define USEALPHAGENVERTEX\n", " alphagenvertex"},
+       {"#define USESKELETAL\n", " skeletal"}
 };
 
 // NOTE: MUST MATCH ORDER OF SHADERMODE_* ENUMS!
 shadermodeinfo_t glslshadermodeinfo[SHADERMODE_COUNT] =
 {
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_GENERIC\n", " generic"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_POSTPROCESS\n", " postprocess"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_DEPTH_OR_SHADOW\n", " depth/shadow"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_FLATCOLOR\n", " flatcolor"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_VERTEXCOLOR\n", " vertexcolor"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_LIGHTMAP\n", " lightmap"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_FAKELIGHT\n", " fakelight"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_MODELSPACE\n", " lightdirectionmap_modelspace"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_TANGENTSPACE\n", " lightdirectionmap_tangentspace"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_LIGHTMAP\n", " lightdirectionmap_forced_lightmap"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_VERTEXCOLOR\n", " lightdirectionmap_forced_vertexcolor"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_LIGHTDIRECTION\n", " lightdirection"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_LIGHTSOURCE\n", " lightsource"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_REFRACTION\n", " refraction"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_WATER\n", " water"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_SHOWDEPTH\n", " showdepth"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_DEFERREDGEOMETRY\n", " deferredgeometry"},
-       {"glsl/default.glsl", NULL, "glsl/default.glsl", "#define MODE_DEFERREDLIGHTSOURCE\n", " deferredlightsource"},
+       {"glsl/default.glsl", "#define MODE_GENERIC\n", " generic"}, // each row is {filename, pretext (the mode #define), name}
+       {"glsl/default.glsl", "#define MODE_POSTPROCESS\n", " postprocess"},
+       {"glsl/default.glsl", "#define MODE_DEPTH_OR_SHADOW\n", " depth/shadow"},
+       {"glsl/default.glsl", "#define MODE_FLATCOLOR\n", " flatcolor"},
+       {"glsl/default.glsl", "#define MODE_VERTEXCOLOR\n", " vertexcolor"},
+       {"glsl/default.glsl", "#define MODE_LIGHTMAP\n", " lightmap"},
+       {"glsl/default.glsl", "#define MODE_FAKELIGHT\n", " fakelight"},
+       {"glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_MODELSPACE\n", " lightdirectionmap_modelspace"},
+       {"glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_TANGENTSPACE\n", " lightdirectionmap_tangentspace"},
+       {"glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_LIGHTMAP\n", " lightdirectionmap_forced_lightmap"},
+       {"glsl/default.glsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_VERTEXCOLOR\n", " lightdirectionmap_forced_vertexcolor"},
+       {"glsl/default.glsl", "#define MODE_LIGHTDIRECTION\n", " lightdirection"},
+       {"glsl/default.glsl", "#define MODE_LIGHTSOURCE\n", " lightsource"},
+       {"glsl/default.glsl", "#define MODE_REFRACTION\n", " refraction"},
+       {"glsl/default.glsl", "#define MODE_WATER\n", " water"},
+       {"glsl/default.glsl", "#define MODE_DEFERREDGEOMETRY\n", " deferredgeometry"}, // the MODE_SHOWDEPTH row that used to precede this one is gone - NOTE(review): SHADERMODE_SHOWDEPTH must be dropped from the enum too or the order no longer matches; confirm
+       {"glsl/default.glsl", "#define MODE_DEFERREDLIGHTSOURCE\n", " deferredlightsource"},
 };
 
 shadermodeinfo_t hlslshadermodeinfo[SHADERMODE_COUNT] =
 {
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_GENERIC\n", " generic"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_POSTPROCESS\n", " postprocess"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_DEPTH_OR_SHADOW\n", " depth/shadow"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_FLATCOLOR\n", " flatcolor"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_VERTEXCOLOR\n", " vertexcolor"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_LIGHTMAP\n", " lightmap"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_FAKELIGHT\n", " fakelight"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_MODELSPACE\n", " lightdirectionmap_modelspace"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_TANGENTSPACE\n", " lightdirectionmap_tangentspace"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_LIGHTMAP\n", " lightdirectionmap_forced_lightmap"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_VERTEXCOLOR\n", " lightdirectionmap_forced_vertexcolor"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_LIGHTDIRECTION\n", " lightdirection"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_LIGHTSOURCE\n", " lightsource"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_REFRACTION\n", " refraction"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_WATER\n", " water"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_SHOWDEPTH\n", " showdepth"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_DEFERREDGEOMETRY\n", " deferredgeometry"},
-       {"hlsl/default.hlsl", NULL, "hlsl/default.hlsl", "#define MODE_DEFERREDLIGHTSOURCE\n", " deferredlightsource"},
+       {"hlsl/default.hlsl", "#define MODE_GENERIC\n", " generic"}, // each row is {filename, pretext (the mode #define), name}; presumably must follow SHADERMODE_* enum order like the GLSL table - confirm
+       {"hlsl/default.hlsl", "#define MODE_POSTPROCESS\n", " postprocess"},
+       {"hlsl/default.hlsl", "#define MODE_DEPTH_OR_SHADOW\n", " depth/shadow"},
+       {"hlsl/default.hlsl", "#define MODE_FLATCOLOR\n", " flatcolor"},
+       {"hlsl/default.hlsl", "#define MODE_VERTEXCOLOR\n", " vertexcolor"},
+       {"hlsl/default.hlsl", "#define MODE_LIGHTMAP\n", " lightmap"},
+       {"hlsl/default.hlsl", "#define MODE_FAKELIGHT\n", " fakelight"},
+       {"hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_MODELSPACE\n", " lightdirectionmap_modelspace"},
+       {"hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_TANGENTSPACE\n", " lightdirectionmap_tangentspace"},
+       {"hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_LIGHTMAP\n", " lightdirectionmap_forced_lightmap"},
+       {"hlsl/default.hlsl", "#define MODE_LIGHTDIRECTIONMAP_FORCED_VERTEXCOLOR\n", " lightdirectionmap_forced_vertexcolor"},
+       {"hlsl/default.hlsl", "#define MODE_LIGHTDIRECTION\n", " lightdirection"},
+       {"hlsl/default.hlsl", "#define MODE_LIGHTSOURCE\n", " lightsource"},
+       {"hlsl/default.hlsl", "#define MODE_REFRACTION\n", " refraction"},
+       {"hlsl/default.hlsl", "#define MODE_WATER\n", " water"},
+       {"hlsl/default.hlsl", "#define MODE_DEFERREDGEOMETRY\n", " deferredgeometry"}, // the MODE_SHOWDEPTH row that used to precede this one is gone - NOTE(review): SHADERMODE_SHOWDEPTH must be dropped from the enum too; confirm
+       {"hlsl/default.hlsl", "#define MODE_DEFERREDLIGHTSOURCE\n", " deferredlightsource"},
 };
 
 struct r_glsl_permutation_s;
@@ -821,6 +824,7 @@ typedef struct r_glsl_permutation_s
        int loc_ShadowMap_Parameters;
        int loc_ShadowMap_TextureScale;
        int loc_SpecularPower;
+       int loc_Skeletal_Transform12;
        int loc_UserVec1;
        int loc_UserVec2;
        int loc_UserVec3;
@@ -971,25 +975,68 @@ static r_glsl_permutation_t *R_GLSL_FindPermutation(unsigned int mode, unsigned
        return p;
 }
 
-static char *R_GLSL_GetText(const char *filename, qboolean printfromdisknotice)
+// joins a NULL-terminated array of strings into one newly allocated, 0-terminated string (released elsewhere in this file with Mem_Free)
+static char *R_ShaderStrCat(const char **strings)
+{
+       const char **cursor;
+       char *result, *out;
+       size_t total = 1, piece; // total starts at 1 to leave room for the terminating 0 byte
+       // first pass: measure every piece
+       for (cursor = strings;*cursor;cursor++)
+               total += strlen(*cursor);
+       out = result = (char *)Mem_Alloc(r_main_mempool, total);
+       // second pass: copy the pieces end to end
+       for (cursor = strings;*cursor;cursor++)
+       {
+               piece = strlen(*cursor);
+               memcpy(out, *cursor, piece);
+               out += piece;
+       }
+       *out = 0;
+       return result;
+}
+
+static char *R_GetShaderText(const char *filename, qboolean printfromdisknotice, qboolean builtinonly)
 {
        char *shaderstring;
        if (!filename || !filename[0])
                return NULL;
+       // LordHavoc: note that FS_LoadFile appends a 0 byte to make it a valid string, so does R_ShaderStrCat
        if (!strcmp(filename, "glsl/default.glsl"))
        {
+               if (builtinonly)
+                       return R_ShaderStrCat(builtinshaderstrings);
                if (!glslshaderstring)
                {
                        glslshaderstring = (char *)FS_LoadFile(filename, r_main_mempool, false, NULL);
                        if (glslshaderstring)
                                Con_DPrintf("Loading shaders from file %s...\n", filename);
                        else
-                               glslshaderstring = (char *)builtinshaderstring;
+                               glslshaderstring = R_ShaderStrCat(builtinshaderstrings);
                }
                shaderstring = (char *) Mem_Alloc(r_main_mempool, strlen(glslshaderstring) + 1);
                memcpy(shaderstring, glslshaderstring, strlen(glslshaderstring) + 1);
                return shaderstring;
        }
+       if (!strcmp(filename, "hlsl/default.hlsl"))
+       {
+               if (builtinonly)
+                       return R_ShaderStrCat(builtinhlslshaderstrings);
+               if (!hlslshaderstring)
+               {
+                       hlslshaderstring = (char *)FS_LoadFile(filename, r_main_mempool, false, NULL);
+                       if (hlslshaderstring)
+                               Con_DPrintf("Loading shaders from file %s...\n", filename);
+                       else
+                               hlslshaderstring = R_ShaderStrCat(builtinhlslshaderstrings);
+               }
+               shaderstring = (char *) Mem_Alloc(r_main_mempool, strlen(hlslshaderstring) + 1);
+               memcpy(shaderstring, hlslshaderstring, strlen(hlslshaderstring) + 1);
+               return shaderstring;
+       }
+       // we don't have builtin strings for any other files
+       if (builtinonly)
+               return NULL;
        shaderstring = (char *)FS_LoadFile(filename, r_main_mempool, false, NULL);
        if (shaderstring)
        {
@@ -1005,7 +1052,7 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode
        int i;
        int sampler;
        shadermodeinfo_t *modeinfo = glslshadermodeinfo + mode;
-       char *vertexstring, *geometrystring, *fragmentstring;
+       char *sourcestring;
        char permutationname[256];
        int vertstrings_count = 0;
        int geomstrings_count = 0;
@@ -1020,11 +1067,9 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode
        p->program = 0;
 
        permutationname[0] = 0;
-       vertexstring   = R_GLSL_GetText(modeinfo->vertexfilename, true);
-       geometrystring = R_GLSL_GetText(modeinfo->geometryfilename, false);
-       fragmentstring = R_GLSL_GetText(modeinfo->fragmentfilename, false);
+       sourcestring  = R_GetShaderText(modeinfo->filename, true, false);
 
-       strlcat(permutationname, modeinfo->vertexfilename, sizeof(permutationname));
+       strlcat(permutationname, modeinfo->filename, sizeof(permutationname));
 
        // if we can do #version 130, we should (this improves quality of offset/reliefmapping thanks to textureGrad)
        if(vid.support.gl20shaders130)
@@ -1078,17 +1123,9 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode
        fragstrings_count += shaderstaticparms_count;
 
        // now append the shader text itself
-       vertstrings_list[vertstrings_count++] = vertexstring;
-       geomstrings_list[geomstrings_count++] = geometrystring;
-       fragstrings_list[fragstrings_count++] = fragmentstring;
-
-       // if any sources were NULL, clear the respective list
-       if (!vertexstring)
-               vertstrings_count = 0;
-       if (!geometrystring)
-               geomstrings_count = 0;
-       if (!fragmentstring)
-               fragstrings_count = 0;
+       vertstrings_list[vertstrings_count++] = sourcestring;
+       geomstrings_list[geomstrings_count++] = sourcestring;
+       fragstrings_list[fragstrings_count++] = sourcestring;
 
        // compile the shader program
        if (vertstrings_count + geomstrings_count + fragstrings_count)
@@ -1168,6 +1205,7 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode
                p->loc_ShadowMap_Parameters       = qglGetUniformLocation(p->program, "ShadowMap_Parameters");
                p->loc_ShadowMap_TextureScale     = qglGetUniformLocation(p->program, "ShadowMap_TextureScale");
                p->loc_SpecularPower              = qglGetUniformLocation(p->program, "SpecularPower");
+               p->loc_Skeletal_Transform12       = qglGetUniformLocation(p->program, "Skeletal_Transform12");
                p->loc_UserVec1                   = qglGetUniformLocation(p->program, "UserVec1");
                p->loc_UserVec2                   = qglGetUniformLocation(p->program, "UserVec2");
                p->loc_UserVec3                   = qglGetUniformLocation(p->program, "UserVec3");
@@ -1253,12 +1291,8 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode
                Con_Printf("^1GLSL shader %s failed!  some features may not work properly.\n", permutationname);
 
        // free the strings
-       if (vertexstring)
-               Mem_Free(vertexstring);
-       if (geometrystring)
-               Mem_Free(geometrystring);
-       if (fragmentstring)
-               Mem_Free(fragmentstring);
+       if (sourcestring)
+               Mem_Free(sourcestring);
 }
 
 static void R_SetupShader_SetPermutationGLSL(unsigned int mode, unsigned int permutation)
@@ -1290,7 +1324,7 @@ static void R_SetupShader_SetPermutationGLSL(unsigned int mode, unsigned int per
                                }
                                if (i >= SHADERPERMUTATION_COUNT)
                                {
-                                       //Con_Printf("Could not find a working OpenGL 2.0 shader for permutation %s %s\n", shadermodeinfo[mode].vertexfilename, shadermodeinfo[mode].pretext);
+                                       //Con_Printf("Could not find a working OpenGL 2.0 shader for permutation %s %s\n", shadermodeinfo[mode].filename, shadermodeinfo[mode].pretext);
                                        r_glsl_permutation = R_GLSL_FindPermutation(mode, permutation);
                                        qglUseProgram(0);CHECKGLERROR
                                        return; // no bit left to clear, entire mode is broken
@@ -1431,35 +1465,6 @@ static r_hlsl_permutation_t *R_HLSL_FindPermutation(unsigned int mode, unsigned
        return p;
 }
 
-static char *R_HLSL_GetText(const char *filename, qboolean printfromdisknotice)
-{
-       char *shaderstring;
-       if (!filename || !filename[0])
-               return NULL;
-       if (!strcmp(filename, "hlsl/default.hlsl"))
-       {
-               if (!hlslshaderstring)
-               {
-                       hlslshaderstring = (char *)FS_LoadFile(filename, r_main_mempool, false, NULL);
-                       if (hlslshaderstring)
-                               Con_DPrintf("Loading shaders from file %s...\n", filename);
-                       else
-                               hlslshaderstring = (char *)builtinhlslshaderstring;
-               }
-               shaderstring = (char *) Mem_Alloc(r_main_mempool, strlen(hlslshaderstring) + 1);
-               memcpy(shaderstring, hlslshaderstring, strlen(hlslshaderstring) + 1);
-               return shaderstring;
-       }
-       shaderstring = (char *)FS_LoadFile(filename, r_main_mempool, false, NULL);
-       if (shaderstring)
-       {
-               if (printfromdisknotice)
-                       Con_DPrintf("from disk %s... ", filename);
-               return shaderstring;
-       }
-       return shaderstring;
-}
-
 #include <d3dx9.h>
 //#include <d3dx9shader.h>
 //#include <d3dx9mesh.h>
@@ -1528,6 +1533,18 @@ static void R_HLSL_CacheShader(r_hlsl_permutation_t *p, const char *cachename, c
                        {"D3DXCompileShader",                   (void **) &qD3DXCompileShader},
                        {NULL, NULL}
                };
+               // LordHavoc: the June 2010 SDK lacks these macros to make ID3DXBuffer usable in C, and to make it work in both C and C++ the macros are needed...
+#ifndef ID3DXBuffer_GetBufferPointer
+#if !defined(__cplusplus) || defined(CINTERFACE)
+#define ID3DXBuffer_GetBufferPointer(p)   (p)->lpVtbl->GetBufferPointer(p)
+#define ID3DXBuffer_GetBufferSize(p)      (p)->lpVtbl->GetBufferSize(p)
+#define ID3DXBuffer_Release(p)            (p)->lpVtbl->Release(p)
+#else
+#define ID3DXBuffer_GetBufferPointer(p)   (p)->GetBufferPointer()
+#define ID3DXBuffer_GetBufferSize(p)      (p)->GetBufferSize()
+#define ID3DXBuffer_Release(p)            (p)->Release()
+#endif
+#endif
                if (Sys_LoadLibrary(dllnames_d3dx9, &d3dx9_dll, d3dx9_dllfuncs))
                {
                        DWORD shaderflags = 0;
@@ -1608,7 +1625,7 @@ static void R_HLSL_CompilePermutation(r_hlsl_permutation_t *p, unsigned int mode
        int geomstring_length = 0;
        int fragstring_length = 0;
        char *t;
-       char *vertexstring, *geometrystring, *fragmentstring;
+       char *sourcestring;
        char *vertstring, *geomstring, *fragstring;
        char permutationname[256];
        char cachename[256];
@@ -1627,11 +1644,9 @@ static void R_HLSL_CompilePermutation(r_hlsl_permutation_t *p, unsigned int mode
 
        permutationname[0] = 0;
        cachename[0] = 0;
-       vertexstring   = R_HLSL_GetText(modeinfo->vertexfilename, true);
-       geometrystring = R_HLSL_GetText(modeinfo->geometryfilename, false);
-       fragmentstring = R_HLSL_GetText(modeinfo->fragmentfilename, false);
+       sourcestring = R_GetShaderText(modeinfo->filename, true, false);
 
-       strlcat(permutationname, modeinfo->vertexfilename, sizeof(permutationname));
+       strlcat(permutationname, modeinfo->filename, sizeof(permutationname));
        strlcat(cachename, "hlsl/", sizeof(cachename));
 
        // define HLSL so that the shader can tell apart the HLSL compiler and the Cg compiler
@@ -1690,17 +1705,9 @@ static void R_HLSL_CompilePermutation(r_hlsl_permutation_t *p, unsigned int mode
                        cachename[i] = '_';
 
        // now append the shader text itself
-       vertstrings_list[vertstrings_count++] = vertexstring;
-       geomstrings_list[geomstrings_count++] = geometrystring;
-       fragstrings_list[fragstrings_count++] = fragmentstring;
-
-       // if any sources were NULL, clear the respective list
-       if (!vertexstring)
-               vertstrings_count = 0;
-       if (!geometrystring)
-               geomstrings_count = 0;
-       if (!fragmentstring)
-               fragstrings_count = 0;
+       vertstrings_list[vertstrings_count++] = sourcestring;
+       geomstrings_list[geomstrings_count++] = sourcestring;
+       fragstrings_list[fragstrings_count++] = sourcestring;
 
        vertstring_length = 0;
        for (i = 0;i < vertstrings_count;i++)
@@ -1738,12 +1745,8 @@ static void R_HLSL_CompilePermutation(r_hlsl_permutation_t *p, unsigned int mode
                Mem_Free(geomstring);
        if (fragstring)
                Mem_Free(fragstring);
-       if (vertexstring)
-               Mem_Free(vertexstring);
-       if (geometrystring)
-               Mem_Free(geometrystring);
-       if (fragmentstring)
-               Mem_Free(fragmentstring);
+       if (sourcestring)
+               Mem_Free(sourcestring);
 }
 
 static inline void hlslVSSetParameter16f(D3DVSREGISTER_t r, const float *a) {IDirect3DDevice9_SetVertexShaderConstantF(vid_d3d9dev, r, a, 4);}
@@ -1789,7 +1792,7 @@ void R_SetupShader_SetPermutationHLSL(unsigned int mode, unsigned int permutatio
                                }
                                if (i >= SHADERPERMUTATION_COUNT)
                                {
-                                       //Con_Printf("Could not find a working HLSL shader for permutation %s %s\n", shadermodeinfo[mode].vertexfilename, shadermodeinfo[mode].pretext);
+                                       //Con_Printf("Could not find a working HLSL shader for permutation %s %s\n", shadermodeinfo[mode].filename, shadermodeinfo[mode].pretext);
                                        r_hlsl_permutation = R_HLSL_FindPermutation(mode, permutation);
                                        return; // no bit left to clear, entire mode is broken
                                }
@@ -1815,10 +1818,10 @@ static void R_SetupShader_SetPermutationSoft(unsigned int mode, unsigned int per
 void R_GLSL_Restart_f(void)
 {
        unsigned int i, limit;
-       if (glslshaderstring && glslshaderstring != builtinshaderstring)
+       if (glslshaderstring)
                Mem_Free(glslshaderstring);
        glslshaderstring = NULL;
-       if (hlslshaderstring && hlslshaderstring != builtinhlslshaderstring)
+       if (hlslshaderstring)
                Mem_Free(hlslshaderstring);
        hlslshaderstring = NULL;
        switch(vid.renderpath)
@@ -1878,42 +1881,44 @@ void R_GLSL_Restart_f(void)
 
 static void R_GLSL_DumpShader_f(void)
 {
-       int i;
+       int i, language, mode, dupe; // dumps the builtin shader text for each distinct modeinfo filename into a file of that name
+       char *text;
+       shadermodeinfo_t *modeinfo;
        qfile_t *file;
 
-       file = FS_OpenRealFile("glsl/default.glsl", "w", false);
-       if (file)
+       for (language = 0;language < 2;language++) // language 0 walks the GLSL table, language 1 the HLSL table
        {
-               FS_Print(file, "/* The engine may define the following macros:\n");
-               FS_Print(file, "#define VERTEX_SHADER\n#define GEOMETRY_SHADER\n#define FRAGMENT_SHADER\n");
-               for (i = 0;i < SHADERMODE_COUNT;i++)
-                       FS_Print(file, glslshadermodeinfo[i].pretext);
-               for (i = 0;i < SHADERPERMUTATION_COUNT;i++)
-                       FS_Print(file, shaderpermutationinfo[i].pretext);
-               FS_Print(file, "*/\n");
-               FS_Print(file, builtinshaderstring);
-               FS_Close(file);
-               Con_Printf("glsl/default.glsl written\n");
-       }
-       else
-               Con_Printf("failed to write to glsl/default.glsl\n");
-
-       file = FS_OpenRealFile("hlsl/default.hlsl", "w", false);
-       if (file)
-       {
-               FS_Print(file, "/* The engine may define the following macros:\n");
-               FS_Print(file, "#define VERTEX_SHADER\n#define GEOMETRY_SHADER\n#define FRAGMENT_SHADER\n");
-               for (i = 0;i < SHADERMODE_COUNT;i++)
-                       FS_Print(file, hlslshadermodeinfo[i].pretext);
-               for (i = 0;i < SHADERPERMUTATION_COUNT;i++)
-                       FS_Print(file, shaderpermutationinfo[i].pretext);
-               FS_Print(file, "*/\n");
-               FS_Print(file, builtinhlslshaderstring);
-               FS_Close(file);
-               Con_Printf("hlsl/default.hlsl written\n");
+               modeinfo = (language == 0 ? glslshadermodeinfo : hlslshadermodeinfo);
+               for (mode = 0;mode < SHADERMODE_COUNT;mode++)
+               {
+                       // don't dump the same file multiple times (most or all shaders come from the same file)
+                       for (dupe = mode - 1;dupe >= 0;dupe--)
+                               if (!strcmp(modeinfo[mode].filename, modeinfo[dupe].filename))
+                                       break;
+                       if (dupe >= 0) // an earlier mode in this table already used this filename
+                               continue;
+                       text = R_GetShaderText(modeinfo[mode].filename, false, true); // builtinonly = true: take the engine's builtin text, never an existing file on disk
+                       if (!text) // no builtin text exists for this filename, so there is nothing to dump
+                               continue;
+                       file = FS_OpenRealFile(modeinfo[mode].filename, "w", false);
+                       if (file)
+                       {
+                               FS_Print(file, "/* The engine may define the following macros:\n");
+                               FS_Print(file, "#define VERTEX_SHADER\n#define GEOMETRY_SHADER\n#define FRAGMENT_SHADER\n");
+                               for (i = 0;i < SHADERMODE_COUNT;i++) // list every mode define of this language's table in the header comment
+                                       FS_Print(file, modeinfo[i].pretext);
+                               for (i = 0;i < SHADERPERMUTATION_COUNT;i++)
+                                       FS_Print(file, shaderpermutationinfo[i].pretext);
+                               FS_Print(file, "*/\n");
+                               FS_Print(file, text);
+                               FS_Close(file);
+                               Con_Printf("%s written\n", modeinfo[mode].filename);
+                       }
+                       else
+                               Con_Printf("failed to write to %s\n", modeinfo[mode].filename);
+                       Mem_Free(text); // the builtin text was freshly allocated for this call, release it whether or not the write succeeded
+               }
        }
-       else
-               Con_Printf("failed to write to hlsl/default.hlsl\n");
 }
 
 void R_SetupShader_Generic(rtexture_t *first, rtexture_t *second, int texturemode, int rgbscale, qboolean usegamma, qboolean notrippy, qboolean suppresstexalpha)
@@ -1989,13 +1994,16 @@ void R_SetupShader_Generic_NoTexture(qboolean usegamma, qboolean notrippy)
        R_SetupShader_Generic(NULL, NULL, GL_MODULATE, 1, usegamma, notrippy, false);
 }
 
-void R_SetupShader_DepthOrShadow(qboolean notrippy, qboolean depthrgb)
+void R_SetupShader_DepthOrShadow(qboolean notrippy, qboolean depthrgb, qboolean skeletal)
 {
        unsigned int permutation = 0;
        if (r_trippy.integer && !notrippy)
                permutation |= SHADERPERMUTATION_TRIPPY;
        if (depthrgb)
                permutation |= SHADERPERMUTATION_DEPTHRGB;
+       if (skeletal)
+               permutation |= SHADERPERMUTATION_SKELETAL;
+
        if (vid.allowalphatocoverage)
                GL_AlphaToCoverage(false);
        switch (vid.renderpath)
@@ -2029,41 +2037,6 @@ void R_SetupShader_DepthOrShadow(qboolean notrippy, qboolean depthrgb)
        }
 }
 
-void R_SetupShader_ShowDepth(qboolean notrippy)
-{
-       int permutation = 0;
-       if (r_trippy.integer && !notrippy)
-               permutation |= SHADERPERMUTATION_TRIPPY;
-       if (vid.allowalphatocoverage)
-               GL_AlphaToCoverage(false);
-       switch (vid.renderpath)
-       {
-       case RENDERPATH_D3D9:
-#ifdef SUPPORTHLSL
-               R_SetupShader_SetPermutationHLSL(SHADERMODE_SHOWDEPTH, permutation);
-#endif
-               break;
-       case RENDERPATH_D3D10:
-               Con_DPrintf("FIXME D3D10 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__);
-               break;
-       case RENDERPATH_D3D11:
-               Con_DPrintf("FIXME D3D11 %s:%i %s\n", __FILE__, __LINE__, __FUNCTION__);
-               break;
-       case RENDERPATH_GL20:
-       case RENDERPATH_GLES2:
-               R_SetupShader_SetPermutationGLSL(SHADERMODE_SHOWDEPTH, permutation);
-               break;
-       case RENDERPATH_GL13:
-       case RENDERPATH_GLES1:
-               break;
-       case RENDERPATH_GL11:
-               break;
-       case RENDERPATH_SOFT:
-               R_SetupShader_SetPermutationSoft(SHADERMODE_SHOWDEPTH, permutation);
-               break;
-       }
-}
-
 extern qboolean r_shadow_usingdeferredprepass;
 extern rtexture_t *r_shadow_attenuationgradienttexture;
 extern rtexture_t *r_shadow_attenuation2dtexture;
@@ -2693,12 +2666,18 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
                        R_Mesh_TexCoordPointer(2, 3, GL_FLOAT, sizeof(float[3]), rsurface.batchtvector3f, rsurface.batchtvector3f_vertexbuffer, rsurface.batchtvector3f_bufferoffset);
                        R_Mesh_TexCoordPointer(3, 3, GL_FLOAT, sizeof(float[3]), rsurface.batchnormal3f, rsurface.batchnormal3f_vertexbuffer, rsurface.batchnormal3f_bufferoffset);
                        R_Mesh_TexCoordPointer(4, 2, GL_FLOAT, sizeof(float[2]), rsurface.batchtexcoordlightmap2f, rsurface.batchtexcoordlightmap2f_vertexbuffer, rsurface.batchtexcoordlightmap2f_bufferoffset);
+                       R_Mesh_TexCoordPointer(5, 2, GL_FLOAT, sizeof(float[2]), NULL, NULL, 0);
+                       R_Mesh_TexCoordPointer(6, 4, GL_UNSIGNED_BYTE | 0x80000000, sizeof(unsigned char[4]), rsurface.batchskeletalindex4ub, rsurface.batchskeletalindex4ub_vertexbuffer, rsurface.batchskeletalindex4ub_bufferoffset);
+                       R_Mesh_TexCoordPointer(7, 4, GL_UNSIGNED_BYTE, sizeof(unsigned char[4]), rsurface.batchskeletalweight4ub, rsurface.batchskeletalweight4ub_vertexbuffer, rsurface.batchskeletalweight4ub_bufferoffset);
                }
                else
                {
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_VERTEXMESH_VERTEXCOLOR : 0) | BATCHNEED_VERTEXMESH_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_VERTEXMESH_LIGHTMAP : 0) | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_VERTEXMESH_VERTEXCOLOR : 0) | BATCHNEED_VERTEXMESH_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_VERTEXMESH_LIGHTMAP : 0) | (rsurface.entityskeletaltransform3x4 ? BATCHNEED_VERTEXMESH_SKELETAL : 0) | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                        R_Mesh_PrepareVertices_Mesh(rsurface.batchnumvertices, rsurface.batchvertexmesh, rsurface.batchvertexmeshbuffer);
                }
+               // this has to be after RSurf_PrepareVerticesForBatch
+               if (rsurface.batchskeletaltransform3x4)
+                       permutation |= SHADERPERMUTATION_SKELETAL;
                R_SetupShader_SetPermutationGLSL(mode, permutation);
                if (r_glsl_permutation->loc_ModelToReflectCube >= 0) {Matrix4x4_ToArrayFloatGL(&rsurface.matrix, m16f);qglUniformMatrix4fv(r_glsl_permutation->loc_ModelToReflectCube, 1, false, m16f);}
                if (mode == SHADERMODE_LIGHTSOURCE)
@@ -2840,6 +2819,8 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
                        }
                }
                if (r_glsl_permutation->tex_Texture_BounceGrid  >= 0) R_Mesh_TexBind(r_glsl_permutation->tex_Texture_BounceGrid, r_shadow_bouncegridtexture);
+               if (r_glsl_permutation->loc_Skeletal_Transform12 >= 0 && rsurface.batchskeletalnumtransforms > 0)
+                       qglUniform4fv(r_glsl_permutation->loc_Skeletal_Transform12, rsurface.batchskeletalnumtransforms*3, rsurface.batchskeletaltransform3x4);
                CHECKGLERROR
                break;
        case RENDERPATH_GL11:
@@ -4316,6 +4297,8 @@ void GL_Main_Init(void)
        Cvar_RegisterVariable(&r_test);
        Cvar_RegisterVariable(&r_batch_multidraw);
        Cvar_RegisterVariable(&r_batch_multidraw_mintriangles);
+       Cvar_RegisterVariable(&r_batch_debugdynamicvertexpath);
+       Cvar_RegisterVariable(&r_glsl_skeletal);
        Cvar_RegisterVariable(&r_glsl_saturation);
        Cvar_RegisterVariable(&r_glsl_saturation_redcompensate);
        Cvar_RegisterVariable(&r_glsl_vertextextureblend_usebothalphas);
@@ -4560,8 +4543,8 @@ void *R_FrameData_Alloc(size_t size)
        r_framedata_mem->current += size;
 
        // count the usage for stats
-       r_refdef.stats.framedatacurrent = max(r_refdef.stats.framedatacurrent, (int)r_framedata_mem->current);
-       r_refdef.stats.framedatasize = max(r_refdef.stats.framedatasize, (int)r_framedata_mem->size);
+       r_refdef.stats[r_stat_framedatacurrent] = max(r_refdef.stats[r_stat_framedatacurrent], (int)r_framedata_mem->current);
+       r_refdef.stats[r_stat_framedatasize] = max(r_refdef.stats[r_stat_framedatasize], (int)r_framedata_mem->size);
 
        return (void *)data;
 }
@@ -4616,6 +4599,7 @@ void R_AnimCache_ClearCache(void)
                ent->animcache_vertexmesh = NULL;
                ent->animcache_vertex3fbuffer = NULL;
                ent->animcache_vertexmeshbuffer = NULL;
+               ent->animcache_skeletaltransform3x4 = NULL;
        }
 }
 
@@ -4632,6 +4616,9 @@ static void R_AnimCache_UpdateEntityMeshBuffers(entity_render_t *ent, int numver
        // TODO: upload vertex3f buffer?
        if (ent->animcache_vertexmesh)
        {
+               r_refdef.stats[r_stat_animcache_vertexmesh_count] += 1;
+               r_refdef.stats[r_stat_animcache_vertexmesh_vertices] += numvertices;
+               r_refdef.stats[r_stat_animcache_vertexmesh_maxvertices] = max(r_refdef.stats[r_stat_animcache_vertexmesh_maxvertices], numvertices);
                memcpy(ent->animcache_vertexmesh, ent->model->surfmesh.vertexmesh, sizeof(r_vertexmesh_t)*numvertices);
                for (i = 0;i < numvertices;i++)
                        memcpy(ent->animcache_vertexmesh[i].vertex3f, ent->animcache_vertex3f + 3*i, sizeof(float[3]));
@@ -4652,6 +4639,109 @@ qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qbool
 {
        dp_model_t *model = ent->model;
        int numvertices;
+
+       // cache skeletal animation data first (primarily for gpu-skinning)
+       if (!ent->animcache_skeletaltransform3x4 && model->num_bones > 0 && model->surfmesh.data_skeletalindex4ub)
+       {
+               int i;
+               int blends;
+               const skeleton_t *skeleton = ent->skeleton;
+               const frameblend_t *frameblend = ent->frameblend;
+               float *boneposerelative;
+               float m[12];
+               static float bonepose[256][12];
+               r_refdef.stats[r_stat_animcache_skeletal_count] += 1;
+               r_refdef.stats[r_stat_animcache_skeletal_bones] += model->num_bones;
+               r_refdef.stats[r_stat_animcache_skeletal_maxbones] = max(r_refdef.stats[r_stat_animcache_skeletal_maxbones], model->num_bones);
+               ent->animcache_skeletaltransform3x4 = (float *)R_FrameData_Alloc(sizeof(float[3][4]) * model->num_bones);
+               boneposerelative = ent->animcache_skeletaltransform3x4;
+               if (skeleton && !skeleton->relativetransforms)
+                       skeleton = NULL;
+               // resolve hierarchy and make relative transforms (deforms) which the shader wants
+               if (skeleton)
+               {
+                       for (i = 0;i < model->num_bones;i++)
+                       {
+                               Matrix4x4_ToArray12FloatD3D(&skeleton->relativetransforms[i], m);
+                               if (model->data_bones[i].parent >= 0)
+                                       R_ConcatTransforms(bonepose[model->data_bones[i].parent], m, bonepose[i]);
+                               else
+                                       memcpy(bonepose[i], m, sizeof(m));
+
+                               // create a relative deformation matrix to describe displacement
+                               // from the base mesh, which is used by the actual weighting
+                               R_ConcatTransforms(bonepose[i], model->data_baseboneposeinverse + i * 12, boneposerelative + i * 12);
+                       }
+               }
+               else
+               {
+                       for (i = 0;i < model->num_bones;i++)
+                       {
+                               const short * RESTRICT pose7s = model->data_poses7s + 7 * (frameblend[0].subframe * model->num_bones + i);
+                               float lerp = frameblend[0].lerp,
+                                       tx = pose7s[0], ty = pose7s[1], tz = pose7s[2],
+                                       rx = pose7s[3] * lerp,
+                                       ry = pose7s[4] * lerp,
+                                       rz = pose7s[5] * lerp,
+                                       rw = pose7s[6] * lerp,
+                                       dx = tx*rw + ty*rz - tz*ry,
+                                       dy = -tx*rz + ty*rw + tz*rx,
+                                       dz = tx*ry - ty*rx + tz*rw,
+                                       dw = -tx*rx - ty*ry - tz*rz,
+                                       scale, sx, sy, sz, sw;
+                               for (blends = 1;blends < MAX_FRAMEBLENDS && frameblend[blends].lerp > 0;blends++)
+                               {
+                                       const short * RESTRICT pose7s = model->data_poses7s + 7 * (frameblend[blends].subframe * model->num_bones + i);
+                                       float lerp = frameblend[blends].lerp,
+                                               tx = pose7s[0], ty = pose7s[1], tz = pose7s[2],
+                                               qx = pose7s[3], qy = pose7s[4], qz = pose7s[5], qw = pose7s[6];
+                                       if(rx*qx + ry*qy + rz*qz + rw*qw < 0) lerp = -lerp;
+                                       qx *= lerp;
+                                       qy *= lerp;
+                                       qz *= lerp;
+                                       qw *= lerp;
+                                       rx += qx;
+                                       ry += qy;
+                                       rz += qz;
+                                       rw += qw;
+                                       dx += tx*qw + ty*qz - tz*qy;
+                                       dy += -tx*qz + ty*qw + tz*qx;
+                                       dz += tx*qy - ty*qx + tz*qw;
+                                       dw += -tx*qx - ty*qy - tz*qz;
+                               }
+                               scale = 1.0f / (rx*rx + ry*ry + rz*rz + rw*rw);
+                               sx = rx * scale;
+                               sy = ry * scale;
+                               sz = rz * scale;
+                               sw = rw * scale;
+                               m[0] = sw*rw + sx*rx - sy*ry - sz*rz;
+                               m[1] = 2*(sx*ry - sw*rz);
+                               m[2] = 2*(sx*rz + sw*ry);
+                               m[3] = model->num_posescale*(dx*sw - dy*sz + dz*sy - dw*sx);
+                               m[4] = 2*(sx*ry + sw*rz);
+                               m[5] = sw*rw + sy*ry - sx*rx - sz*rz;
+                               m[6] = 2*(sy*rz - sw*rx);
+                               m[7] = model->num_posescale*(dx*sz + dy*sw - dz*sx - dw*sy);
+                               m[8] = 2*(sx*rz - sw*ry);
+                               m[9] = 2*(sy*rz + sw*rx);
+                               m[10] = sw*rw + sz*rz - sx*rx - sy*ry;
+                               m[11] = model->num_posescale*(dy*sx + dz*sw - dx*sy - dw*sz);
+                               if (i == r_skeletal_debugbone.integer)
+                                       m[r_skeletal_debugbonecomponent.integer % 12] += r_skeletal_debugbonevalue.value;
+                               m[3] *= r_skeletal_debugtranslatex.value;
+                               m[7] *= r_skeletal_debugtranslatey.value;
+                               m[11] *= r_skeletal_debugtranslatez.value;
+                               if (model->data_bones[i].parent >= 0)
+                                       R_ConcatTransforms(bonepose[model->data_bones[i].parent], m, bonepose[i]);
+                               else
+                                       memcpy(bonepose[i], m, sizeof(m));
+                               // create a relative deformation matrix to describe displacement
+                               // from the base mesh, which is used by the actual weighting
+                               R_ConcatTransforms(bonepose[i], model->data_baseboneposeinverse + i * 12, boneposerelative + i * 12);
+                       }
+               }
+       }
+
        // see if it's already cached this frame
        if (ent->animcache_vertex3f)
        {
@@ -4674,6 +4764,9 @@ qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qbool
                                }
                                model->AnimateVertices(model, ent->frameblend, ent->skeleton, NULL, wantnormals ? ent->animcache_normal3f : NULL, wanttangents ? ent->animcache_svector3f : NULL, wanttangents ? ent->animcache_tvector3f : NULL);
                                R_AnimCache_UpdateEntityMeshBuffers(ent, model->surfmesh.num_vertices);
+                               r_refdef.stats[r_stat_animcache_shade_count] += 1;
+                               r_refdef.stats[r_stat_animcache_shade_vertices] += numvertices;
+                               r_refdef.stats[r_stat_animcache_shade_maxvertices] = max(r_refdef.stats[r_stat_animcache_shade_maxvertices], numvertices);
                        }
                }
        }
@@ -4682,6 +4775,24 @@ qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qbool
                // see if this ent is worth caching
                if (!model || !model->Draw || !model->surfmesh.isanimated || !model->AnimateVertices)
                        return false;
+               // skip entity if the shader backend has a cheaper way
+               if (model->surfmesh.data_skeletalindex4ub && r_glsl_skeletal.integer && !r_showsurfaces.integer) // FIXME add r_showsurfaces support to GLSL skeletal!
+               {
+                       switch (vid.renderpath)
+                       {
+                       case RENDERPATH_GL20:
+                               return false;
+                       case RENDERPATH_GL11:
+                       case RENDERPATH_GL13:
+                       case RENDERPATH_GLES1:
+                       case RENDERPATH_GLES2:
+                       case RENDERPATH_D3D9:
+                       case RENDERPATH_D3D10:
+                       case RENDERPATH_D3D11:
+                       case RENDERPATH_SOFT:
+                               break;
+                       }
+               }
                // get some memory for this entity and generate mesh data
                numvertices = model->surfmesh.num_vertices;
                ent->animcache_vertex3f = (float *)R_FrameData_Alloc(sizeof(float[3])*numvertices);
@@ -4694,6 +4805,15 @@ qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qbool
                }
                model->AnimateVertices(model, ent->frameblend, ent->skeleton, ent->animcache_vertex3f, ent->animcache_normal3f, ent->animcache_svector3f, ent->animcache_tvector3f);
                R_AnimCache_UpdateEntityMeshBuffers(ent, model->surfmesh.num_vertices);
+               if (wantnormals || wanttangents)
+               {
+                       r_refdef.stats[r_stat_animcache_shade_count] += 1;
+                       r_refdef.stats[r_stat_animcache_shade_vertices] += numvertices;
+                       r_refdef.stats[r_stat_animcache_shade_maxvertices] = max(r_refdef.stats[r_stat_animcache_shade_maxvertices], numvertices);
+               }
+               r_refdef.stats[r_stat_animcache_shape_count] += 1;
+               r_refdef.stats[r_stat_animcache_shape_vertices] += numvertices;
+               r_refdef.stats[r_stat_animcache_shape_maxvertices] = max(r_refdef.stats[r_stat_animcache_shape_maxvertices], numvertices);
        }
        return true;
 }
@@ -4980,7 +5100,7 @@ static void R_DrawModels(void)
                if (!r_refdef.viewcache.entityvisible[i])
                        continue;
                ent = r_refdef.scene.entities[i];
-               r_refdef.stats.entities++;
+               r_refdef.stats[r_stat_entities]++;
                /*
                if (ent->model && !strncmp(ent->model->name, "models/proto_", 13))
                {
@@ -6241,14 +6361,14 @@ static void R_Bloom_MakeTexture(void)
        rtexture_t *intex;
        float colorscale = r_bloom_colorscale.value;
 
-       r_refdef.stats.bloom++;
+       r_refdef.stats[r_stat_bloom]++;
     
 #if 0
     // this copy is unnecessary since it happens in R_BlendView already
        if (!r_fb.fbo)
        {
                R_Mesh_CopyToTexture(r_fb.colortexture, 0, 0, r_refdef.view.viewport.x, r_refdef.view.viewport.y, r_refdef.view.viewport.width, r_refdef.view.viewport.height);
-               r_refdef.stats.bloom_copypixels += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
+               r_refdef.stats[r_stat_bloom_copypixels] += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
        }
 #endif
 
@@ -6280,14 +6400,14 @@ static void R_Bloom_MakeTexture(void)
        // TODO: do boxfilter scale-down in shader?
        R_SetupShader_Generic(r_fb.colortexture, NULL, GL_MODULATE, 1, false, true, true);
        R_Mesh_Draw(0, 4, 0, 2, polygonelement3i, NULL, 0, polygonelement3s, NULL, 0);
-       r_refdef.stats.bloom_drawpixels += r_fb.bloomwidth * r_fb.bloomheight;
+       r_refdef.stats[r_stat_bloom_drawpixels] += r_fb.bloomwidth * r_fb.bloomheight;
 
        // we now have a properly scaled bloom image
        if (!r_fb.bloomfbo[r_fb.bloomindex])
        {
                // copy it into the bloom texture
                R_Mesh_CopyToTexture(r_fb.bloomtexture[r_fb.bloomindex], 0, 0, r_fb.bloomviewport.x, r_fb.bloomviewport.y, r_fb.bloomviewport.width, r_fb.bloomviewport.height);
-               r_refdef.stats.bloom_copypixels += r_fb.bloomviewport.width * r_fb.bloomviewport.height;
+               r_refdef.stats[r_stat_bloom_copypixels] += r_fb.bloomviewport.width * r_fb.bloomviewport.height;
        }
 
        // multiply bloom image by itself as many times as desired
@@ -6313,13 +6433,13 @@ static void R_Bloom_MakeTexture(void)
                R_Mesh_PrepareVertices_Generic_Arrays(4, r_screenvertex3f, NULL, r_fb.bloomtexcoord2f);
                R_SetupShader_Generic(intex, NULL, GL_MODULATE, 1, false, true, false);
                R_Mesh_Draw(0, 4, 0, 2, polygonelement3i, NULL, 0, polygonelement3s, NULL, 0);
-               r_refdef.stats.bloom_drawpixels += r_fb.bloomwidth * r_fb.bloomheight;
+               r_refdef.stats[r_stat_bloom_drawpixels] += r_fb.bloomwidth * r_fb.bloomheight;
 
                if (!r_fb.bloomfbo[r_fb.bloomindex])
                {
                        // copy the darkened image to a texture
                        R_Mesh_CopyToTexture(r_fb.bloomtexture[r_fb.bloomindex], 0, 0, r_fb.bloomviewport.x, r_fb.bloomviewport.y, r_fb.bloomviewport.width, r_fb.bloomviewport.height);
-                       r_refdef.stats.bloom_copypixels += r_fb.bloomviewport.width * r_fb.bloomviewport.height;
+                       r_refdef.stats[r_stat_bloom_copypixels] += r_fb.bloomviewport.width * r_fb.bloomviewport.height;
                }
        }
 
@@ -6365,7 +6485,7 @@ static void R_Bloom_MakeTexture(void)
                        GL_Color(r, r, r, 1);
                        R_Mesh_PrepareVertices_Generic_Arrays(4, r_screenvertex3f, NULL, r_fb.offsettexcoord2f);
                        R_Mesh_Draw(0, 4, 0, 2, polygonelement3i, NULL, 0, polygonelement3s, NULL, 0);
-                       r_refdef.stats.bloom_drawpixels += r_fb.bloomwidth * r_fb.bloomheight;
+                       r_refdef.stats[r_stat_bloom_drawpixels] += r_fb.bloomwidth * r_fb.bloomheight;
                        GL_BlendFunc(GL_ONE, GL_ONE);
                }
 
@@ -6373,7 +6493,7 @@ static void R_Bloom_MakeTexture(void)
                {
                        // copy the vertically or horizontally blurred bloom view to a texture
                        R_Mesh_CopyToTexture(r_fb.bloomtexture[r_fb.bloomindex], 0, 0, r_fb.bloomviewport.x, r_fb.bloomviewport.y, r_fb.bloomviewport.width, r_fb.bloomviewport.height);
-                       r_refdef.stats.bloom_copypixels += r_fb.bloomviewport.width * r_fb.bloomviewport.height;
+                       r_refdef.stats[r_stat_bloom_copypixels] += r_fb.bloomviewport.width * r_fb.bloomviewport.height;
                }
        }
 }
@@ -6405,7 +6525,7 @@ static void R_BlendView(int fbo, rtexture_t *depthtexture, rtexture_t *colortext
                        if (!r_fb.fbo)
                        {
                                R_Mesh_CopyToTexture(r_fb.colortexture, 0, 0, r_refdef.view.viewport.x, r_refdef.view.viewport.y, r_refdef.view.viewport.width, r_refdef.view.viewport.height);
-                               r_refdef.stats.bloom_copypixels += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
+                               r_refdef.stats[r_stat_bloom_copypixels] += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
                        }
 
                        if(!R_Stereo_Active() && (r_motionblur.value > 0 || r_damageblur.value > 0) && r_fb.ghosttexture)
@@ -6471,7 +6591,7 @@ static void R_BlendView(int fbo, rtexture_t *depthtexture, rtexture_t *colortext
                                        }
                                        R_SetupShader_Generic(r_fb.ghosttexture, NULL, GL_MODULATE, 1, false, true, true);
                                        R_Mesh_Draw(0, 4, 0, 2, polygonelement3i, NULL, 0, polygonelement3s, NULL, 0);
-                                       r_refdef.stats.bloom_drawpixels += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
+                                       r_refdef.stats[r_stat_bloom_drawpixels] += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
                                }
 
                                // updates old view angles for next pass
@@ -6479,7 +6599,7 @@ static void R_BlendView(int fbo, rtexture_t *depthtexture, rtexture_t *colortext
 
                                // copy view into the ghost texture
                                R_Mesh_CopyToTexture(r_fb.ghosttexture, 0, 0, r_refdef.view.viewport.x, r_refdef.view.viewport.y, r_refdef.view.viewport.width, r_refdef.view.viewport.height);
-                               r_refdef.stats.bloom_copypixels += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
+                               r_refdef.stats[r_stat_bloom_copypixels] += r_refdef.view.viewport.width * r_refdef.view.viewport.height;
                                r_fb.ghosttexture_valid = true;
                        }
                }
@@ -6587,7 +6707,7 @@ static void R_BlendView(int fbo, rtexture_t *depthtexture, rtexture_t *colortext
                        break;
                }
                R_Mesh_Draw(0, 4, 0, 2, polygonelement3i, NULL, 0, polygonelement3s, NULL, 0);
-               r_refdef.stats.bloom_drawpixels += r_refdef.view.width * r_refdef.view.height;
+               r_refdef.stats[r_stat_bloom_drawpixels] += r_refdef.view.width * r_refdef.view.height;
                break;
        case RENDERPATH_GL11:
        case RENDERPATH_GL13:
@@ -7022,7 +7142,7 @@ void R_RenderScene(int fbo, rtexture_t *depthtexture, rtexture_t *colortexture)
        if (r_timereport_active)
                R_TimeReport("beginscene");
 
-       r_refdef.stats.renders++;
+       r_refdef.stats[r_stat_renders]++;
 
        R_UpdateFog();
 
@@ -8125,6 +8245,8 @@ void RSurf_ActiveWorldEntity(void)
        rsurface.ent_alttextures = false;
        rsurface.basepolygonfactor = r_refdef.polygonfactor;
        rsurface.basepolygonoffset = r_refdef.polygonoffset;
+       rsurface.entityskeletaltransform3x4 = NULL;
+       rsurface.entityskeletalnumtransforms = 0;
        rsurface.modelvertex3f  = model->surfmesh.data_vertex3f;
        rsurface.modelvertex3f_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
        rsurface.modelvertex3f_bufferoffset = model->surfmesh.vbooffset_vertex3f;
@@ -8146,6 +8268,12 @@ void RSurf_ActiveWorldEntity(void)
        rsurface.modeltexcoordlightmap2f  = model->surfmesh.data_texcoordlightmap2f;
        rsurface.modeltexcoordlightmap2f_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
        rsurface.modeltexcoordlightmap2f_bufferoffset = model->surfmesh.vbooffset_texcoordlightmap2f;
+       rsurface.modelskeletalindex4ub = model->surfmesh.data_skeletalindex4ub;
+       rsurface.modelskeletalindex4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalindex4ub_bufferoffset = model->surfmesh.vbooffset_skeletalindex4ub;
+       rsurface.modelskeletalweight4ub = model->surfmesh.data_skeletalweight4ub;
+       rsurface.modelskeletalweight4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalweight4ub_bufferoffset = model->surfmesh.vbooffset_skeletalweight4ub;
        rsurface.modelelement3i = model->surfmesh.data_element3i;
        rsurface.modelelement3i_indexbuffer = model->surfmesh.data_element3i_indexbuffer;
        rsurface.modelelement3i_bufferoffset = model->surfmesh.data_element3i_bufferoffset;
@@ -8186,6 +8314,12 @@ void RSurf_ActiveWorldEntity(void)
        rsurface.batchtexcoordlightmap2f = NULL;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
        rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+       rsurface.batchskeletalindex4ub = NULL;
+       rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalindex4ub_bufferoffset = 0;
+       rsurface.batchskeletalweight4ub = NULL;
+       rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalweight4ub_bufferoffset = 0;
        rsurface.batchvertexmesh = NULL;
        rsurface.batchvertexmeshbuffer = NULL;
        rsurface.batchvertex3fbuffer = NULL;
@@ -8241,7 +8375,10 @@ void RSurf_ActiveModelEntity(const entity_render_t *ent, qboolean wantnormals, q
                rsurface.basepolygonfactor += r_polygonoffset_submodel_factor.value;
                rsurface.basepolygonoffset += r_polygonoffset_submodel_offset.value;
        }
-       if (model->surfmesh.isanimated && model->AnimateVertices)
+       // if the animcache code decided it should use the shader path, skip the deform step
+       rsurface.entityskeletaltransform3x4 = ent->animcache_vertex3f ? NULL : ent->animcache_skeletaltransform3x4;
+       rsurface.entityskeletalnumtransforms = rsurface.entityskeletaltransform3x4 ? model->num_bones : 0;
+       if (model->surfmesh.isanimated && model->AnimateVertices && !rsurface.entityskeletaltransform3x4)
        {
                if (ent->animcache_vertex3f)
                {
@@ -8324,6 +8461,12 @@ void RSurf_ActiveModelEntity(const entity_render_t *ent, qboolean wantnormals, q
        rsurface.modeltexcoordlightmap2f  = model->surfmesh.data_texcoordlightmap2f;
        rsurface.modeltexcoordlightmap2f_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
        rsurface.modeltexcoordlightmap2f_bufferoffset = model->surfmesh.vbooffset_texcoordlightmap2f;
+       rsurface.modelskeletalindex4ub = model->surfmesh.data_skeletalindex4ub;
+       rsurface.modelskeletalindex4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalindex4ub_bufferoffset = model->surfmesh.vbooffset_skeletalindex4ub;
+       rsurface.modelskeletalweight4ub = model->surfmesh.data_skeletalweight4ub;
+       rsurface.modelskeletalweight4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalweight4ub_bufferoffset = model->surfmesh.vbooffset_skeletalweight4ub;
        rsurface.modelelement3i = model->surfmesh.data_element3i;
        rsurface.modelelement3i_indexbuffer = model->surfmesh.data_element3i_indexbuffer;
        rsurface.modelelement3i_bufferoffset = model->surfmesh.data_element3i_bufferoffset;
@@ -8360,6 +8503,12 @@ void RSurf_ActiveModelEntity(const entity_render_t *ent, qboolean wantnormals, q
        rsurface.batchtexcoordlightmap2f = NULL;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
        rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+       rsurface.batchskeletalindex4ub = NULL;
+       rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalindex4ub_bufferoffset = 0;
+       rsurface.batchskeletalweight4ub = NULL;
+       rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalweight4ub_bufferoffset = 0;
        rsurface.batchvertexmesh = NULL;
        rsurface.batchvertexmeshbuffer = NULL;
        rsurface.batchvertex3fbuffer = NULL;
@@ -8408,6 +8557,8 @@ void RSurf_ActiveCustomEntity(const matrix4x4_t *matrix, const matrix4x4_t *inve
        rsurface.ent_alttextures = false;
        rsurface.basepolygonfactor = r_refdef.polygonfactor;
        rsurface.basepolygonoffset = r_refdef.polygonoffset;
+       rsurface.entityskeletaltransform3x4 = NULL;
+       rsurface.entityskeletalnumtransforms = 0;
        if (wanttangents)
        {
                rsurface.modelvertex3f = (float *)vertex3f;
@@ -8450,6 +8601,12 @@ void RSurf_ActiveCustomEntity(const matrix4x4_t *matrix, const matrix4x4_t *inve
        rsurface.modeltexcoordlightmap2f  = NULL;
        rsurface.modeltexcoordlightmap2f_vertexbuffer = 0;
        rsurface.modeltexcoordlightmap2f_bufferoffset = 0;
+       rsurface.modelskeletalindex4ub = NULL;
+       rsurface.modelskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.modelskeletalindex4ub_bufferoffset = 0;
+       rsurface.modelskeletalweight4ub = NULL;
+       rsurface.modelskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.modelskeletalweight4ub_bufferoffset = 0;
        rsurface.modelelement3i = (int *)element3i;
        rsurface.modelelement3i_indexbuffer = NULL;
        rsurface.modelelement3i_bufferoffset = 0;
@@ -8484,6 +8641,12 @@ void RSurf_ActiveCustomEntity(const matrix4x4_t *matrix, const matrix4x4_t *inve
        rsurface.batchtexcoordlightmap2f = NULL;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
        rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+       rsurface.batchskeletalindex4ub = NULL;
+       rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalindex4ub_bufferoffset = 0;
+       rsurface.batchskeletalweight4ub = NULL;
+       rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalweight4ub_bufferoffset = 0;
        rsurface.batchvertexmesh = NULL;
        rsurface.batchvertexmeshbuffer = NULL;
        rsurface.batchvertex3fbuffer = NULL;
@@ -8568,6 +8731,7 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        int surfacefirstvertex;
        int surfaceendvertex;
        int surfacenumvertices;
+       int batchnumsurfaces = texturenumsurfaces;
        int batchnumvertices;
        int batchnumtriangles;
        int needsupdate;
@@ -8579,6 +8743,7 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        float scale;
        float center[3], forward[3], right[3], up[3], v[3], newforward[3], newright[3], newup[3];
        float waveparms[4];
+       unsigned char *ub;
        q3shaderinfo_deform_t *deform;
        const msurface_t *surface, *firstsurface;
        r_vertexmesh_t *vertexmesh;
@@ -8608,6 +8773,13 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                batchnumtriangles += surfacenumtriangles;
        }
 
+       r_refdef.stats[r_stat_batch_batches]++;
+       if (gaps)
+               r_refdef.stats[r_stat_batch_withgaps]++;
+       r_refdef.stats[r_stat_batch_surfaces] += batchnumsurfaces;
+       r_refdef.stats[r_stat_batch_vertices] += batchnumvertices;
+       r_refdef.stats[r_stat_batch_triangles] += batchnumtriangles;
+
        // we now know the vertex range used, and if there are any gaps in it
        rsurface.batchfirstvertex = firstvertex;
        rsurface.batchnumvertices = endvertex - firstvertex;
@@ -8621,11 +8793,30 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        // check if any dynamic vertex processing must occur
        dynamicvertex = false;
 
+       // a cvar to force the dynamic vertex path to be taken, for debugging
+       if (r_batch_debugdynamicvertexpath.integer)
+       {
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_cvar] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_cvar] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_cvar] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_cvar] += batchnumtriangles;
+               }
+               dynamicvertex = true;
+       }
+
        // if there is a chance of animated vertex colors, it's a dynamic batch
        if ((batchneed & (BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_ARRAY_VERTEXCOLOR)) && texturesurfacelist[0]->lightmapinfo)
        {
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_lightmapvertex] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_lightmapvertex] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_lightmapvertex] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_lightmapvertex] += batchnumtriangles;
+               }
                dynamicvertex = true;
-               batchneed |= BATCHNEED_NOGAPS;
                needsupdate |= BATCHNEED_VERTEXMESH_VERTEXCOLOR;
        }
 
@@ -8646,37 +8837,79 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                case Q3DEFORM_NONE:
                        break;
                case Q3DEFORM_AUTOSPRITE:
+                       if (!dynamicvertex)
+                       {
+                               r_refdef.stats[r_stat_batch_dynamic_batches_because_deformvertexes_autosprite] += 1;
+                               r_refdef.stats[r_stat_batch_dynamic_surfaces_because_deformvertexes_autosprite] += batchnumsurfaces;
+                               r_refdef.stats[r_stat_batch_dynamic_vertices_because_deformvertexes_autosprite] += batchnumvertices;
+                               r_refdef.stats[r_stat_batch_dynamic_triangles_because_deformvertexes_autosprite] += batchnumtriangles;
+                       }
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_AUTOSPRITE2:
+                       if (!dynamicvertex)
+                       {
+                               r_refdef.stats[r_stat_batch_dynamic_batches_because_deformvertexes_autosprite2] += 1;
+                               r_refdef.stats[r_stat_batch_dynamic_surfaces_because_deformvertexes_autosprite2] += batchnumsurfaces;
+                               r_refdef.stats[r_stat_batch_dynamic_vertices_because_deformvertexes_autosprite2] += batchnumvertices;
+                               r_refdef.stats[r_stat_batch_dynamic_triangles_because_deformvertexes_autosprite2] += batchnumtriangles;
+                       }
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_NORMAL:
+                       if (!dynamicvertex)
+                       {
+                               r_refdef.stats[r_stat_batch_dynamic_batches_because_deformvertexes_normal] += 1;
+                               r_refdef.stats[r_stat_batch_dynamic_surfaces_because_deformvertexes_normal] += batchnumsurfaces;
+                               r_refdef.stats[r_stat_batch_dynamic_vertices_because_deformvertexes_normal] += batchnumvertices;
+                               r_refdef.stats[r_stat_batch_dynamic_triangles_because_deformvertexes_normal] += batchnumtriangles;
+                       }
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_WAVE:
                        if(!R_TestQ3WaveFunc(deform->wavefunc, deform->waveparms))
                                break; // if wavefunc is a nop, ignore this transform
+                       if (!dynamicvertex)
+                       {
+                               r_refdef.stats[r_stat_batch_dynamic_batches_because_deformvertexes_wave] += 1;
+                               r_refdef.stats[r_stat_batch_dynamic_surfaces_because_deformvertexes_wave] += batchnumsurfaces;
+                               r_refdef.stats[r_stat_batch_dynamic_vertices_because_deformvertexes_wave] += batchnumvertices;
+                               r_refdef.stats[r_stat_batch_dynamic_triangles_because_deformvertexes_wave] += batchnumtriangles;
+                       }
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_BULGE:
+                       if (!dynamicvertex)
+                       {
+                               r_refdef.stats[r_stat_batch_dynamic_batches_because_deformvertexes_bulge] += 1;
+                               r_refdef.stats[r_stat_batch_dynamic_surfaces_because_deformvertexes_bulge] += batchnumsurfaces;
+                               r_refdef.stats[r_stat_batch_dynamic_vertices_because_deformvertexes_bulge] += batchnumvertices;
+                               r_refdef.stats[r_stat_batch_dynamic_triangles_because_deformvertexes_bulge] += batchnumtriangles;
+                       }
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_MOVE:
                        if(!R_TestQ3WaveFunc(deform->wavefunc, deform->waveparms))
                                break; // if wavefunc is a nop, ignore this transform
+                       if (!dynamicvertex)
+                       {
+                               r_refdef.stats[r_stat_batch_dynamic_batches_because_deformvertexes_move] += 1;
+                               r_refdef.stats[r_stat_batch_dynamic_surfaces_because_deformvertexes_move] += batchnumsurfaces;
+                               r_refdef.stats[r_stat_batch_dynamic_vertices_because_deformvertexes_move] += batchnumvertices;
+                               r_refdef.stats[r_stat_batch_dynamic_triangles_because_deformvertexes_move] += batchnumtriangles;
+                       }
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX;
                        break;
                }
@@ -8687,36 +8920,90 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        case Q3TCGEN_TEXTURE:
                break;
        case Q3TCGEN_LIGHTMAP:
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_tcgen_lightmap] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_tcgen_lightmap] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_tcgen_lightmap] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_tcgen_lightmap] += batchnumtriangles;
+               }
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_LIGHTMAP | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_LIGHTMAP;
                needsupdate |= BATCHNEED_VERTEXMESH_LIGHTMAP;
                break;
        case Q3TCGEN_VECTOR:
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_tcgen_vector] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_tcgen_vector] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_tcgen_vector] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_tcgen_vector] += batchnumtriangles;
+               }
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_VERTEX;
                needsupdate |= BATCHNEED_VERTEXMESH_TEXCOORD;
                break;
        case Q3TCGEN_ENVIRONMENT:
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_tcgen_environment] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_tcgen_environment] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_tcgen_environment] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_tcgen_environment] += batchnumtriangles;
+               }
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL;
                needsupdate |= BATCHNEED_VERTEXMESH_TEXCOORD;
                break;
        }
        if (rsurface.texture->tcmods[0].tcmod == Q3TCMOD_TURBULENT)
        {
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_tcmod_turbulent] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_tcmod_turbulent] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_tcmod_turbulent] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_tcmod_turbulent] += batchnumtriangles;
+               }
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD;
                needsupdate |= BATCHNEED_VERTEXMESH_TEXCOORD;
        }
 
        if (!rsurface.modelvertexmesh && (batchneed & (BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_VERTEXMESH_TEXCOORD | BATCHNEED_VERTEXMESH_LIGHTMAP)))
        {
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_interleavedarrays] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_interleavedarrays] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_interleavedarrays] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_interleavedarrays] += batchnumtriangles;
+               }
                dynamicvertex = true;
-               batchneed |= BATCHNEED_NOGAPS;
                needsupdate |= (batchneed & (BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_VERTEXMESH_TEXCOORD | BATCHNEED_VERTEXMESH_LIGHTMAP));
        }
 
-       if (dynamicvertex || gaps || rsurface.batchfirstvertex)
+       // when the model data has no vertex buffer (dynamic mesh), we need to
+       // eliminate gaps
+       if (vid.useinterleavedarrays ? !rsurface.modelvertexmeshbuffer : !rsurface.modelvertex3f_vertexbuffer)
+               batchneed |= BATCHNEED_NOGAPS;
+
+       // the caller can specify BATCHNEED_NOGAPS to force a batch with
+       // firstvertex = 0 and endvertex = numvertices (no gaps, no firstvertex),
+       // we ensure this by treating the vertex batch as dynamic...
+       if ((batchneed & BATCHNEED_NOGAPS) && (gaps || firstvertex > 0))
+       {
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_nogaps] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_nogaps] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_nogaps] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_nogaps] += batchnumtriangles;
+               }
+               dynamicvertex = true;
+       }
+
+       if (dynamicvertex)
        {
                // when copying, we need to consider the regeneration of vertexmesh, any dependencies it may have must be set...
                if (batchneed & BATCHNEED_VERTEXMESH_VERTEX)      batchneed |= BATCHNEED_ARRAY_VERTEX;
@@ -8725,25 +9012,38 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                if (batchneed & BATCHNEED_VERTEXMESH_VERTEXCOLOR) batchneed |= BATCHNEED_ARRAY_VERTEXCOLOR;
                if (batchneed & BATCHNEED_VERTEXMESH_TEXCOORD)    batchneed |= BATCHNEED_ARRAY_TEXCOORD;
                if (batchneed & BATCHNEED_VERTEXMESH_LIGHTMAP)    batchneed |= BATCHNEED_ARRAY_LIGHTMAP;
+               if (batchneed & BATCHNEED_VERTEXMESH_SKELETAL)    batchneed |= BATCHNEED_ARRAY_SKELETAL;
        }
 
-       // when the model data has no vertex buffer (dynamic mesh), we need to
-       // eliminate gaps
-       if (vid.useinterleavedarrays ? !rsurface.modelvertexmeshbuffer : !rsurface.modelvertex3f_vertexbuffer)
-               batchneed |= BATCHNEED_NOGAPS;
-
        // if needsupdate, we have to do a dynamic vertex batch for sure
        if (needsupdate & batchneed)
+       {
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_derived] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_derived] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_derived] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_derived] += batchnumtriangles;
+               }
                dynamicvertex = true;
+       }
 
        // see if we need to build vertexmesh from arrays
        if (!rsurface.modelvertexmesh && (batchneed & (BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_VERTEXMESH_TEXCOORD | BATCHNEED_VERTEXMESH_LIGHTMAP)))
+       {
+               if (!dynamicvertex)
+               {
+                       r_refdef.stats[r_stat_batch_dynamic_batches_because_interleavedarrays] += 1;
+                       r_refdef.stats[r_stat_batch_dynamic_surfaces_because_interleavedarrays] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_dynamic_vertices_because_interleavedarrays] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_dynamic_triangles_because_interleavedarrays] += batchnumtriangles;
+               }
                dynamicvertex = true;
+       }
 
-       // if gaps are unacceptable, and there are gaps, it's a dynamic batch...
-       // also some drivers strongly dislike firstvertex
-       if ((batchneed & BATCHNEED_NOGAPS) && (gaps || firstvertex))
-               dynamicvertex = true;
+       // if we're going to have to apply the skeletal transform manually, we need to batch the skeletal data
+       if (dynamicvertex && rsurface.entityskeletaltransform3x4)
+               batchneed |= BATCHNEED_ARRAY_SKELETAL;
 
        rsurface.batchvertex3f = rsurface.modelvertex3f;
        rsurface.batchvertex3f_vertexbuffer = rsurface.modelvertex3f_vertexbuffer;
@@ -8766,6 +9066,12 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        rsurface.batchtexcoordlightmap2f = rsurface.modeltexcoordlightmap2f;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = rsurface.modeltexcoordlightmap2f_vertexbuffer;
        rsurface.batchtexcoordlightmap2f_bufferoffset = rsurface.modeltexcoordlightmap2f_bufferoffset;
+       rsurface.batchskeletalindex4ub = rsurface.modelskeletalindex4ub;
+       rsurface.batchskeletalindex4ub_vertexbuffer = rsurface.modelskeletalindex4ub_vertexbuffer;
+       rsurface.batchskeletalindex4ub_bufferoffset = rsurface.modelskeletalindex4ub_bufferoffset;
+       rsurface.batchskeletalweight4ub = rsurface.modelskeletalweight4ub;
+       rsurface.batchskeletalweight4ub_vertexbuffer = rsurface.modelskeletalweight4ub_vertexbuffer;
+       rsurface.batchskeletalweight4ub_bufferoffset = rsurface.modelskeletalweight4ub_bufferoffset;
        rsurface.batchvertex3fbuffer = rsurface.modelvertex3fbuffer;
        rsurface.batchvertexmesh = rsurface.modelvertexmesh;
        rsurface.batchvertexmeshbuffer = rsurface.modelvertexmeshbuffer;
@@ -8775,6 +9081,8 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        rsurface.batchelement3s = rsurface.modelelement3s;
        rsurface.batchelement3s_indexbuffer = rsurface.modelelement3s_indexbuffer;
        rsurface.batchelement3s_bufferoffset = rsurface.modelelement3s_bufferoffset;
+       rsurface.batchskeletaltransform3x4 = rsurface.entityskeletaltransform3x4;
+       rsurface.batchskeletalnumtransforms = rsurface.entityskeletalnumtransforms;
 
        // if any dynamic vertex processing has to occur in software, we copy the
        // entire surface list together before processing to rebase the vertices
@@ -8802,6 +9110,10 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                // otherwise use the original static buffer with an appropriate offset
                if (gaps)
                {
+                       r_refdef.stats[r_stat_batch_copytriangles_batches] += 1;
+                       r_refdef.stats[r_stat_batch_copytriangles_surfaces] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_copytriangles_vertices] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_copytriangles_triangles] += batchnumtriangles;
                        if ((batchneed & BATCHNEED_ALLOWMULTIDRAW) && r_batch_multidraw.integer && batchnumtriangles >= r_batch_multidraw_mintriangles.integer)
                        {
                                rsurface.batchmultidraw = true;
@@ -8833,6 +9145,13 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                                        rsurface.batchelement3s[i] = rsurface.batchelement3i[i];
                        }
                }
+               else
+               {
+                       r_refdef.stats[r_stat_batch_fast_batches] += 1;
+                       r_refdef.stats[r_stat_batch_fast_surfaces] += batchnumsurfaces;
+                       r_refdef.stats[r_stat_batch_fast_vertices] += batchnumvertices;
+                       r_refdef.stats[r_stat_batch_fast_triangles] += batchnumtriangles;
+               }
                return;
        }
 
@@ -8840,10 +9159,17 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        // we only directly handle separate array data in this case and then
        // generate interleaved data if needed...
        rsurface.batchgeneratedvertex = true;
+       r_refdef.stats[r_stat_batch_dynamic_batches] += 1;
+       r_refdef.stats[r_stat_batch_dynamic_surfaces] += batchnumsurfaces;
+       r_refdef.stats[r_stat_batch_dynamic_vertices] += batchnumvertices;
+       r_refdef.stats[r_stat_batch_dynamic_triangles] += batchnumtriangles;
 
        // now copy the vertex data into a combined array and make an index array
        // (this is what Quake3 does all the time)
-       //if (gaps || rsurface.batchfirstvertex)
+       // we also apply any skeletal animation here that would have been done in
+       // the vertex shader, because most of the dynamic vertex animation cases
+       // need actual vertex positions and normals
+       //if (dynamicvertex)
        {
                rsurface.batchvertex3fbuffer = NULL;
                rsurface.batchvertexmesh = NULL;
@@ -8869,6 +9195,12 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                rsurface.batchtexcoordlightmap2f = NULL;
                rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
                rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+               rsurface.batchskeletalindex4ub = NULL;
+               rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+               rsurface.batchskeletalindex4ub_bufferoffset = 0;
+               rsurface.batchskeletalweight4ub = NULL;
+               rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+               rsurface.batchskeletalweight4ub_bufferoffset = 0;
                rsurface.batchelement3i = (int *)R_FrameData_Alloc(batchnumtriangles * sizeof(int[3]));
                rsurface.batchelement3i_indexbuffer = NULL;
                rsurface.batchelement3i_bufferoffset = 0;
@@ -8893,6 +9225,11 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                        rsurface.batchtexcoordtexture2f = (float *)R_FrameData_Alloc(batchnumvertices * sizeof(float[2]));
                if (batchneed & BATCHNEED_ARRAY_LIGHTMAP)
                        rsurface.batchtexcoordlightmap2f = (float *)R_FrameData_Alloc(batchnumvertices * sizeof(float[2]));
+               if (batchneed & BATCHNEED_ARRAY_SKELETAL)
+               {
+                       rsurface.batchskeletalindex4ub = (unsigned char *)R_FrameData_Alloc(batchnumvertices * sizeof(unsigned char[4]));
+                       rsurface.batchskeletalweight4ub = (unsigned char *)R_FrameData_Alloc(batchnumvertices * sizeof(unsigned char[4]));
+               }
                numvertices = 0;
                numtriangles = 0;
                for (i = 0;i < texturenumsurfaces;i++)
@@ -8954,6 +9291,22 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                                        else
                                                memset(rsurface.batchtexcoordlightmap2f + 2*numvertices, 0, surfacenumvertices * sizeof(float[2]));
                                }
+                               if (batchneed & BATCHNEED_ARRAY_SKELETAL)
+                               {
+                                       if (rsurface.modelskeletalindex4ub)
+                                       {
+                                               memcpy(rsurface.batchskeletalindex4ub + 4*numvertices, rsurface.modelskeletalindex4ub + 4*surfacefirstvertex, surfacenumvertices * sizeof(unsigned char[4]));
+                                               memcpy(rsurface.batchskeletalweight4ub + 4*numvertices, rsurface.modelskeletalweight4ub + 4*surfacefirstvertex, surfacenumvertices * sizeof(unsigned char[4]));
+                                       }
+                                       else
+                                       {
+                                               memset(rsurface.batchskeletalindex4ub + 4*numvertices, 0, surfacenumvertices * sizeof(unsigned char[4]));
+                                               memset(rsurface.batchskeletalweight4ub + 4*numvertices, 0, surfacenumvertices * sizeof(unsigned char[4]));
+                                               ub = rsurface.batchskeletalweight4ub + 4*numvertices;
+                                               for (j = 0;j < surfacenumvertices;j++)
+                                                       ub[j*4] = 255;
+                                       }
+                               }
                        }
                        RSurf_RenumberElements(rsurface.modelelement3i + 3*surfacefirsttriangle, rsurface.batchelement3i + 3*numtriangles, 3*surfacenumtriangles, numvertices - surfacefirstvertex);
                        numvertices += surfacenumvertices;
@@ -8976,6 +9329,133 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                rsurface.batchnumtriangles = batchnumtriangles;
        }
 
+       // apply skeletal animation that would have been done in the vertex shader
+       if (rsurface.batchskeletaltransform3x4)
+       {
+               const unsigned char *si;
+               const unsigned char *sw;
+               const float *t[4];
+               const float *b = rsurface.batchskeletaltransform3x4;
+               float *vp, *vs, *vt, *vn;
+               float w[4];
+               float m[3][4], n[3][4];
+               float tp[3], ts[3], tt[3], tn[3];
+               r_refdef.stats[r_stat_batch_dynamicskeletal_batches] += 1;
+               r_refdef.stats[r_stat_batch_dynamicskeletal_surfaces] += batchnumsurfaces;
+               r_refdef.stats[r_stat_batch_dynamicskeletal_vertices] += batchnumvertices;
+               r_refdef.stats[r_stat_batch_dynamicskeletal_triangles] += batchnumtriangles;
+               si = rsurface.batchskeletalindex4ub;
+               sw = rsurface.batchskeletalweight4ub;
+               vp = rsurface.batchvertex3f;
+               vs = rsurface.batchsvector3f;
+               vt = rsurface.batchtvector3f;
+               vn = rsurface.batchnormal3f;
+               memset(m[0], 0, sizeof(m));
+               memset(n[0], 0, sizeof(n));
+               for (i = 0;i < batchnumvertices;i++)
+               {
+                       t[0] = b + si[0]*12;
+                       if (sw[0] == 255)
+                       {
+                               // common case - only one matrix
+                               m[0][0] = t[0][ 0];
+                               m[0][1] = t[0][ 1];
+                               m[0][2] = t[0][ 2];
+                               m[0][3] = t[0][ 3];
+                               m[1][0] = t[0][ 4];
+                               m[1][1] = t[0][ 5];
+                               m[1][2] = t[0][ 6];
+                               m[1][3] = t[0][ 7];
+                               m[2][0] = t[0][ 8];
+                               m[2][1] = t[0][ 9];
+                               m[2][2] = t[0][10];
+                               m[2][3] = t[0][11];
+                       }
+                       else if (sw[2] + sw[3])
+                       {
+                               // blend 4 matrices
+                               t[1] = b + si[1]*12;
+                               t[2] = b + si[2]*12;
+                               t[3] = b + si[3]*12;
+                               w[0] = sw[0] * (1.0f / 255.0f);
+                               w[1] = sw[1] * (1.0f / 255.0f);
+                               w[2] = sw[2] * (1.0f / 255.0f);
+                               w[3] = sw[3] * (1.0f / 255.0f);
+                               // blend the matrices
+                               m[0][0] = t[0][ 0] * w[0] + t[1][ 0] * w[1] + t[2][ 0] * w[2] + t[3][ 0] * w[3];
+                               m[0][1] = t[0][ 1] * w[0] + t[1][ 1] * w[1] + t[2][ 1] * w[2] + t[3][ 1] * w[3];
+                               m[0][2] = t[0][ 2] * w[0] + t[1][ 2] * w[1] + t[2][ 2] * w[2] + t[3][ 2] * w[3];
+                               m[0][3] = t[0][ 3] * w[0] + t[1][ 3] * w[1] + t[2][ 3] * w[2] + t[3][ 3] * w[3];
+                               m[1][0] = t[0][ 4] * w[0] + t[1][ 4] * w[1] + t[2][ 4] * w[2] + t[3][ 4] * w[3];
+                               m[1][1] = t[0][ 5] * w[0] + t[1][ 5] * w[1] + t[2][ 5] * w[2] + t[3][ 5] * w[3];
+                               m[1][2] = t[0][ 6] * w[0] + t[1][ 6] * w[1] + t[2][ 6] * w[2] + t[3][ 6] * w[3];
+                               m[1][3] = t[0][ 7] * w[0] + t[1][ 7] * w[1] + t[2][ 7] * w[2] + t[3][ 7] * w[3];
+                               m[2][0] = t[0][ 8] * w[0] + t[1][ 8] * w[1] + t[2][ 8] * w[2] + t[3][ 8] * w[3];
+                               m[2][1] = t[0][ 9] * w[0] + t[1][ 9] * w[1] + t[2][ 9] * w[2] + t[3][ 9] * w[3];
+                               m[2][2] = t[0][10] * w[0] + t[1][10] * w[1] + t[2][10] * w[2] + t[3][10] * w[3];
+                               m[2][3] = t[0][11] * w[0] + t[1][11] * w[1] + t[2][11] * w[2] + t[3][11] * w[3];
+                       }
+                       else
+                       {
+                               // blend 2 matrices
+                               t[1] = b + si[1]*12;
+                               w[0] = sw[0] * (1.0f / 255.0f);
+                               w[1] = sw[1] * (1.0f / 255.0f);
+                               // blend the matrices
+                               m[0][0] = t[0][ 0] * w[0] + t[1][ 0] * w[1];
+                               m[0][1] = t[0][ 1] * w[0] + t[1][ 1] * w[1];
+                               m[0][2] = t[0][ 2] * w[0] + t[1][ 2] * w[1];
+                               m[0][3] = t[0][ 3] * w[0] + t[1][ 3] * w[1];
+                               m[1][0] = t[0][ 4] * w[0] + t[1][ 4] * w[1];
+                               m[1][1] = t[0][ 5] * w[0] + t[1][ 5] * w[1];
+                               m[1][2] = t[0][ 6] * w[0] + t[1][ 6] * w[1];
+                               m[1][3] = t[0][ 7] * w[0] + t[1][ 7] * w[1];
+                               m[2][0] = t[0][ 8] * w[0] + t[1][ 8] * w[1];
+                               m[2][1] = t[0][ 9] * w[0] + t[1][ 9] * w[1];
+                               m[2][2] = t[0][10] * w[0] + t[1][10] * w[1];
+                               m[2][3] = t[0][11] * w[0] + t[1][11] * w[1];
+                       }
+                       si += 4;
+                       sw += 4;
+                       // modify the vertex
+                       VectorCopy(vp, tp);
+                       vp[0] = tp[0] * m[0][0] + tp[1] * m[0][1] + tp[2] * m[0][2] + m[0][3];
+                       vp[1] = tp[0] * m[1][0] + tp[1] * m[1][1] + tp[2] * m[1][2] + m[1][3];
+                       vp[2] = tp[0] * m[2][0] + tp[1] * m[2][1] + tp[2] * m[2][2] + m[2][3];
+                       vp += 3;
+                       if (vn)
+                       {
+                               // the normal transformation matrix is a set of cross products...
+                               CrossProduct(m[1], m[2], n[0]);
+                               CrossProduct(m[2], m[0], n[1]);
+                               CrossProduct(m[0], m[1], n[2]); // is actually transpose(inverse(m)) * det(m)
+                               VectorCopy(vn, tn);
+                               vn[0] = tn[0] * n[0][0] + tn[1] * n[0][1] + tn[2] * n[0][2];
+                               vn[1] = tn[0] * n[1][0] + tn[1] * n[1][1] + tn[2] * n[1][2];
+                               vn[2] = tn[0] * n[2][0] + tn[1] * n[2][1] + tn[2] * n[2][2];
+                               VectorNormalize(vn);
+                               vn += 3;
+                               if (vs)
+                               {
+                                       VectorCopy(vs, ts);
+                                       vs[0] = ts[0] * n[0][0] + ts[1] * n[0][1] + ts[2] * n[0][2];
+                                       vs[1] = ts[0] * n[1][0] + ts[1] * n[1][1] + ts[2] * n[1][2];
+                                       vs[2] = ts[0] * n[2][0] + ts[1] * n[2][1] + ts[2] * n[2][2];
+                                       VectorNormalize(vs);
+                                       vs += 3;
+                                       VectorCopy(vt, tt);
+                                       vt[0] = tt[0] * n[0][0] + tt[1] * n[0][1] + tt[2] * n[0][2];
+                                       vt[1] = tt[0] * n[1][0] + tt[1] * n[1][1] + tt[2] * n[1][2];
+                                       vt[2] = tt[0] * n[2][0] + tt[1] * n[2][1] + tt[2] * n[2][2];
+                                       VectorNormalize(vt);
+                                       vt += 3;
+                               }
+                       }
+               }
+               rsurface.batchskeletaltransform3x4 = NULL;
+               rsurface.batchskeletalnumtransforms = 0;
+       }
+
        // q1bsp surfaces rendered in vertex color mode have to have colors
        // calculated based on lightstyles
        if ((batchneed & (BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_ARRAY_VERTEXCOLOR)) && texturesurfacelist[0]->lightmapinfo)
@@ -9417,6 +9897,14 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                if ((batchneed & BATCHNEED_VERTEXMESH_LIGHTMAP) && rsurface.batchtexcoordlightmap2f)
                        for (j = 0, vertexmesh = rsurface.batchvertexmesh;j < batchnumvertices;j++, vertexmesh++)
                                Vector2Copy(rsurface.batchtexcoordlightmap2f + 2*j, vertexmesh->texcoordlightmap2f);
+               if ((batchneed & BATCHNEED_VERTEXMESH_SKELETAL) && rsurface.batchskeletalindex4ub)
+               {
+                       for (j = 0, vertexmesh = rsurface.batchvertexmesh;j < batchnumvertices;j++, vertexmesh++)
+                       {
+                               Vector4Copy(rsurface.batchskeletalindex4ub + 4*j, vertexmesh->skeletalindex4ub);
+                               Vector4Copy(rsurface.batchskeletalweight4ub + 4*j, vertexmesh->skeletalweight4ub);
+                       }
+               }
        }
 }
 
@@ -9496,7 +9984,7 @@ static int RSurf_FindWaterPlaneForSurface(const msurface_t *surface)
                d = 0;
                if(!prepared)
                {
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, 1, &surface);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX, 1, &surface);
                        prepared = true;
                        if(rsurface.batchnumvertices == 0)
                                break;
@@ -9837,13 +10325,13 @@ static void R_DrawTextureSurfaceList_Sky(int texturenumsurfaces, const msurface_
                R_Mesh_ResetTextureState();
                if (skyrendermasked)
                {
-                       R_SetupShader_DepthOrShadow(false, false);
+                       R_SetupShader_DepthOrShadow(false, false, false);
                        // depth-only (masking)
                        GL_ColorMask(0,0,0,0);
                        // just to make sure that braindead drivers don't draw
                        // anything despite that colormask...
                        GL_BlendFunc(GL_ZERO, GL_ONE);
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, texturenumsurfaces, texturesurfacelist);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                        if (rsurface.batchvertex3fbuffer)
                                R_Mesh_PrepareVertices_Vertex3f(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchvertex3fbuffer);
                        else
@@ -10262,7 +10750,7 @@ static void R_DrawTextureSurfaceList_ShowSurfaces(int texturenumsurfaces, const
        {
                RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, texturenumsurfaces, texturesurfacelist);
                batchvertex = R_Mesh_PrepareVertices_Generic_Lock(rsurface.batchnumvertices);
-               for (j = 0, vi = rsurface.batchfirstvertex;j < rsurface.batchnumvertices;j++, vi++)
+               for (j = 0, vi = 0;j < rsurface.batchnumvertices;j++, vi++)
                {
                        VectorCopy(rsurface.batchvertex3f + 3*vi, batchvertex[vi].vertex3f);
                        Vector4Set(batchvertex[vi].color4f, 0, 0, 0, 1);
@@ -10274,7 +10762,7 @@ static void R_DrawTextureSurfaceList_ShowSurfaces(int texturenumsurfaces, const
        {
                RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, texturenumsurfaces, texturesurfacelist);
                batchvertex = R_Mesh_PrepareVertices_Generic_Lock(rsurface.batchnumvertices);
-               for (j = 0, vi = rsurface.batchfirstvertex;j < rsurface.batchnumvertices;j++, vi++)
+               for (j = 0, vi = 0;j < rsurface.batchnumvertices;j++, vi++)
                {
                        unsigned char c = (vi << 3) * (1.0f / 256.0f);
                        VectorCopy(rsurface.batchvertex3f + 3*vi, batchvertex[vi].vertex3f);
@@ -10457,10 +10945,10 @@ static void R_DrawSurface_TransparentCallback(const entity_render_t *ent, const
                                GL_BlendFunc(GL_ONE, GL_ZERO);
                                GL_DepthMask(true);
 //                             R_Mesh_ResetTextureState();
-                               R_SetupShader_DepthOrShadow(false, false);
                        }
                        RSurf_SetupDepthAndCulling();
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX, texturenumsurfaces, texturesurfacelist);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
+                       R_SetupShader_DepthOrShadow(false, false, !!rsurface.batchskeletaltransform3x4);
                        if (rsurface.batchvertex3fbuffer)
                                R_Mesh_PrepareVertices_Vertex3f(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchvertex3fbuffer);
                        else
@@ -10555,11 +11043,12 @@ static void R_DrawTextureSurfaceList_DepthOnly(int texturenumsurfaces, const msu
        if (r_fb.water.renderingscene && (rsurface.texture->currentmaterialflags & (MATERIALFLAG_WATERSHADER | MATERIALFLAG_REFLECTION)))
                return;
        RSurf_SetupDepthAndCulling();
-       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX, texturenumsurfaces, texturesurfacelist);
+       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
        if (rsurface.batchvertex3fbuffer)
                R_Mesh_PrepareVertices_Vertex3f(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchvertex3fbuffer);
        else
                R_Mesh_PrepareVertices_Vertex3f(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchvertex3f_vertexbuffer);
+       R_SetupShader_DepthOrShadow(false, false, !!rsurface.batchskeletaltransform3x4);
        RSurf_DrawBatch();
 }
 
@@ -11390,7 +11879,7 @@ static void R_DrawModelDecals_Entity(entity_render_t *ent)
 
        if (numtris > 0)
        {
-               r_refdef.stats.drawndecals += numtris;
+               r_refdef.stats[r_stat_drawndecals] += numtris;
 
                // now render the decals all at once
                // (this assumes they all use one particle font texture!)
@@ -11428,7 +11917,7 @@ static void R_DrawModelDecals(void)
        for (i = 0;i < r_refdef.scene.numentities;i++)
                numdecals += r_refdef.scene.entities[i]->decalsystem.numdecals;
 
-       r_refdef.stats.totaldecals += numdecals;
+       r_refdef.stats[r_stat_totaldecals] += numdecals;
 
        if (r_showsurfaces.integer)
                return;
@@ -11748,9 +12237,9 @@ void R_DrawWorldSurfaces(qboolean skysurfaces, qboolean writedepth, qboolean dep
        // add to stats if desired
        if (r_speeds.integer && !skysurfaces && !depthonly)
        {
-               r_refdef.stats.world_surfaces += numsurfacelist;
+               r_refdef.stats[r_stat_world_surfaces] += numsurfacelist;
                for (j = 0;j < numsurfacelist;j++)
-                       r_refdef.stats.world_triangles += r_surfacelist[j]->num_triangles;
+                       r_refdef.stats[r_stat_world_triangles] += r_surfacelist[j]->num_triangles;
        }
 
        rsurface.entity = NULL; // used only by R_GetCurrentTexture and RSurf_ActiveWorldEntity/RSurf_ActiveModelEntity
@@ -11884,9 +12373,9 @@ void R_DrawModelSurfaces(entity_render_t *ent, qboolean skysurfaces, qboolean wr
        // add to stats if desired
        if (r_speeds.integer && !skysurfaces && !depthonly)
        {
-               r_refdef.stats.entities_surfaces += numsurfacelist;
+               r_refdef.stats[r_stat_entities_surfaces] += numsurfacelist;
                for (j = 0;j < numsurfacelist;j++)
-                       r_refdef.stats.entities_triangles += r_surfacelist[j]->num_triangles;
+                       r_refdef.stats[r_stat_entities_triangles] += r_surfacelist[j]->num_triangles;
        }
 
        rsurface.entity = NULL; // used only by R_GetCurrentTexture and RSurf_ActiveWorldEntity/RSurf_ActiveModelEntity