]> git.xonotic.org Git - xonotic/darkplaces.git/blobdiff - gl_rmain.c
implemented a fallback case for r_glsl_skeletal 1 when dynamicvertex
[xonotic/darkplaces.git] / gl_rmain.c
index 6d9147a7f56211804220e59c2228c48ea694d830..7a10c491c4186865e5c8fa85ac3b558e086776e5 100644 (file)
@@ -162,6 +162,7 @@ cvar_t r_viewscale_fpsscaling_stepsize = {CVAR_SAVE, "r_viewscale_fpsscaling_ste
 cvar_t r_viewscale_fpsscaling_stepmax = {CVAR_SAVE, "r_viewscale_fpsscaling_stepmax", "1.00", "largest adjustment to hit the target framerate (this value prevents wild overshooting of the estimate)"};
 cvar_t r_viewscale_fpsscaling_target = {CVAR_SAVE, "r_viewscale_fpsscaling_target", "70", "desired framerate"};
 
+cvar_t r_glsl_skeletal = {CVAR_SAVE, "r_glsl_skeletal", "1", "render skeletal models faster using a gpu-skinning technique"};
 cvar_t r_glsl_deluxemapping = {CVAR_SAVE, "r_glsl_deluxemapping", "1", "use per pixel lighting on deluxemap-compiled q3bsp maps (or a value of 2 forces deluxemap shading even without deluxemaps)"};
 cvar_t r_glsl_offsetmapping = {CVAR_SAVE, "r_glsl_offsetmapping", "0", "offset mapping effect (also known as parallax mapping or virtual displacement mapping)"};
 cvar_t r_glsl_offsetmapping_steps = {CVAR_SAVE, "r_glsl_offsetmapping_steps", "2", "offset mapping steps (note: too high values may be not supported by your GPU)"};
@@ -225,6 +226,10 @@ cvar_t gl_lightmaps = {0, "gl_lightmaps", "0", "draws only lightmaps, no texture
 
 cvar_t r_test = {0, "r_test", "0", "internal development use only, leave it alone (usually does nothing anyway)"};
 
+cvar_t r_batch_multidraw = {CVAR_SAVE, "r_batch_multidraw", "1", "issue multiple glDrawElements calls when rendering a batch of surfaces with the same texture (otherwise the index data is copied to make it one draw)"};
+cvar_t r_batch_multidraw_mintriangles = {CVAR_SAVE, "r_batch_multidraw_mintriangles", "0", "minimum number of triangles to activate multidraw path (copying small groups of triangles may be faster)"};
+cvar_t r_batch_debugdynamicvertexpath = {CVAR_SAVE, "r_batch_debugdynamicvertexpath", "0", "force the dynamic batching code path for debugging purposes"};
+
 cvar_t r_glsl_saturation = {CVAR_SAVE, "r_glsl_saturation", "1", "saturation multiplier (only working in glsl!)"};
 cvar_t r_glsl_saturation_redcompensate = {CVAR_SAVE, "r_glsl_saturation_redcompensate", "0", "a 'vampire sight' addition to desaturation effect, does compensation for red color, r_glsl_restart is required"};
 
@@ -659,7 +664,8 @@ shaderpermutationinfo_t shaderpermutationinfo[SHADERPERMUTATION_COUNT] =
        {"#define USEBOUNCEGRIDDIRECTIONAL\n", " bouncegriddirectional"}, // TODO make this a static parm
        {"#define USETRIPPY\n", " trippy"},
        {"#define USEDEPTHRGB\n", " depthrgb"},
-       {"#define USEALPHAGENVERTEX\n", "alphagenvertex"}
+       {"#define USEALPHAGENVERTEX\n", " alphagenvertex"},
+       {"#define USESKELETAL\n", " skeletal"}
 };
 
 // NOTE: MUST MATCH ORDER OF SHADERMODE_* ENUMS!
@@ -818,6 +824,7 @@ typedef struct r_glsl_permutation_s
        int loc_ShadowMap_Parameters;
        int loc_ShadowMap_TextureScale;
        int loc_SpecularPower;
+       int loc_Skeletal_Transform12;
        int loc_UserVec1;
        int loc_UserVec2;
        int loc_UserVec3;
@@ -1165,6 +1172,7 @@ static void R_GLSL_CompilePermutation(r_glsl_permutation_t *p, unsigned int mode
                p->loc_ShadowMap_Parameters       = qglGetUniformLocation(p->program, "ShadowMap_Parameters");
                p->loc_ShadowMap_TextureScale     = qglGetUniformLocation(p->program, "ShadowMap_TextureScale");
                p->loc_SpecularPower              = qglGetUniformLocation(p->program, "SpecularPower");
+               p->loc_Skeletal_Transform12       = qglGetUniformLocation(p->program, "Skeletal_Transform12");
                p->loc_UserVec1                   = qglGetUniformLocation(p->program, "UserVec1");
                p->loc_UserVec2                   = qglGetUniformLocation(p->program, "UserVec2");
                p->loc_UserVec3                   = qglGetUniformLocation(p->program, "UserVec3");
@@ -2529,7 +2537,7 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
        {
        case RENDERPATH_D3D9:
 #ifdef SUPPORTD3D
-               RSurf_PrepareVerticesForBatch(BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_VERTEXMESH_VERTEXCOLOR : 0) | BATCHNEED_VERTEXMESH_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_VERTEXMESH_LIGHTMAP : 0), texturenumsurfaces, texturesurfacelist);
+               RSurf_PrepareVerticesForBatch(BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_VERTEXMESH_VERTEXCOLOR : 0) | BATCHNEED_VERTEXMESH_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_VERTEXMESH_LIGHTMAP : 0) | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                R_Mesh_PrepareVertices_Mesh(rsurface.batchnumvertices, rsurface.batchvertexmesh, rsurface.batchvertexmeshbuffer);
                R_SetupShader_SetPermutationHLSL(mode, permutation);
                Matrix4x4_ToArrayFloatGL(&rsurface.matrix, m16f);hlslPSSetParameter16f(D3DPSREGISTER_ModelToReflectCube, m16f);
@@ -2682,7 +2690,7 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
        case RENDERPATH_GLES2:
                if (!vid.useinterleavedarrays)
                {
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_ARRAY_VERTEXCOLOR : 0) | BATCHNEED_ARRAY_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_ARRAY_LIGHTMAP : 0), texturenumsurfaces, texturesurfacelist);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_ARRAY_VERTEXCOLOR : 0) | BATCHNEED_ARRAY_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_ARRAY_LIGHTMAP : 0) | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                        R_Mesh_VertexPointer(     3, GL_FLOAT, sizeof(float[3]), rsurface.batchvertex3f, rsurface.batchvertex3f_vertexbuffer, rsurface.batchvertex3f_bufferoffset);
                        R_Mesh_ColorPointer(      4, GL_FLOAT, sizeof(float[4]), rsurface.batchlightmapcolor4f, rsurface.batchlightmapcolor4f_vertexbuffer, rsurface.batchlightmapcolor4f_bufferoffset);
                        R_Mesh_TexCoordPointer(0, 2, GL_FLOAT, sizeof(float[2]), rsurface.batchtexcoordtexture2f, rsurface.batchtexcoordtexture2f_vertexbuffer, rsurface.batchtexcoordtexture2f_bufferoffset);
@@ -2690,12 +2698,18 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
                        R_Mesh_TexCoordPointer(2, 3, GL_FLOAT, sizeof(float[3]), rsurface.batchtvector3f, rsurface.batchtvector3f_vertexbuffer, rsurface.batchtvector3f_bufferoffset);
                        R_Mesh_TexCoordPointer(3, 3, GL_FLOAT, sizeof(float[3]), rsurface.batchnormal3f, rsurface.batchnormal3f_vertexbuffer, rsurface.batchnormal3f_bufferoffset);
                        R_Mesh_TexCoordPointer(4, 2, GL_FLOAT, sizeof(float[2]), rsurface.batchtexcoordlightmap2f, rsurface.batchtexcoordlightmap2f_vertexbuffer, rsurface.batchtexcoordlightmap2f_bufferoffset);
+                       R_Mesh_TexCoordPointer(5, 2, GL_FLOAT, sizeof(float[2]), NULL, NULL, 0);
+                       R_Mesh_TexCoordPointer(6, 4, GL_UNSIGNED_BYTE | 0x80000000, sizeof(unsigned char[4]), rsurface.batchskeletalindex4ub, rsurface.batchskeletalindex4ub_vertexbuffer, rsurface.batchskeletalindex4ub_bufferoffset);
+                       R_Mesh_TexCoordPointer(7, 4, GL_UNSIGNED_BYTE, sizeof(unsigned char[4]), rsurface.batchskeletalweight4ub, rsurface.batchskeletalweight4ub_vertexbuffer, rsurface.batchskeletalweight4ub_bufferoffset);
                }
                else
                {
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_VERTEXMESH_VERTEXCOLOR : 0) | BATCHNEED_VERTEXMESH_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_VERTEXMESH_LIGHTMAP : 0), texturenumsurfaces, texturesurfacelist);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_VERTEXMESH_VERTEXCOLOR : 0) | BATCHNEED_VERTEXMESH_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_VERTEXMESH_LIGHTMAP : 0) | (rsurface.entityskeletaltransform3x4 ? BATCHNEED_VERTEXMESH_SKELETAL : 0) | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                        R_Mesh_PrepareVertices_Mesh(rsurface.batchnumvertices, rsurface.batchvertexmesh, rsurface.batchvertexmeshbuffer);
                }
+               // this has to be after RSurf_PrepareVerticesForBatch
+               if (rsurface.batchskeletaltransform3x4)
+                       permutation |= SHADERPERMUTATION_SKELETAL;
                R_SetupShader_SetPermutationGLSL(mode, permutation);
                if (r_glsl_permutation->loc_ModelToReflectCube >= 0) {Matrix4x4_ToArrayFloatGL(&rsurface.matrix, m16f);qglUniformMatrix4fv(r_glsl_permutation->loc_ModelToReflectCube, 1, false, m16f);}
                if (mode == SHADERMODE_LIGHTSOURCE)
@@ -2837,6 +2851,8 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
                        }
                }
                if (r_glsl_permutation->tex_Texture_BounceGrid  >= 0) R_Mesh_TexBind(r_glsl_permutation->tex_Texture_BounceGrid, r_shadow_bouncegridtexture);
+               if (r_glsl_permutation->loc_Skeletal_Transform12 >= 0 && rsurface.batchskeletalnumtransforms > 0)
+                       qglUniform4fv(r_glsl_permutation->loc_Skeletal_Transform12, rsurface.batchskeletalnumtransforms*3, rsurface.batchskeletaltransform3x4);
                CHECKGLERROR
                break;
        case RENDERPATH_GL11:
@@ -2844,7 +2860,7 @@ void R_SetupShader_Surface(const vec3_t lightcolorbase, qboolean modellighting,
        case RENDERPATH_GLES1:
                break;
        case RENDERPATH_SOFT:
-               RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_ARRAY_VERTEXCOLOR : 0) | BATCHNEED_ARRAY_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_ARRAY_LIGHTMAP : 0), texturenumsurfaces, texturesurfacelist);
+               RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | (rsurface.modellightmapcolor4f ? BATCHNEED_ARRAY_VERTEXCOLOR : 0) | BATCHNEED_ARRAY_TEXCOORD | (rsurface.uselightmaptexture ? BATCHNEED_ARRAY_LIGHTMAP : 0) | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                R_Mesh_PrepareVertices_Mesh_Arrays(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchsvector3f, rsurface.batchtvector3f, rsurface.batchnormal3f, rsurface.batchlightmapcolor4f, rsurface.batchtexcoordtexture2f, rsurface.batchtexcoordlightmap2f);
                R_SetupShader_SetPermutationSoft(mode, permutation);
                {Matrix4x4_ToArrayFloatGL(&rsurface.matrix, m16f);DPSOFTRAST_UniformMatrix4fv(DPSOFTRAST_UNIFORM_ModelToReflectCubeM1, 1, false, m16f);}
@@ -4311,6 +4327,10 @@ void GL_Main_Init(void)
        Cvar_RegisterVariable(&developer_texturelogging);
        Cvar_RegisterVariable(&gl_lightmaps);
        Cvar_RegisterVariable(&r_test);
+       Cvar_RegisterVariable(&r_batch_multidraw);
+       Cvar_RegisterVariable(&r_batch_multidraw_mintriangles);
+       Cvar_RegisterVariable(&r_batch_debugdynamicvertexpath);
+       Cvar_RegisterVariable(&r_glsl_skeletal);
        Cvar_RegisterVariable(&r_glsl_saturation);
        Cvar_RegisterVariable(&r_glsl_saturation_redcompensate);
        Cvar_RegisterVariable(&r_glsl_vertextextureblend_usebothalphas);
@@ -4611,6 +4631,7 @@ void R_AnimCache_ClearCache(void)
                ent->animcache_vertexmesh = NULL;
                ent->animcache_vertex3fbuffer = NULL;
                ent->animcache_vertexmeshbuffer = NULL;
+               ent->animcache_skeletaltransform3x4 = NULL;
        }
 }
 
@@ -4647,6 +4668,106 @@ qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qbool
 {
        dp_model_t *model = ent->model;
        int numvertices;
+
+       // cache skeletal animation data first (primarily for gpu-skinning)
+       if (!ent->animcache_skeletaltransform3x4 && model->num_bones > 0 && model->surfmesh.data_skeletalindex4ub)
+       {
+               int i;
+               int blends;
+               const skeleton_t *skeleton = ent->skeleton;
+               const frameblend_t *frameblend = ent->frameblend;
+               float *boneposerelative;
+               float m[12];
+               static float bonepose[256][12];
+               ent->animcache_skeletaltransform3x4 = R_FrameData_Alloc(sizeof(float[3][4]) * model->num_bones);
+               boneposerelative = ent->animcache_skeletaltransform3x4;
+               if (skeleton && !skeleton->relativetransforms)
+                       skeleton = NULL;
+               // resolve hierarchy and make relative transforms (deforms) which the shader wants
+               if (skeleton)
+               {
+                       for (i = 0;i < model->num_bones;i++)
+                       {
+                               Matrix4x4_ToArray12FloatD3D(&skeleton->relativetransforms[i], m);
+                               if (model->data_bones[i].parent >= 0)
+                                       R_ConcatTransforms(bonepose[model->data_bones[i].parent], m, bonepose[i]);
+                               else
+                                       memcpy(bonepose[i], m, sizeof(m));
+
+                               // create a relative deformation matrix to describe displacement
+                               // from the base mesh, which is used by the actual weighting
+                               R_ConcatTransforms(bonepose[i], model->data_baseboneposeinverse + i * 12, boneposerelative + i * 12);
+                       }
+               }
+               else
+               {
+                       for (i = 0;i < model->num_bones;i++)
+                       {
+                               const short * RESTRICT pose7s = model->data_poses7s + 7 * (frameblend[0].subframe * model->num_bones + i);
+                               float lerp = frameblend[0].lerp,
+                                       tx = pose7s[0], ty = pose7s[1], tz = pose7s[2],
+                                       rx = pose7s[3] * lerp,
+                                       ry = pose7s[4] * lerp,
+                                       rz = pose7s[5] * lerp,
+                                       rw = pose7s[6] * lerp,
+                                       dx = tx*rw + ty*rz - tz*ry,
+                                       dy = -tx*rz + ty*rw + tz*rx,
+                                       dz = tx*ry - ty*rx + tz*rw,
+                                       dw = -tx*rx - ty*ry - tz*rz,
+                                       scale, sx, sy, sz, sw;
+                               for (blends = 1;blends < MAX_FRAMEBLENDS && frameblend[blends].lerp > 0;blends++)
+                               {
+                                       const short * RESTRICT pose7s = model->data_poses7s + 7 * (frameblend[blends].subframe * model->num_bones + i);
+                                       float lerp = frameblend[blends].lerp,
+                                               tx = pose7s[0], ty = pose7s[1], tz = pose7s[2],
+                                               qx = pose7s[3], qy = pose7s[4], qz = pose7s[5], qw = pose7s[6];
+                                       if(rx*qx + ry*qy + rz*qz + rw*qw < 0) lerp = -lerp;
+                                       qx *= lerp;
+                                       qy *= lerp;
+                                       qz *= lerp;
+                                       qw *= lerp;
+                                       rx += qx;
+                                       ry += qy;
+                                       rz += qz;
+                                       rw += qw;
+                                       dx += tx*qw + ty*qz - tz*qy;
+                                       dy += -tx*qz + ty*qw + tz*qx;
+                                       dz += tx*qy - ty*qx + tz*qw;
+                                       dw += -tx*qx - ty*qy - tz*qz;
+                               }
+                               scale = 1.0f / (rx*rx + ry*ry + rz*rz + rw*rw);
+                               sx = rx * scale;
+                               sy = ry * scale;
+                               sz = rz * scale;
+                               sw = rw * scale;
+                               m[0] = sw*rw + sx*rx - sy*ry - sz*rz;
+                               m[1] = 2*(sx*ry - sw*rz);
+                               m[2] = 2*(sx*rz + sw*ry);
+                               m[3] = model->num_posescale*(dx*sw - dy*sz + dz*sy - dw*sx);
+                               m[4] = 2*(sx*ry + sw*rz);
+                               m[5] = sw*rw + sy*ry - sx*rx - sz*rz;
+                               m[6] = 2*(sy*rz - sw*rx);
+                               m[7] = model->num_posescale*(dx*sz + dy*sw - dz*sx - dw*sy);
+                               m[8] = 2*(sx*rz - sw*ry);
+                               m[9] = 2*(sy*rz + sw*rx);
+                               m[10] = sw*rw + sz*rz - sx*rx - sy*ry;
+                               m[11] = model->num_posescale*(dy*sx + dz*sw - dx*sy - dw*sz);
+                               if (i == r_skeletal_debugbone.integer)
+                                       m[r_skeletal_debugbonecomponent.integer % 12] += r_skeletal_debugbonevalue.value;
+                               m[3] *= r_skeletal_debugtranslatex.value;
+                               m[7] *= r_skeletal_debugtranslatey.value;
+                               m[11] *= r_skeletal_debugtranslatez.value;
+                               if (model->data_bones[i].parent >= 0)
+                                       R_ConcatTransforms(bonepose[model->data_bones[i].parent], m, bonepose[i]);
+                               else
+                                       memcpy(bonepose[i], m, sizeof(m));
+                               // create a relative deformation matrix to describe displacement
+                               // from the base mesh, which is used by the actual weighting
+                               R_ConcatTransforms(bonepose[i], model->data_baseboneposeinverse + i * 12, boneposerelative + i * 12);
+                       }
+               }
+       }
+
        // see if it's already cached this frame
        if (ent->animcache_vertex3f)
        {
@@ -4677,6 +4798,24 @@ qboolean R_AnimCache_GetEntity(entity_render_t *ent, qboolean wantnormals, qbool
                // see if this ent is worth caching
                if (!model || !model->Draw || !model->surfmesh.isanimated || !model->AnimateVertices)
                        return false;
+               // skip entity if the shader backend has a cheaper way
+               if (model->surfmesh.data_skeletalindex4ub && r_glsl_skeletal.integer)
+               {
+                       switch (vid.renderpath)
+                       {
+                       case RENDERPATH_GL20:
+                               return false;
+                       case RENDERPATH_GL11:
+                       case RENDERPATH_GL13:
+                       case RENDERPATH_GLES1:
+                       case RENDERPATH_GLES2:
+                       case RENDERPATH_D3D9:
+                       case RENDERPATH_D3D10:
+                       case RENDERPATH_D3D11:
+                       case RENDERPATH_SOFT:
+                               break;
+                       }
+               }
                // get some memory for this entity and generate mesh data
                numvertices = model->surfmesh.num_vertices;
                ent->animcache_vertex3f = (float *)R_FrameData_Alloc(sizeof(float[3])*numvertices);
@@ -5510,7 +5649,7 @@ static void R_Water_StartFrame(void)
        int i;
        int waterwidth, waterheight, texturewidth, textureheight, camerawidth, cameraheight;
        r_waterstate_waterplane_t *p;
-       qboolean usewaterfbo = (r_viewfbo.integer >= 1 || r_water_fbo.integer >= 1) && vid.support.ext_framebuffer_object && vid.samples < 2;
+       qboolean usewaterfbo = (r_viewfbo.integer >= 1 || r_water_fbo.integer >= 1) && vid.support.ext_framebuffer_object && vid.support.arb_texture_non_power_of_two && vid.samples < 2;
 
        if (vid.width > (int)vid.maxtexturesize_2d || vid.height > (int)vid.maxtexturesize_2d)
                return;
@@ -5724,7 +5863,7 @@ static void R_Water_ProcessPlanes(int fbo, rtexture_t *depthtexture, rtexture_t
        int planeindex, qualityreduction = 0, old_r_dynamic = 0, old_r_shadows = 0, old_r_worldrtlight = 0, old_r_dlight = 0, old_r_particles = 0, old_r_decals = 0;
        r_waterstate_waterplane_t *p;
        vec3_t visorigin;
-       qboolean usewaterfbo = (r_viewfbo.integer >= 1 || r_water_fbo.integer >= 1) && vid.support.ext_framebuffer_object && vid.samples < 2;
+       qboolean usewaterfbo = (r_viewfbo.integer >= 1 || r_water_fbo.integer >= 1) && vid.support.ext_framebuffer_object && vid.support.arb_texture_non_power_of_two && vid.samples < 2;
        char vabuf[1024];
 
        originalview = r_refdef.view;
@@ -5996,14 +6135,14 @@ static void R_Bloom_StartFrame(void)
        int i;
        int bloomtexturewidth, bloomtextureheight, screentexturewidth, screentextureheight;
        int viewwidth, viewheight;
-       qboolean useviewfbo = r_viewfbo.integer >= 1 && vid.support.ext_framebuffer_object && vid.samples < 2;
+       qboolean useviewfbo = r_viewfbo.integer >= 1 && vid.support.ext_framebuffer_object && vid.support.arb_texture_non_power_of_two && vid.samples < 2;
        textype_t textype = TEXTYPE_COLORBUFFER;
 
        switch (vid.renderpath)
        {
        case RENDERPATH_GL20:
                r_fb.usedepthtextures = r_usedepthtextures.integer != 0;
-               if (vid.support.ext_framebuffer_object)
+               if (vid.support.ext_framebuffer_object && vid.support.arb_texture_non_power_of_two)
                {
                        if (r_viewfbo.integer == 2) textype = TEXTYPE_COLORBUFFER16F;
                        if (r_viewfbo.integer == 3) textype = TEXTYPE_COLORBUFFER32F;
@@ -8120,6 +8259,8 @@ void RSurf_ActiveWorldEntity(void)
        rsurface.ent_alttextures = false;
        rsurface.basepolygonfactor = r_refdef.polygonfactor;
        rsurface.basepolygonoffset = r_refdef.polygonoffset;
+       rsurface.entityskeletaltransform3x4 = NULL;
+       rsurface.entityskeletalnumtransforms = 0;
        rsurface.modelvertex3f  = model->surfmesh.data_vertex3f;
        rsurface.modelvertex3f_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
        rsurface.modelvertex3f_bufferoffset = model->surfmesh.vbooffset_vertex3f;
@@ -8141,6 +8282,12 @@ void RSurf_ActiveWorldEntity(void)
        rsurface.modeltexcoordlightmap2f  = model->surfmesh.data_texcoordlightmap2f;
        rsurface.modeltexcoordlightmap2f_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
        rsurface.modeltexcoordlightmap2f_bufferoffset = model->surfmesh.vbooffset_texcoordlightmap2f;
+       rsurface.modelskeletalindex4ub = model->surfmesh.data_skeletalindex4ub;
+       rsurface.modelskeletalindex4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalindex4ub_bufferoffset = model->surfmesh.vbooffset_skeletalindex4ub;
+       rsurface.modelskeletalweight4ub = model->surfmesh.data_skeletalweight4ub;
+       rsurface.modelskeletalweight4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalweight4ub_bufferoffset = model->surfmesh.vbooffset_skeletalweight4ub;
        rsurface.modelelement3i = model->surfmesh.data_element3i;
        rsurface.modelelement3i_indexbuffer = model->surfmesh.data_element3i_indexbuffer;
        rsurface.modelelement3i_bufferoffset = model->surfmesh.data_element3i_bufferoffset;
@@ -8181,6 +8328,12 @@ void RSurf_ActiveWorldEntity(void)
        rsurface.batchtexcoordlightmap2f = NULL;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
        rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+       rsurface.batchskeletalindex4ub = NULL;
+       rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalindex4ub_bufferoffset = 0;
+       rsurface.batchskeletalweight4ub = NULL;
+       rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalweight4ub_bufferoffset = 0;
        rsurface.batchvertexmesh = NULL;
        rsurface.batchvertexmeshbuffer = NULL;
        rsurface.batchvertex3fbuffer = NULL;
@@ -8236,7 +8389,10 @@ void RSurf_ActiveModelEntity(const entity_render_t *ent, qboolean wantnormals, q
                rsurface.basepolygonfactor += r_polygonoffset_submodel_factor.value;
                rsurface.basepolygonoffset += r_polygonoffset_submodel_offset.value;
        }
-       if (model->surfmesh.isanimated && model->AnimateVertices)
+       // if the animcache code decided it should use the shader path, skip the deform step
+       rsurface.entityskeletaltransform3x4 = ent->animcache_vertex3f ? NULL : ent->animcache_skeletaltransform3x4;
+       rsurface.entityskeletalnumtransforms = rsurface.entityskeletaltransform3x4 ? model->num_bones : 0;
+       if (model->surfmesh.isanimated && model->AnimateVertices && !rsurface.entityskeletaltransform3x4)
        {
                if (ent->animcache_vertex3f)
                {
@@ -8319,6 +8475,12 @@ void RSurf_ActiveModelEntity(const entity_render_t *ent, qboolean wantnormals, q
        rsurface.modeltexcoordlightmap2f  = model->surfmesh.data_texcoordlightmap2f;
        rsurface.modeltexcoordlightmap2f_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
        rsurface.modeltexcoordlightmap2f_bufferoffset = model->surfmesh.vbooffset_texcoordlightmap2f;
+       rsurface.modelskeletalindex4ub = model->surfmesh.data_skeletalindex4ub;
+       rsurface.modelskeletalindex4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalindex4ub_bufferoffset = model->surfmesh.vbooffset_skeletalindex4ub;
+       rsurface.modelskeletalweight4ub = model->surfmesh.data_skeletalweight4ub;
+       rsurface.modelskeletalweight4ub_vertexbuffer = model->surfmesh.vbo_vertexbuffer;
+       rsurface.modelskeletalweight4ub_bufferoffset = model->surfmesh.vbooffset_skeletalweight4ub;
        rsurface.modelelement3i = model->surfmesh.data_element3i;
        rsurface.modelelement3i_indexbuffer = model->surfmesh.data_element3i_indexbuffer;
        rsurface.modelelement3i_bufferoffset = model->surfmesh.data_element3i_bufferoffset;
@@ -8355,6 +8517,12 @@ void RSurf_ActiveModelEntity(const entity_render_t *ent, qboolean wantnormals, q
        rsurface.batchtexcoordlightmap2f = NULL;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
        rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+       rsurface.batchskeletalindex4ub = NULL;
+       rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalindex4ub_bufferoffset = 0;
+       rsurface.batchskeletalweight4ub = NULL;
+       rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalweight4ub_bufferoffset = 0;
        rsurface.batchvertexmesh = NULL;
        rsurface.batchvertexmeshbuffer = NULL;
        rsurface.batchvertex3fbuffer = NULL;
@@ -8403,6 +8571,8 @@ void RSurf_ActiveCustomEntity(const matrix4x4_t *matrix, const matrix4x4_t *inve
        rsurface.ent_alttextures = false;
        rsurface.basepolygonfactor = r_refdef.polygonfactor;
        rsurface.basepolygonoffset = r_refdef.polygonoffset;
+       rsurface.entityskeletaltransform3x4 = NULL;
+       rsurface.entityskeletalnumtransforms = 0;
        if (wanttangents)
        {
                rsurface.modelvertex3f = (float *)vertex3f;
@@ -8445,6 +8615,12 @@ void RSurf_ActiveCustomEntity(const matrix4x4_t *matrix, const matrix4x4_t *inve
        rsurface.modeltexcoordlightmap2f  = NULL;
        rsurface.modeltexcoordlightmap2f_vertexbuffer = 0;
        rsurface.modeltexcoordlightmap2f_bufferoffset = 0;
+       rsurface.modelskeletalindex4ub = NULL;
+       rsurface.modelskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.modelskeletalindex4ub_bufferoffset = 0;
+       rsurface.modelskeletalweight4ub = NULL;
+       rsurface.modelskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.modelskeletalweight4ub_bufferoffset = 0;
        rsurface.modelelement3i = (int *)element3i;
        rsurface.modelelement3i_indexbuffer = NULL;
        rsurface.modelelement3i_bufferoffset = 0;
@@ -8479,6 +8655,12 @@ void RSurf_ActiveCustomEntity(const matrix4x4_t *matrix, const matrix4x4_t *inve
        rsurface.batchtexcoordlightmap2f = NULL;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
        rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+       rsurface.batchskeletalindex4ub = NULL;
+       rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalindex4ub_bufferoffset = 0;
+       rsurface.batchskeletalweight4ub = NULL;
+       rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+       rsurface.batchskeletalweight4ub_bufferoffset = 0;
        rsurface.batchvertexmesh = NULL;
        rsurface.batchvertexmeshbuffer = NULL;
        rsurface.batchvertex3fbuffer = NULL;
@@ -8574,6 +8756,7 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        float scale;
        float center[3], forward[3], right[3], up[3], v[3], newforward[3], newright[3], newup[3];
        float waveparms[4];
+       unsigned char *ub;
        q3shaderinfo_deform_t *deform;
        const msurface_t *surface, *firstsurface;
        r_vertexmesh_t *vertexmesh;
@@ -8616,11 +8799,14 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        // check if any dynamic vertex processing must occur
        dynamicvertex = false;
 
+       // a cvar to force the dynamic vertex path to be taken, for debugging
+       if (r_batch_debugdynamicvertexpath.integer)
+               dynamicvertex = true;
+
        // if there is a chance of animated vertex colors, it's a dynamic batch
        if ((batchneed & (BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_ARRAY_VERTEXCOLOR)) && texturesurfacelist[0]->lightmapinfo)
        {
                dynamicvertex = true;
-               batchneed |= BATCHNEED_NOGAPS;
                needsupdate |= BATCHNEED_VERTEXMESH_VERTEXCOLOR;
        }
 
@@ -8642,36 +8828,36 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                        break;
                case Q3DEFORM_AUTOSPRITE:
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_VECTOR | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_AUTOSPRITE2:
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_NORMAL:
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_WAVE:
                        if(!R_TestQ3WaveFunc(deform->wavefunc, deform->waveparms))
                                break; // if wavefunc is a nop, ignore this transform
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_BULGE:
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_ARRAY_TEXCOORD;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR;
                        break;
                case Q3DEFORM_MOVE:
                        if(!R_TestQ3WaveFunc(deform->wavefunc, deform->waveparms))
                                break; // if wavefunc is a nop, ignore this transform
                        dynamicvertex = true;
-                       batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS;
+                       batchneed |= BATCHNEED_ARRAY_VERTEX;
                        needsupdate |= BATCHNEED_VERTEXMESH_VERTEX;
                        break;
                }
@@ -8683,35 +8869,45 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                break;
        case Q3TCGEN_LIGHTMAP:
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_LIGHTMAP | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_LIGHTMAP;
                needsupdate |= BATCHNEED_VERTEXMESH_LIGHTMAP;
                break;
        case Q3TCGEN_VECTOR:
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_VERTEX;
                needsupdate |= BATCHNEED_VERTEXMESH_TEXCOORD;
                break;
        case Q3TCGEN_ENVIRONMENT:
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_NORMAL;
                needsupdate |= BATCHNEED_VERTEXMESH_TEXCOORD;
                break;
        }
        if (rsurface.texture->tcmods[0].tcmod == Q3TCMOD_TURBULENT)
        {
                dynamicvertex = true;
-               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD | BATCHNEED_NOGAPS;
+               batchneed |= BATCHNEED_ARRAY_VERTEX | BATCHNEED_ARRAY_TEXCOORD;
                needsupdate |= BATCHNEED_VERTEXMESH_TEXCOORD;
        }
 
        if (!rsurface.modelvertexmesh && (batchneed & (BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_VERTEXMESH_TEXCOORD | BATCHNEED_VERTEXMESH_LIGHTMAP)))
        {
                dynamicvertex = true;
-               batchneed |= BATCHNEED_NOGAPS;
                needsupdate |= (batchneed & (BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_VERTEXMESH_TEXCOORD | BATCHNEED_VERTEXMESH_LIGHTMAP));
        }
 
-       if (dynamicvertex || gaps || rsurface.batchfirstvertex)
+       // when the model data has no vertex buffer (dynamic mesh), we need to
+       // eliminate gaps
+       if (vid.useinterleavedarrays ? !rsurface.modelvertexmeshbuffer : !rsurface.modelvertex3f_vertexbuffer)
+               batchneed |= BATCHNEED_NOGAPS;
+
+       // the caller can specify BATCHNEED_NOGAPS to force a batch with
+       // firstvertex = 0 and endvertex = numvertices (no gaps, no firstvertex),
+       // we ensure this by treating the vertex batch as dynamic...
+       if ((batchneed & BATCHNEED_NOGAPS) && (gaps || firstvertex > 0))
+               dynamicvertex = true;
+
+       if (dynamicvertex)
        {
                // when copying, we need to consider the regeneration of vertexmesh, any dependencies it may have must be set...
                if (batchneed & BATCHNEED_VERTEXMESH_VERTEX)      batchneed |= BATCHNEED_ARRAY_VERTEX;
@@ -8720,13 +8916,9 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                if (batchneed & BATCHNEED_VERTEXMESH_VERTEXCOLOR) batchneed |= BATCHNEED_ARRAY_VERTEXCOLOR;
                if (batchneed & BATCHNEED_VERTEXMESH_TEXCOORD)    batchneed |= BATCHNEED_ARRAY_TEXCOORD;
                if (batchneed & BATCHNEED_VERTEXMESH_LIGHTMAP)    batchneed |= BATCHNEED_ARRAY_LIGHTMAP;
+               if (batchneed & BATCHNEED_VERTEXMESH_SKELETAL)    batchneed |= BATCHNEED_ARRAY_SKELETAL;
        }
 
-       // when the model data has no vertex buffer (dynamic mesh), we need to
-       // eliminate gaps
-       if (vid.useinterleavedarrays ? !rsurface.modelvertexmeshbuffer : !rsurface.modelvertex3f_vertexbuffer)
-               batchneed |= BATCHNEED_NOGAPS;
-
        // if needsupdate, we have to do a dynamic vertex batch for sure
        if (needsupdate & batchneed)
                dynamicvertex = true;
@@ -8735,10 +8927,9 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        if (!rsurface.modelvertexmesh && (batchneed & (BATCHNEED_VERTEXMESH_VERTEX | BATCHNEED_VERTEXMESH_NORMAL | BATCHNEED_VERTEXMESH_VECTOR | BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_VERTEXMESH_TEXCOORD | BATCHNEED_VERTEXMESH_LIGHTMAP)))
                dynamicvertex = true;
 
-       // if gaps are unacceptable, and there are gaps, it's a dynamic batch...
-       // also some drivers strongly dislike firstvertex
-       if ((batchneed & BATCHNEED_NOGAPS) && (gaps || firstvertex))
-               dynamicvertex = true;
+       // if we're going to have to apply the skeletal transform manually, we need to batch the skeletal data
+       if (dynamicvertex && rsurface.entityskeletaltransform3x4)
+               batchneed |= BATCHNEED_ARRAY_SKELETAL;
 
        rsurface.batchvertex3f = rsurface.modelvertex3f;
        rsurface.batchvertex3f_vertexbuffer = rsurface.modelvertex3f_vertexbuffer;
@@ -8761,6 +8952,12 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        rsurface.batchtexcoordlightmap2f = rsurface.modeltexcoordlightmap2f;
        rsurface.batchtexcoordlightmap2f_vertexbuffer = rsurface.modeltexcoordlightmap2f_vertexbuffer;
        rsurface.batchtexcoordlightmap2f_bufferoffset = rsurface.modeltexcoordlightmap2f_bufferoffset;
+       rsurface.batchskeletalindex4ub = rsurface.modelskeletalindex4ub;
+       rsurface.batchskeletalindex4ub_vertexbuffer = rsurface.modelskeletalindex4ub_vertexbuffer;
+       rsurface.batchskeletalindex4ub_bufferoffset = rsurface.modelskeletalindex4ub_bufferoffset;
+       rsurface.batchskeletalweight4ub = rsurface.modelskeletalweight4ub;
+       rsurface.batchskeletalweight4ub_vertexbuffer = rsurface.modelskeletalweight4ub_vertexbuffer;
+       rsurface.batchskeletalweight4ub_bufferoffset = rsurface.modelskeletalweight4ub_bufferoffset;
        rsurface.batchvertex3fbuffer = rsurface.modelvertex3fbuffer;
        rsurface.batchvertexmesh = rsurface.modelvertexmesh;
        rsurface.batchvertexmeshbuffer = rsurface.modelvertexmeshbuffer;
@@ -8770,6 +8967,8 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        rsurface.batchelement3s = rsurface.modelelement3s;
        rsurface.batchelement3s_indexbuffer = rsurface.modelelement3s_indexbuffer;
        rsurface.batchelement3s_bufferoffset = rsurface.modelelement3s_bufferoffset;
+       rsurface.batchskeletaltransform3x4 = rsurface.entityskeletaltransform3x4;
+       rsurface.batchskeletalnumtransforms = rsurface.entityskeletalnumtransforms;
 
        // if any dynamic vertex processing has to occur in software, we copy the
        // entire surface list together before processing to rebase the vertices
@@ -8779,10 +8978,15 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
        // copy the surface list together to avoid wasting upload bandwidth on the
        // vertices in the gaps.
        //
-       // if gaps exist and we have a static vertex buffer, we still have to
-       // combine the index buffer ranges into one dynamic index buffer.
+       // if gaps exist and we have a static vertex buffer, we can choose whether
+       // to combine the index buffer ranges into one dynamic index buffer or
+       // simply issue multiple glDrawElements calls (BATCHNEED_ALLOWMULTIDRAW).
        //
-       // in all cases we end up with data that can be drawn in one call.
+       // in many cases the batch is reduced to one draw call.
+
+       rsurface.batchmultidraw = false;
+       rsurface.batchmultidrawnumsurfaces = 0;
+       rsurface.batchmultidrawsurfacelist = NULL;
 
        if (!dynamicvertex)
        {
@@ -8792,6 +8996,13 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                // otherwise use the original static buffer with an appropriate offset
                if (gaps)
                {
+                       if ((batchneed & BATCHNEED_ALLOWMULTIDRAW) && r_batch_multidraw.integer && batchnumtriangles >= r_batch_multidraw_mintriangles.integer)
+                       {
+                               rsurface.batchmultidraw = true;
+                               rsurface.batchmultidrawnumsurfaces = texturenumsurfaces;
+                               rsurface.batchmultidrawsurfacelist = texturesurfacelist;
+                               return;
+                       }
                        // build a new triangle elements array for this batch
                        rsurface.batchelement3i = (int *)R_FrameData_Alloc(batchnumtriangles * sizeof(int[3]));
                        rsurface.batchfirsttriangle = 0;
@@ -8826,7 +9037,10 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
 
        // now copy the vertex data into a combined array and make an index array
        // (this is what Quake3 does all the time)
-       //if (gaps || rsurface.batchfirstvertex)
+       // we also apply any skeletal animation here that would have been done in
+       // the vertex shader, because most of the dynamic vertex animation cases
+       // need actual vertex positions and normals
+       //if (dynamicvertex)
        {
                rsurface.batchvertex3fbuffer = NULL;
                rsurface.batchvertexmesh = NULL;
@@ -8852,6 +9066,12 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                rsurface.batchtexcoordlightmap2f = NULL;
                rsurface.batchtexcoordlightmap2f_vertexbuffer = NULL;
                rsurface.batchtexcoordlightmap2f_bufferoffset = 0;
+               rsurface.batchskeletalindex4ub = NULL;
+               rsurface.batchskeletalindex4ub_vertexbuffer = NULL;
+               rsurface.batchskeletalindex4ub_bufferoffset = 0;
+               rsurface.batchskeletalweight4ub = NULL;
+               rsurface.batchskeletalweight4ub_vertexbuffer = NULL;
+               rsurface.batchskeletalweight4ub_bufferoffset = 0;
                rsurface.batchelement3i = (int *)R_FrameData_Alloc(batchnumtriangles * sizeof(int[3]));
                rsurface.batchelement3i_indexbuffer = NULL;
                rsurface.batchelement3i_bufferoffset = 0;
@@ -8876,6 +9096,11 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                        rsurface.batchtexcoordtexture2f = (float *)R_FrameData_Alloc(batchnumvertices * sizeof(float[2]));
                if (batchneed & BATCHNEED_ARRAY_LIGHTMAP)
                        rsurface.batchtexcoordlightmap2f = (float *)R_FrameData_Alloc(batchnumvertices * sizeof(float[2]));
+               if (batchneed & BATCHNEED_ARRAY_SKELETAL)
+               {
+                       rsurface.batchskeletalindex4ub = (unsigned char *)R_FrameData_Alloc(batchnumvertices * sizeof(unsigned char[4]));
+                       rsurface.batchskeletalweight4ub = (unsigned char *)R_FrameData_Alloc(batchnumvertices * sizeof(unsigned char[4]));
+               }
                numvertices = 0;
                numtriangles = 0;
                for (i = 0;i < texturenumsurfaces;i++)
@@ -8937,6 +9162,22 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                                        else
                                                memset(rsurface.batchtexcoordlightmap2f + 2*numvertices, 0, surfacenumvertices * sizeof(float[2]));
                                }
+                               if (batchneed & BATCHNEED_ARRAY_SKELETAL)
+                               {
+                                       if (rsurface.modelskeletalindex4ub)
+                                       {
+                                               memcpy(rsurface.batchskeletalindex4ub + 4*numvertices, rsurface.modelskeletalindex4ub + 4*surfacefirstvertex, surfacenumvertices * sizeof(unsigned char[4]));
+                                               memcpy(rsurface.batchskeletalweight4ub + 4*numvertices, rsurface.modelskeletalweight4ub + 4*surfacefirstvertex, surfacenumvertices * sizeof(unsigned char[4]));
+                                       }
+                                       else
+                                       {
+                                               memset(rsurface.batchskeletalindex4ub + 4*numvertices, 0, surfacenumvertices * sizeof(unsigned char[4]));
+                                               memset(rsurface.batchskeletalweight4ub + 4*numvertices, 0, surfacenumvertices * sizeof(unsigned char[4]));
+                                               ub = rsurface.batchskeletalweight4ub + 4*numvertices;
+                                               for (j = 0;j < surfacenumvertices;j++)
+                                                       ub[j*4] = 255;
+                                       }
+                               }
                        }
                        RSurf_RenumberElements(rsurface.modelelement3i + 3*surfacefirsttriangle, rsurface.batchelement3i + 3*numtriangles, 3*surfacenumtriangles, numvertices - surfacefirstvertex);
                        numvertices += surfacenumvertices;
@@ -8959,6 +9200,129 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                rsurface.batchnumtriangles = batchnumtriangles;
        }
 
+       // apply skeletal animation that would have been done in the vertex shader
+       if (rsurface.batchskeletaltransform3x4)
+       {
+               const unsigned char *si;
+               const unsigned char *sw;
+               const float *t[4];
+               const float *b = rsurface.batchskeletaltransform3x4;
+               float *vp, *vs, *vt, *vn;
+               float w[4];
+               float m[3][4], n[3][4];
+               float tp[3], ts[3], tt[3], tn[3];
+               si = rsurface.batchskeletalindex4ub;
+               sw = rsurface.batchskeletalweight4ub;
+               vp = rsurface.batchvertex3f;
+               vs = rsurface.batchsvector3f;
+               vt = rsurface.batchtvector3f;
+               vn = rsurface.batchnormal3f;
+               memset(m[0], 0, sizeof(m));
+               memset(n[0], 0, sizeof(n));
+               for (i = 0;i < batchnumvertices;i++)
+               {
+                       t[0] = b + si[0]*12;
+                       if (sw[0] == 255)
+                       {
+                               // common case - only one matrix
+                               m[0][0] = t[0][ 0];
+                               m[0][1] = t[0][ 1];
+                               m[0][2] = t[0][ 2];
+                               m[0][3] = t[0][ 3];
+                               m[1][0] = t[0][ 4];
+                               m[1][1] = t[0][ 5];
+                               m[1][2] = t[0][ 6];
+                               m[1][3] = t[0][ 7];
+                               m[2][0] = t[0][ 8];
+                               m[2][1] = t[0][ 9];
+                               m[2][2] = t[0][10];
+                               m[2][3] = t[0][11];
+                       }
+                       else if (sw[2] + sw[3])
+                       {
+                               // blend 4 matrices
+                               t[1] = b + si[1]*12;
+                               t[2] = b + si[2]*12;
+                               t[3] = b + si[3]*12;
+                               w[0] = sw[0] * (1.0f / 255.0f);
+                               w[1] = sw[1] * (1.0f / 255.0f);
+                               w[2] = sw[2] * (1.0f / 255.0f);
+                               w[3] = sw[3] * (1.0f / 255.0f);
+                               // blend the matrices
+                               m[0][0] = t[0][ 0] * w[0] + t[1][ 0] * w[1] + t[2][ 0] * w[2] + t[3][ 0] * w[3];
+                               m[0][1] = t[0][ 1] * w[0] + t[1][ 1] * w[1] + t[2][ 1] * w[2] + t[3][ 1] * w[3];
+                               m[0][2] = t[0][ 2] * w[0] + t[1][ 2] * w[1] + t[2][ 2] * w[2] + t[3][ 2] * w[3];
+                               m[0][3] = t[0][ 3] * w[0] + t[1][ 3] * w[1] + t[2][ 3] * w[2] + t[3][ 3] * w[3];
+                               m[1][0] = t[0][ 4] * w[0] + t[1][ 4] * w[1] + t[2][ 4] * w[2] + t[3][ 4] * w[3];
+                               m[1][1] = t[0][ 5] * w[0] + t[1][ 5] * w[1] + t[2][ 5] * w[2] + t[3][ 5] * w[3];
+                               m[1][2] = t[0][ 6] * w[0] + t[1][ 6] * w[1] + t[2][ 6] * w[2] + t[3][ 6] * w[3];
+                               m[1][3] = t[0][ 7] * w[0] + t[1][ 7] * w[1] + t[2][ 7] * w[2] + t[3][ 7] * w[3];
+                               m[2][0] = t[0][ 8] * w[0] + t[1][ 8] * w[1] + t[2][ 8] * w[2] + t[3][ 8] * w[3];
+                               m[2][1] = t[0][ 9] * w[0] + t[1][ 9] * w[1] + t[2][ 9] * w[2] + t[3][ 9] * w[3];
+                               m[2][2] = t[0][10] * w[0] + t[1][10] * w[1] + t[2][10] * w[2] + t[3][10] * w[3];
+                               m[2][3] = t[0][11] * w[0] + t[1][11] * w[1] + t[2][11] * w[2] + t[3][11] * w[3];
+                       }
+                       else
+                       {
+                               // blend 2 matrices
+                               t[1] = b + si[1]*12;
+                               w[0] = sw[0] * (1.0f / 255.0f);
+                               w[1] = sw[1] * (1.0f / 255.0f);
+                               // blend the matrices
+                               m[0][0] = t[0][ 0] * w[0] + t[1][ 0] * w[1];
+                               m[0][1] = t[0][ 1] * w[0] + t[1][ 1] * w[1];
+                               m[0][2] = t[0][ 2] * w[0] + t[1][ 2] * w[1];
+                               m[0][3] = t[0][ 3] * w[0] + t[1][ 3] * w[1];
+                               m[1][0] = t[0][ 4] * w[0] + t[1][ 4] * w[1];
+                               m[1][1] = t[0][ 5] * w[0] + t[1][ 5] * w[1];
+                               m[1][2] = t[0][ 6] * w[0] + t[1][ 6] * w[1];
+                               m[1][3] = t[0][ 7] * w[0] + t[1][ 7] * w[1];
+                               m[2][0] = t[0][ 8] * w[0] + t[1][ 8] * w[1];
+                               m[2][1] = t[0][ 9] * w[0] + t[1][ 9] * w[1];
+                               m[2][2] = t[0][10] * w[0] + t[1][10] * w[1];
+                               m[2][3] = t[0][11] * w[0] + t[1][11] * w[1];
+                       }
+                       si += 4;
+                       sw += 4;
+                       // modify the vertex
+                       VectorCopy(vp, tp);
+                       vp[0] = tp[0] * m[0][0] + tp[1] * m[0][1] + tp[2] * m[0][2] + m[0][3];
+                       vp[1] = tp[0] * m[1][0] + tp[1] * m[1][1] + tp[2] * m[1][2] + m[1][3];
+                       vp[2] = tp[0] * m[2][0] + tp[1] * m[2][1] + tp[2] * m[2][2] + m[2][3];
+                       vp += 3;
+                       if (vn)
+                       {
+                               // the normal transformation matrix is a set of cross products...
+                               CrossProduct(m[1], m[0], n[0]);
+                               CrossProduct(m[2], m[0], n[1]);
+                               CrossProduct(m[0], m[1], n[2]);
+                               VectorCopy(vn, tn);
+                               vn[0] = tn[0] * n[0][0] + tn[1] * n[0][1] + tn[2] * n[0][2];
+                               vn[1] = tn[0] * n[1][0] + tn[1] * n[1][1] + tn[2] * n[1][2];
+                               vn[2] = tn[0] * n[2][0] + tn[1] * n[2][1] + tn[2] * n[2][2];
+                               VectorNormalize(vn);
+                               vn += 3;
+                               if (vs)
+                               {
+                                       VectorCopy(vs, ts);
+                                       vs[0] = ts[0] * n[0][0] + ts[1] * n[0][1] + ts[2] * n[0][2];
+                                       vs[1] = ts[0] * n[1][0] + ts[1] * n[1][1] + ts[2] * n[1][2];
+                                       vs[2] = ts[0] * n[2][0] + ts[1] * n[2][1] + ts[2] * n[2][2];
+                                       VectorNormalize(vs);
+                                       vs += 3;
+                                       VectorCopy(vt, tt);
+                                       vt[0] = tt[0] * n[0][0] + tt[1] * n[0][1] + tt[2] * n[0][2];
+                                       vt[1] = tt[0] * n[1][0] + tt[1] * n[1][1] + tt[2] * n[1][2];
+                                       vt[2] = tt[0] * n[2][0] + tt[1] * n[2][1] + tt[2] * n[2][2];
+                                       VectorNormalize(vt);
+                                       vt += 3;
+                               }
+                       }
+               }
+               rsurface.batchskeletaltransform3x4 = NULL;
+               rsurface.batchskeletalnumtransforms = 0;
+       }
+
        // q1bsp surfaces rendered in vertex color mode have to have colors
        // calculated based on lightstyles
        if ((batchneed & (BATCHNEED_VERTEXMESH_VERTEXCOLOR | BATCHNEED_ARRAY_VERTEXCOLOR)) && texturesurfacelist[0]->lightmapinfo)
@@ -9400,6 +9764,14 @@ void RSurf_PrepareVerticesForBatch(int batchneed, int texturenumsurfaces, const
                if ((batchneed & BATCHNEED_VERTEXMESH_LIGHTMAP) && rsurface.batchtexcoordlightmap2f)
                        for (j = 0, vertexmesh = rsurface.batchvertexmesh;j < batchnumvertices;j++, vertexmesh++)
                                Vector2Copy(rsurface.batchtexcoordlightmap2f + 2*j, vertexmesh->texcoordlightmap2f);
+               if ((batchneed & BATCHNEED_VERTEXMESH_SKELETAL) && rsurface.batchskeletalindex4ub)
+               {
+                       for (j = 0, vertexmesh = rsurface.batchvertexmesh;j < batchnumvertices;j++, vertexmesh++)
+                       {
+                               Vector4Copy(rsurface.batchskeletalindex4ub + 4*j, vertexmesh->skeletalindex4ub);
+                               Vector4Copy(rsurface.batchskeletalweight4ub + 4*j, vertexmesh->skeletalweight4ub);
+                       }
+               }
        }
 }
 
@@ -9435,7 +9807,31 @@ void RSurf_DrawBatch(void)
                }
        }
 #endif
-       R_Mesh_Draw(rsurface.batchfirstvertex, rsurface.batchnumvertices, rsurface.batchfirsttriangle, rsurface.batchnumtriangles, rsurface.batchelement3i, rsurface.batchelement3i_indexbuffer, rsurface.batchelement3i_bufferoffset, rsurface.batchelement3s, rsurface.batchelement3s_indexbuffer, rsurface.batchelement3s_bufferoffset);
+       if (rsurface.batchmultidraw)
+       {
+               // issue multiple draws rather than copying index data
+               int numsurfaces = rsurface.batchmultidrawnumsurfaces;
+               const msurface_t **surfacelist = rsurface.batchmultidrawsurfacelist;
+               int i, j, k, firstvertex, endvertex, firsttriangle, endtriangle;
+               for (i = 0;i < numsurfaces;)
+               {
+                       // combine consecutive surfaces as one draw
+                       for (k = i, j = i + 1;j < numsurfaces;k = j, j++)
+                               if (surfacelist[j] != surfacelist[k] + 1)
+                                       break;
+                       firstvertex = surfacelist[i]->num_firstvertex;
+                       endvertex = surfacelist[k]->num_firstvertex + surfacelist[k]->num_vertices;
+                       firsttriangle = surfacelist[i]->num_firsttriangle;
+                       endtriangle = surfacelist[k]->num_firsttriangle + surfacelist[k]->num_triangles;
+                       R_Mesh_Draw(firstvertex, endvertex - firstvertex, firsttriangle, endtriangle - firsttriangle, rsurface.batchelement3i, rsurface.batchelement3i_indexbuffer, rsurface.batchelement3i_bufferoffset, rsurface.batchelement3s, rsurface.batchelement3s_indexbuffer, rsurface.batchelement3s_bufferoffset);
+                       i = j;
+               }
+       }
+       else
+       {
+               // there is only one consecutive run of index data (may have been combined)
+               R_Mesh_Draw(rsurface.batchfirstvertex, rsurface.batchnumvertices, rsurface.batchfirsttriangle, rsurface.batchnumtriangles, rsurface.batchelement3i, rsurface.batchelement3i_indexbuffer, rsurface.batchelement3i_bufferoffset, rsurface.batchelement3s, rsurface.batchelement3s_indexbuffer, rsurface.batchelement3s_bufferoffset);
+       }
 }
 
 static int RSurf_FindWaterPlaneForSurface(const msurface_t *surface)
@@ -9455,7 +9851,7 @@ static int RSurf_FindWaterPlaneForSurface(const msurface_t *surface)
                d = 0;
                if(!prepared)
                {
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, 1, &surface);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX, 1, &surface);
                        prepared = true;
                        if(rsurface.batchnumvertices == 0)
                                break;
@@ -9802,7 +10198,7 @@ static void R_DrawTextureSurfaceList_Sky(int texturenumsurfaces, const msurface_
                        // just to make sure that braindead drivers don't draw
                        // anything despite that colormask...
                        GL_BlendFunc(GL_ZERO, GL_ONE);
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, texturenumsurfaces, texturesurfacelist);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                        if (rsurface.batchvertex3fbuffer)
                                R_Mesh_PrepareVertices_Vertex3f(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchvertex3fbuffer);
                        else
@@ -10221,7 +10617,7 @@ static void R_DrawTextureSurfaceList_ShowSurfaces(int texturenumsurfaces, const
        {
                RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, texturenumsurfaces, texturesurfacelist);
                batchvertex = R_Mesh_PrepareVertices_Generic_Lock(rsurface.batchnumvertices);
-               for (j = 0, vi = rsurface.batchfirstvertex;j < rsurface.batchnumvertices;j++, vi++)
+               for (j = 0, vi = 0;j < rsurface.batchnumvertices;j++, vi++)
                {
                        VectorCopy(rsurface.batchvertex3f + 3*vi, batchvertex[vi].vertex3f);
                        Vector4Set(batchvertex[vi].color4f, 0, 0, 0, 1);
@@ -10233,7 +10629,7 @@ static void R_DrawTextureSurfaceList_ShowSurfaces(int texturenumsurfaces, const
        {
                RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_NOGAPS, texturenumsurfaces, texturesurfacelist);
                batchvertex = R_Mesh_PrepareVertices_Generic_Lock(rsurface.batchnumvertices);
-               for (j = 0, vi = rsurface.batchfirstvertex;j < rsurface.batchnumvertices;j++, vi++)
+               for (j = 0, vi = 0;j < rsurface.batchnumvertices;j++, vi++)
                {
                        unsigned char c = (vi << 3) * (1.0f / 256.0f);
                        VectorCopy(rsurface.batchvertex3f + 3*vi, batchvertex[vi].vertex3f);
@@ -10419,7 +10815,7 @@ static void R_DrawSurface_TransparentCallback(const entity_render_t *ent, const
                                R_SetupShader_DepthOrShadow(false, false);
                        }
                        RSurf_SetupDepthAndCulling();
-                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX, texturenumsurfaces, texturesurfacelist);
+                       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
                        if (rsurface.batchvertex3fbuffer)
                                R_Mesh_PrepareVertices_Vertex3f(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchvertex3fbuffer);
                        else
@@ -10514,7 +10910,7 @@ static void R_DrawTextureSurfaceList_DepthOnly(int texturenumsurfaces, const msu
        if (r_fb.water.renderingscene && (rsurface.texture->currentmaterialflags & (MATERIALFLAG_WATERSHADER | MATERIALFLAG_REFLECTION)))
                return;
        RSurf_SetupDepthAndCulling();
-       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX, texturenumsurfaces, texturesurfacelist);
+       RSurf_PrepareVerticesForBatch(BATCHNEED_ARRAY_VERTEX | BATCHNEED_ALLOWMULTIDRAW, texturenumsurfaces, texturesurfacelist);
        if (rsurface.batchvertex3fbuffer)
                R_Mesh_PrepareVertices_Vertex3f(rsurface.batchnumvertices, rsurface.batchvertex3f, rsurface.batchvertex3fbuffer);
        else