- edge0ylerp = (yc - screen[edge0p][1]) / (screen[edge0n][1] - screen[edge0p][1]);
- edge1ylerp = (yc - screen[edge1p][1]) / (screen[edge1n][1] - screen[edge1p][1]);
- if (edge0ylerp < 0 || edge0ylerp > 1 || edge1ylerp < 0 || edge1ylerp > 1)
- continue;
- edge0yilerp = 1.0f - edge0ylerp;
- edge1yilerp = 1.0f - edge1ylerp;
- edge0xf = screen[edge0p][0] * edge0yilerp + screen[edge0n][0] * edge0ylerp;
- edge1xf = screen[edge1p][0] * edge1yilerp + screen[edge1n][0] * edge1ylerp;
- if (edge0xf < edge1xf)
- {
- startxf = edge0xf;
- endxf = edge1xf;
- }
- else
- {
- startxf = edge1xf;
- endxf = edge0xf;
- }
- startx = (int)ceil(startxf);
- endx = (int)ceil(endxf);
- if (startx < 0)
- startx = 0;
- if (endx > width)
- endx = width;
- if (startx >= endx)
- continue;
- if (startxf > startx || endxf < endx-1) { printf("%s:%i X wrong (%i to %i is outside %f to %f)\n", __FILE__, __LINE__, startx, endx, startxf, endxf); }
- spanilength = 1.0f / (endxf - startxf);
- startxlerp = startx - startxf;
- span = &dpsoftrast.draw.spanqueue[dpsoftrast.draw.numspans++];
- memcpy(span->mip, mip, sizeof(span->mip));
- span->start = y * width + startx;
- span->length = endx - startx;
- {
- __m128 edge0ylerpm = _mm_set1_ps(edge0ylerp), edge0yilerpm = _mm_set1_ps(edge0yilerp),
- edge1ylerpm = _mm_set1_ps(edge1ylerp), edge1yilerpm = _mm_set1_ps(edge1yilerp),
- spanilengthm = _mm_set1_ps(spanilength), startxlerpm = _mm_set1_ps(startxlerp),
- data0, data1;
+ data0 = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(screen[edge0n], screen[edge0p]), edge0lerp), screen[edge0p]);
+ data1 = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(screen[edge1n], screen[edge1p]), edge1lerp), screen[edge1p]);
+ startx = _mm_cvtss_si32(_mm_add_ss(data0, _mm_set1_ps(0.5f)));
+ endx = _mm_cvtss_si32(_mm_add_ss(data1, _mm_set1_ps(0.5f)));
+ if (startx < 0) startx = 0;
+ if (endx > width) endx = width;
+ if (startx >= endx) continue;
+#if 0
+ _mm_store_ss(&startxf, data0);
+ _mm_store_ss(&endxf, data1);
+ if (startxf > startx || endxf < endx-1) { printf("%s:%i X wrong (%i to %i is outside %f to %f)\n", __FILE__, __LINE__, startx, endx, startxf, endxf); }
+#endif
+ spanilength = _mm_rcp_ss(_mm_sub_ss(data1, data0));
+ spanilength = _mm_shuffle_ps(spanilength, spanilength, _MM_SHUFFLE(0, 0, 0, 0));
+ startxlerp = _mm_sub_ps(_mm_set1_ps(startx), _mm_shuffle_ps(data0, data0, _MM_SHUFFLE(0, 0, 0, 0)));
+ span = &dpsoftrast.draw.spanqueue[dpsoftrast.draw.numspans++];
+ memcpy(span->mip, mip, sizeof(span->mip));
+ span->start = y * width + startx;
+ span->length = endx - startx;