Texture scaling library complete.

git-svn-id: http://svn.berlios.de/svnroot/repos/oolite-linux/trunk@963 127b21dd-08f5-0310-b4b7-95ae10353056
This commit is contained in:
Jens Ayton 2007-05-13 14:06:49 +00:00
parent 6701857ef9
commit 25a46d911b

View File

@ -1089,6 +1089,102 @@ static void SqueezeHorizontally1(OOScalerPixMap srcPx, OOTextureDimension dstWid
} }
/*
	SqueezeVertically1
	Box-filter a one-byte-per-pixel image down to dstHeight rows, in place.

	srcPx		Pix map to scale. Reads srcPx.pixels, srcPx.width,
				srcPx.height and srcPx.rowBytes. The scaled rows are written
				back starting at srcPx.pixels, packed without line padding.
	dstHeight	Number of rows to produce. Must be in the range
				1..srcPx.height (this is a squeeze, not a stretch); for any
				other value the buffer is left untouched.

	Source row positions are tracked in fixed point with 12 fractional bits.
	Each output row covers a span of source rows from startY to endY. The
	first and last rows of a span are only partly covered and are weighted by
	8-bit fractions (startWeight/endWeight); rows in between get the full
	weight 0xFF. The divisor only includes the weights of rows that were
	actually accumulated.
*/
static void SqueezeVertically1(OOScalerPixMap srcPx, OOTextureDimension dstHeight)
{
	uint8_t				*src, *srcStart, *dst;
	uint_fast32_t		x, y, row, xCount, startY, endY, srcRowBytes, lastRow;
	uint_fast32_t		endFractY, deltaY;
	uint_fast32_t		accum, weight;
	uint_fast8_t		startWeight, endWeight;

	// Avoid dividing by zero below, and refuse to "squeeze" to a larger size
	// (the span walk assumes every output row advances by at least one source row).
	if (dstHeight == 0 || dstHeight > srcPx.height)  return;

	dst = srcPx.pixels;	// Output is placed in same buffer, without line padding.
	srcRowBytes = srcPx.rowBytes;
	xCount = srcPx.width;

	// Source rows per destination row, 12 fractional bits.
	deltaY = (srcPx.height << 12) / dstHeight;
	endFractY = 0;
	endWeight = 0;
	endY = 0;
	lastRow = srcPx.height - 1;

	/*	Produce exactly dstHeight rows. Because deltaY is truncated,
		dstHeight * deltaY may fall short of the full source height; looping
		until the source is exhausted could then emit an extra row built from
		rows beyond the end of the image.
	*/
	for (row = 0; row != dstHeight; ++row)
	{
		endFractY += deltaY;
		startY = endY;
		endY = endFractY >> 12;

		// The span's first row gets whatever the previous span left over.
		startWeight = 0xFF - endWeight;
		// Top 8 bits of the fraction: how much of row endY this span covers.
		endWeight = (endFractY >> 4) & 0xFF;

		srcStart = (uint8_t *)((char *)srcPx.pixels + srcRowBytes * startY);
		for (x = 0; x != xCount; ++x)
		{
			src = srcStart++;
			accum = startWeight * *src;
			weight = startWeight;

			y = startY;
			for (;;)
			{
				++y;
				src = (uint8_t *)((char *)src + srcRowBytes);
				if (EXPECT(y == endY))
				{
					// Row endY lies past the image for the final span; only
					// read it, and count its weight, when it exists.
					if (EXPECT(endY <= lastRow))
					{
						accum += *src * endWeight;
						weight += endWeight;
					}
					break;
				}
				else
				{
					// Fully covered interior row.
					accum += *src * 0xFF;
					weight += 0xFF;
				}
			}

			*dst++ = accum / weight;
		}
	}
}
// Macros to manage 4-channel accumulators in 4-channel squeeze scalers.
// All three operate on variables that must already be declared where the
// macro is used: ag, br, accum1, accum2, accum3, accum4 and weight.
/* ACCUM(PX, WT): add the 32-bit pixel PX, scaled by weight WT, to the
   accumulators and add WT to the running weight.
   The pixel is split into two pairs of 8-bit channels held in 16-bit lanes
   (ag = bits 24-31 and 8-15, br = bits 16-23 and 0-7), so two channels are
   multiplied at once. WT must be small enough that an 8-bit channel times WT
   still fits in a 16-bit lane.
   accum1 collects bits 24-31, accum2 bits 16-23, accum3 bits 8-15 and
   accum4 bits 0-7 of the pixel.
*/
#define ACCUM(PX, WT) do { \
uint32_t px = PX; \
uint_fast32_t wt = WT; \
ag = ((px & 0xFF00FF00) >> 8) * wt; \
br = (px & 0x00FF00FF) * wt; \
accum1 += ag >> 16; \
accum2 += br >> 16; \
accum3 += ag & 0xFFFF; \
accum4 += br & 0xFFFF; \
weight += wt; \
} while (0)
/* CLEAR_ACCUM(): reset the four channel accumulators and the running weight
   to zero before starting a new output pixel.
*/
#define CLEAR_ACCUM() do { \
accum1 = 0; \
accum2 = 0; \
accum3 = 0; \
accum4 = 0; \
weight = 0; \
} while (0)
/* These integer divisions cause a stall -- this is the biggest
bottleneck in this file. Unrolling the loop might help on PPC.
Linear interpolation instead of box filtering would help, with
a quality hit. Given that scaling doesn't happen very often,
I think I'll leave it this way. -- Ahruman
*/
/* ACCUM2PX(): expression yielding the weighted-average pixel. Each
   accumulator is divided by weight, masked to 8 bits and shifted back to the
   bit position ACCUM() took it from. weight must be non-zero when this is
   evaluated.
*/
#define ACCUM2PX() ( \
(((accum1 / weight) & 0xFF) << 24) | \
(((accum3 / weight) & 0xFF) << 8) | \
(((accum2 / weight) & 0xFF) << 16) | \
((accum4 / weight) & 0xFF) \
)
static void SqueezeHorizontally4(OOScalerPixMap srcPx, OOTextureDimension dstWidth) static void SqueezeHorizontally4(OOScalerPixMap srcPx, OOTextureDimension dstWidth)
{ {
uint32_t *src, *srcStart, *dst; uint32_t *src, *srcStart, *dst;
@ -1104,18 +1200,6 @@ static void SqueezeHorizontally4(OOScalerPixMap srcPx, OOTextureDimension dstWid
deltaX = (srcPx.width << 12) / dstWidth; deltaX = (srcPx.width << 12) / dstWidth;
#define ACCUM(PX, WT) do { \
uint32_t px = PX; \
uint_fast32_t wt = WT; \
ag = ((px & 0xFF00FF00) >> 8) * wt; \
br = (px & 0x00FF00FF) * wt; \
accum1 += ag >> 16; \
accum2 += br >> 16; \
accum3 += ag & 0xFFFF; \
accum4 += br & 0xFFFF; \
weight += wt; \
} while (0)
for (y = 0; y != srcPx.height; ++y) for (y = 0; y != srcPx.height; ++y)
{ {
borderPx = *srcStart; borderPx = *srcStart;
@ -1132,11 +1216,7 @@ static void SqueezeHorizontally4(OOScalerPixMap srcPx, OOTextureDimension dstWid
endFractX += deltaX; endFractX += deltaX;
endX = endFractX >> 12; endX = endFractX >> 12;
accum1 = 0; CLEAR_ACCUM();
accum2 = 0;
accum3 = 0;
accum4 = 0;
weight = 0;
borderWeight = 0xFF - borderWeight; borderWeight = 0xFF - borderWeight;
ACCUM(borderPx, borderWeight); ACCUM(borderPx, borderWeight);
@ -1158,20 +1238,7 @@ static void SqueezeHorizontally4(OOScalerPixMap srcPx, OOTextureDimension dstWid
} }
} }
/* These integer divisions cause a stall -- this is the biggest *dst++ = ACCUM2PX();
bottleneck in this file. Unrolling the loop might help on PPC.
Linear interpolation instead of box filtering would help, with
a quality hit. Given that scaling doesn't happen very often,
I think I'll leave it this way. -- Ahruman
*/
accum1 = (accum1 / weight) & 0xFF;
accum2 = (accum2 / weight) & 0xFF;
accum3 = (accum3 / weight) & 0xFF;
accum4 = (accum4 / weight) & 0xFF;
ag = (accum1 << 24) | (accum3 << 8);
br = (accum2 << 16) | accum4;
*dst++ = ag | br;
} }
srcStart = (uint32_t *)((char *)srcStart + srcRowBytes); srcStart = (uint32_t *)((char *)srcStart + srcRowBytes);
@ -1179,9 +1246,66 @@ static void SqueezeHorizontally4(OOScalerPixMap srcPx, OOTextureDimension dstWid
} }
/*
	SqueezeVertically4
	Box-filter a 32-bit-per-pixel image down to dstHeight rows, in place.
	Reads srcPx.pixels, srcPx.width, srcPx.height and srcPx.rowBytes, and
	writes the scaled rows back starting at srcPx.pixels.
	Source row positions are tracked in fixed point with 12 fractional bits.
	Per-channel sums are kept via the ACCUM/CLEAR_ACCUM/ACCUM2PX macros.
	NOTE(review): deltaY divides by dstHeight, so dstHeight must be non-zero.
	NOTE(review): the outer loop runs until the source rows are used up
	rather than for dstHeight iterations; because deltaY is truncated this
	looks like it can run one span too far for some size combinations --
	confirm against callers.
*/
#warning Several scalers still do nothing! static void SqueezeVertically4(OOScalerPixMap srcPx, OOTextureDimension dstHeight)
static void SqueezeVertically1(OOScalerPixMap srcPx, OOTextureDimension dstHeight) {} {
static void SqueezeVertically4(OOScalerPixMap srcPx, OOTextureDimension dstHeight) {} uint32_t *src, *srcStart, *dst;
uint_fast32_t x, y, xCount, startY, endY, srcRowBytes, lastRow;
uint32_t ag, br;
uint_fast32_t fractY, endFractY, deltaY;
uint_fast32_t accum1, accum2, accum3, accum4, weight;
uint_fast8_t startWeight, endWeight;
dst = srcPx.pixels; // Output is placed in same buffer, without line padding.
srcRowBytes = srcPx.rowBytes;
xCount = srcPx.width;
// Source rows per destination row, with 12 fractional bits.
deltaY = (srcPx.height << 12) / dstHeight;
endFractY = 0;
endWeight = 0;
endY = 0;
lastRow = srcPx.height - 1;
// Each iteration produces one output row from source rows startY..endY.
while (endY <= lastRow)
{
// fractY is assigned here but not read anywhere in this function.
fractY = endFractY;
endFractY += deltaY;
startY = endY;
endY = endFractY >> 12;
// First row of the span gets the remainder left by the previous span's end weight.
startWeight = 0xFF - endWeight;
// Top 8 bits of the 12-bit fraction: weight given to row endY.
endWeight = (endFractY >> 4) & 0xFF;
srcStart = (uint32_t *)((char *)srcPx.pixels + srcRowBytes * startY);
// Walk the columns; src steps down the rows of the current column.
for (x = 0; x != xCount; ++x)
{
src = srcStart++;
CLEAR_ACCUM();
ACCUM(*src, startWeight);
y = startY;
for (;;)
{
++y;
src = (uint32_t *)((char *)src + srcRowBytes);
if (EXPECT(y == endY))
{
// Row endY is only read when it lies inside the image.
if (EXPECT(endY <= lastRow)) ACCUM(*src, endWeight);
break;
}
else
{
// Rows strictly between startY and endY count with full weight.
ACCUM(*src, 0xFF);
}
}
// Store the weighted average for this column of the output row.
*dst++ = ACCUM2PX();
}
}
}
static void EnsureCorrectDataSize(OOScalerPixMap *pixMap, BOOL leaveSpaceForMipMaps) static void EnsureCorrectDataSize(OOScalerPixMap *pixMap, BOOL leaveSpaceForMipMaps)