2013-12-22 04:55:30 +09:00
|
|
|
/*
 Copyright (c) 2013 yvt

 This file is part of OpenSpades.

 OpenSpades is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OpenSpades is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with OpenSpades.  If not, see <http://www.gnu.org/licenses/>.

 */
|
|
|
|
|
|
|
|
#include "SWMapRenderer.h"
|
|
|
|
#include <Client/GameMap.h>
|
|
|
|
#include <Core/Bitmap.h>
|
|
|
|
#include <array>
|
2013-12-25 09:57:47 +01:00
|
|
|
#include <cstring>
|
2013-12-22 04:55:30 +09:00
|
|
|
#include "SWRenderer.h"
|
|
|
|
#include <Core/MiniHeap.h>
|
|
|
|
#include <Core/Settings.h>
|
2013-12-23 01:21:43 +09:00
|
|
|
#include <Core/ConcurrentDispatch.h>
|
2013-12-23 03:35:00 +09:00
|
|
|
#include <Core/Stopwatch.h>
|
2013-12-23 19:46:56 +09:00
|
|
|
#include "SWUtils.h"
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
SPADES_SETTING(r_swUndersampling, "0");
|
|
|
|
|
|
|
|
namespace spades {
|
|
|
|
namespace draw {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Bounded replacement for tan(): maps an angle (radians) to [-2, 2].
//   |v| <  pi/4        -> tan(v)                (range -1 .. 1)
//   pi/4 <= |v| < pi/2 -> +/-2 - 1 / tan(v)     (range 1 .. 2 in magnitude)
//   |v| >= pi/2        -> clamped to +/-2
// The pieces meet at |v| == pi/4, where both forms evaluate to +/-1.
static inline float SpecialTan(float v) {
    static const float pi = M_PI;
    const float halfPi = pi * 0.5f;
    const float quarterPi = pi * 0.25f;

    if(v <= -halfPi)
        return -2.f;
    if(v < -quarterPi)
        return -2.f - 1.f / tanf(v);
    if(v < quarterPi)
        return tanf(v);
    if(v < halfPi)
        return 2.f - 1.f / tanf(v);
    // everything else (including values that failed all comparisons above)
    return 2.f;
}
|
|
|
|
// convert from tan value to special tan value.
|
|
|
|
static inline float ToSpecialTan(float v) {
|
|
|
|
if(v < -1.f)
|
|
|
|
return -2.f - fastRcp(v);
|
|
|
|
else if(v > 1.f)
|
|
|
|
return 2.f - fastRcp(v);
|
|
|
|
else
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Voxel face identifiers.
// The numeric values matter: BuildLine uses `1 + static_cast<int>(face)` as an
// index into the per-column RLE header to find the list of wall voxels for
// the four side faces.
enum class Face: short {
    PosX = 0,
    NegX = 1,
    PosY = 2,
    NegY = 3,
    PosZ = 4,
    NegZ = 5
};
|
|
|
|
|
|
|
|
// One sample of a scan line: a packed colour word plus a depth value.
// The three anonymous structs are alternative views of the same 8 bytes
// (BuildLine static_asserts sizeof(LinePixel) == 8).
struct SWMapRenderer::LinePixel {
    union {
        // view 1: colour word + depth
        struct {
            uint32_t combined;
            float depth;
        };
        // view 2: bit-fields overlaying `combined`
        struct {
            unsigned int color: 24;
            //Face face: 7;
            bool filled: 1;
        };
        // view 3: everything at once, for single-store copies
        struct {
            uint64_t allData;
        };
    };

    // Resets the pixel to "nothing drawn": zero colour word, depth 10000.
    void Clear() {
        combined = 0;
        depth = 10000.f;
    }

    // True while nothing has been written; writers set `filled` so that a
    // drawn pixel never has a zero colour word.
    bool IsEmpty() const {
        return !combined;
    }

    // Copies another pixel wholesale.
    // (a member function is used because "operator =" would make this
    // struct non-POD)
    void Set(const LinePixel& other){
        allData = other.allData;
    }
};
|
|
|
|
|
|
|
|
// infinite length line from -z to +z
|
|
|
|
struct SWMapRenderer::Line {
|
|
|
|
std::vector<LinePixel> pixels;
|
|
|
|
Vector3 horizonDir;
|
|
|
|
float pitchTanMin;
|
|
|
|
float pitchScale;
|
2013-12-23 03:35:00 +09:00
|
|
|
int pitchTanMinI;
|
|
|
|
int pitchScaleI;
|
2013-12-22 04:55:30 +09:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Builds the renderer for map `m`: stores the collaborators, sizes the RLE
// heap and index tables, then encodes every map column into the heap.
//   r     - owning software renderer
//   m     - game map to render (its Width()/Height() fix the table sizes)
//   level - feature level stored for later use
SWMapRenderer::SWMapRenderer(SWRenderer *r,
                             client::GameMap *m,
                             SWFeatureLevel level):
map(m),
frameBuf(nullptr),
depthBuf(nullptr),
rleHeap(m->Width() * m->Height() * 64),
level(level),
w(m->Width()), h(m->Height()),
renderer(r){
    rle.resize(w * h);
    rleLen.resize(w * h);

    Stopwatch sw;
    sw.Reset();
    SPLog("Building RLE map...");

    // Columns are stored row-major: index = x + y * w.
    for(int y = 0; y < h; y++) {
        for(int x = 0; x < w; x++) {
            const int cell = x + y * w;

            BuildRle(x, y, rleBuf);

            // copy the freshly built column into its own heap allocation
            auto ref = rleHeap.Alloc(rleBuf.size() * sizeof(RleData));
            short *dst = rleHeap.Dereference<short>(ref);
            std::memcpy(dst, rleBuf.data(), rleBuf.size() * sizeof(RleData));

            rle[cell] = ref;
            rleLen[cell] = rleBuf.size() * sizeof(RleData);
        }
    }

    SPLog("RLE map created in %.6f seconds", sw.GetTime());
}
|
|
|
|
|
|
|
|
// Nothing to release explicitly here.
// NOTE(review): frameBuf/depthBuf are never freed in this file, so they are
// presumably not owned by this class - confirm against the header.
SWMapRenderer::~SWMapRenderer() {
}
|
|
|
|
|
|
|
|
void SWMapRenderer::BuildRle(int x, int y, std::vector<RleData> &out) {
|
|
|
|
out.clear();
|
|
|
|
|
|
|
|
out.push_back(0); // [0] = +Z face position address
|
2013-12-24 04:29:39 +09:00
|
|
|
out.push_back(0);
|
|
|
|
out.push_back(0); // [2] = +X face position address
|
|
|
|
out.push_back(0);
|
|
|
|
out.push_back(0); // [4] = -X face position address
|
|
|
|
out.push_back(0);
|
|
|
|
out.push_back(0); // [6] = +Y face position address
|
|
|
|
out.push_back(0);
|
|
|
|
out.push_back(0); // [8] = -Y face position address
|
|
|
|
out.push_back(0);
|
|
|
|
|
|
|
|
auto setHeader = [&](size_t idx, size_t val){
|
|
|
|
reinterpret_cast<short *>(out.data())[idx]
|
|
|
|
= static_cast<short>(val);
|
|
|
|
};
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
uint64_t smap = map->GetSolidMapWrapped(x, y);
|
2013-12-23 03:35:00 +09:00
|
|
|
std::array<uint64_t, 4> adjs =
|
|
|
|
{map->GetSolidMapWrapped(x+1, y),
|
|
|
|
map->GetSolidMapWrapped(x-1, y),
|
|
|
|
map->GetSolidMapWrapped(x, y+1),
|
|
|
|
map->GetSolidMapWrapped(x, y-1)};
|
2013-12-22 04:55:30 +09:00
|
|
|
bool old = false;
|
|
|
|
|
|
|
|
for(int z = 0; z < 64; z++) {
|
|
|
|
bool b = (smap >> z) & 1;
|
|
|
|
if(b && !old) {
|
|
|
|
out.push_back(static_cast<RleData>(z));
|
|
|
|
}
|
|
|
|
old = b;
|
|
|
|
}
|
|
|
|
out.push_back(-1);
|
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
setHeader(0, out.size());
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
old = true;
|
|
|
|
for(int z = 63; z >= 0; z--) {
|
|
|
|
bool b = (smap >> z) & 1;
|
|
|
|
if(b && !old) {
|
|
|
|
out.push_back(static_cast<RleData>(z));
|
|
|
|
}
|
|
|
|
old = b;
|
|
|
|
}
|
|
|
|
out.push_back(-1);
|
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
for(int k = 0; k < 4; k++) {
|
2013-12-24 04:29:39 +09:00
|
|
|
setHeader(k + 1, out.size());
|
2013-12-23 03:35:00 +09:00
|
|
|
for(int z = 0; z < 64; z++) {
|
|
|
|
if((smap >> z) & 1){
|
|
|
|
if(!((adjs[k] >> z) & 1)){
|
|
|
|
out.push_back(static_cast<RleData>(z));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out.push_back(-1);
|
|
|
|
}
|
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
// padding
|
|
|
|
while(out.size() & 3){
|
|
|
|
out.push_back(42);
|
|
|
|
}
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
void SWMapRenderer::UpdateRle(int x, int y) {
|
|
|
|
int idx = x + y * w;
|
|
|
|
BuildRle(x, y, rleBuf);
|
|
|
|
|
|
|
|
rleHeap.Free(rle[idx], rleLen[idx]);
|
|
|
|
|
|
|
|
auto ref = rleHeap.Alloc(rleBuf.size() * sizeof(RleData));
|
|
|
|
short *ptr = rleHeap.Dereference<short>(ref);
|
|
|
|
std::memcpy(ptr, rleBuf.data(), rleBuf.size() * sizeof(RleData));
|
|
|
|
|
|
|
|
rle[idx] = ref;
|
|
|
|
rleLen[idx] = rleBuf.size() * sizeof(RleData);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<SWFeatureLevel flevel>
|
|
|
|
void SWMapRenderer::BuildLine(Line& line,
|
|
|
|
float minPitch, float maxPitch) {
|
|
|
|
|
2013-12-23 00:44:26 +09:00
|
|
|
// hard code for further optimization
|
|
|
|
enum {
|
|
|
|
w = 512, h = 512
|
|
|
|
};
|
|
|
|
SPAssert(map->Width() == 512);
|
|
|
|
SPAssert(map->Height() == 512);
|
|
|
|
|
|
|
|
const auto *rle = this->rle.data();
|
|
|
|
auto& rleHeap = this->rleHeap;
|
|
|
|
client::GameMap *map = this->map;
|
|
|
|
|
2013-12-22 04:55:30 +09:00
|
|
|
// pitch culling
|
|
|
|
{
|
|
|
|
const auto& frustrum = renderer->frustrum;
|
2013-12-22 15:00:56 +09:00
|
|
|
static const float pi = M_PI;
|
2013-12-22 04:55:30 +09:00
|
|
|
const auto& horz = line.horizonDir;
|
|
|
|
minPitch = -pi * 0.4999f;
|
|
|
|
maxPitch = pi * 0.4999f;
|
|
|
|
|
|
|
|
auto cull = [&minPitch, &maxPitch]() {
|
|
|
|
minPitch = 2.f;
|
|
|
|
maxPitch = -2.f;
|
|
|
|
};
|
|
|
|
auto clip = [&minPitch, &maxPitch, &horz, &cull](Vector3 plane) {
|
|
|
|
if(plane.x == 0.f && plane.y == 0.f) {
|
|
|
|
if(plane.z > 0.f) {
|
|
|
|
minPitch = std::max(minPitch, 0.f);
|
|
|
|
}else{
|
|
|
|
maxPitch = std::min(maxPitch, 0.f);
|
|
|
|
}
|
|
|
|
}else if(plane.z == 0.f){
|
|
|
|
if(Vector3::Dot(plane, horz) < 0.f) {
|
|
|
|
cull();
|
|
|
|
}
|
|
|
|
}else{
|
|
|
|
Vector3 prj = plane; prj.z = 0.f;
|
|
|
|
prj = prj.Normalize();
|
|
|
|
|
|
|
|
float zv = fabsf(plane.z);
|
|
|
|
float cs = Vector3::Dot(prj, horz);
|
|
|
|
|
|
|
|
float ang = zv * zv * (1.f - cs * cs) / (cs * cs);
|
|
|
|
ang = -cs * sqrtf(1.f + ang);
|
|
|
|
ang = zv / ang;
|
|
|
|
|
|
|
|
// convert to tan
|
|
|
|
ang = sqrtf(1.f - ang * ang) / ang;
|
|
|
|
|
|
|
|
// convert to angle
|
|
|
|
ang = atanf(ang);
|
|
|
|
|
|
|
|
if(plane.z > 0.f) {
|
2013-12-23 03:35:00 +09:00
|
|
|
minPitch = std::max(minPitch, ang - 0.01f);
|
2013-12-22 04:55:30 +09:00
|
|
|
}else{
|
2013-12-23 03:35:00 +09:00
|
|
|
maxPitch = std::min(maxPitch, -ang + 0.01f);
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
clip(frustrum[2].n);
|
|
|
|
clip(frustrum[3].n);
|
|
|
|
clip(frustrum[4].n);
|
|
|
|
clip(frustrum[5].n);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
float minTan = SpecialTan(minPitch);
|
|
|
|
float maxTan = SpecialTan(maxPitch);
|
|
|
|
|
|
|
|
line.pitchTanMin = minTan;
|
|
|
|
line.pitchScale = lineResolution / (maxTan - minTan);
|
2013-12-23 03:35:00 +09:00
|
|
|
line.pitchTanMinI = static_cast<int>(minTan * 65536.f);
|
|
|
|
line.pitchScaleI = static_cast<int>(line.pitchScale * 65536.f);
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
// TODO: pitch culling
|
|
|
|
|
|
|
|
// ray direction
|
|
|
|
float dirX = line.horizonDir.x;
|
|
|
|
float dirY = line.horizonDir.y;
|
2013-12-24 04:29:39 +09:00
|
|
|
if(fabsf(dirY) < 1.e-4f) dirY = 1.e-4f;
|
|
|
|
if(fabsf(dirX) < 1.e-4f) dirX = 1.e-4f;
|
2013-12-22 04:55:30 +09:00
|
|
|
float invDirX = 1.f / dirX;
|
|
|
|
float invDirY = 1.f / dirY;
|
|
|
|
int signX = dirX > 0.f ? 1 : -1;
|
|
|
|
int signY = dirY > 0.f ? 1 : -1;
|
2013-12-24 04:29:39 +09:00
|
|
|
int invDirXI = static_cast<int>(invDirX * 256.f);
|
|
|
|
int invDirYI = static_cast<int>(invDirY * 256.f);
|
|
|
|
int dirXI = static_cast<int>(dirX * 512.f);
|
|
|
|
int dirYI = static_cast<int>(dirY * 512.f);
|
|
|
|
if(invDirXI < 0) invDirXI = -invDirXI;
|
|
|
|
if(invDirYI < 0) invDirYI = -invDirYI;
|
|
|
|
if(dirXI < 0) dirXI = -dirXI;
|
|
|
|
if(dirYI < 0) dirYI = -dirYI;
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
// camera position
|
|
|
|
float cx = sceneDef.viewOrigin.x;
|
|
|
|
float cy = sceneDef.viewOrigin.y;
|
|
|
|
float cz = sceneDef.viewOrigin.z;
|
|
|
|
|
|
|
|
int icz = static_cast<int>(floorf(cz));
|
|
|
|
|
|
|
|
// ray position
|
2013-12-24 04:29:39 +09:00
|
|
|
//float rx = cx, ry = cy;
|
|
|
|
int rx = static_cast<int>(cx * 512.f);
|
|
|
|
int ry = static_cast<int>(cy * 512.f);
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
// ray position in integer
|
2013-12-24 04:29:39 +09:00
|
|
|
int irx = rx >> 9; //static_cast<int>(floorf(rx));
|
|
|
|
int iry = ry >> 9; //static_cast<int>(floorf(ry));
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
float fogDist = 128.f;
|
2013-12-24 04:29:39 +09:00
|
|
|
float distance = 1.e-20f; // traveled path
|
|
|
|
float invDist = 1.f / distance;
|
2013-12-22 04:55:30 +09:00
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
//auto& pixels = line.pixels;
|
2013-12-22 04:55:30 +09:00
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
line.pixels.resize(lineResolution);
|
|
|
|
auto *pixels = line.pixels.data(); // std::vector feels slow...
|
2013-12-22 04:55:30 +09:00
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
const float transScale = static_cast<float>(lineResolution) / (maxTan - minTan);
|
2013-12-22 04:55:30 +09:00
|
|
|
const float transOffset = -minTan * transScale;
|
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
#if ENABLE_SSE
|
|
|
|
if(lineResolution > 4){
|
|
|
|
static_assert(sizeof(LinePixel) == 8, "size of LinePixel has changed; needs code modification");
|
|
|
|
union {
|
|
|
|
LinePixel pxs[2];
|
|
|
|
__m128 m;
|
|
|
|
};
|
|
|
|
pxs[0].Clear();
|
|
|
|
pxs[1].Clear();
|
|
|
|
auto *ptr = pixels;
|
|
|
|
for(auto *e = pixels + lineResolution; (reinterpret_cast<size_t>(ptr) & 0xf) &&
|
|
|
|
(ptr < e); ptr++) {
|
|
|
|
ptr->Clear();
|
|
|
|
}
|
|
|
|
for(auto *e = pixels + lineResolution - 2;
|
|
|
|
ptr < e; ptr += 2) {
|
|
|
|
_mm_store_ps(reinterpret_cast<float *>(ptr), m);
|
|
|
|
}
|
|
|
|
for(auto *e = pixels + lineResolution; ptr < e; ptr++) {
|
|
|
|
ptr->Clear();
|
|
|
|
}
|
|
|
|
}else
|
|
|
|
#endif
|
|
|
|
for(size_t i = 0; i < lineResolution; i++)
|
2013-12-22 04:55:30 +09:00
|
|
|
pixels[i].Clear();
|
2013-12-24 04:29:39 +09:00
|
|
|
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
// if culled out, bail out now (pixels are filled)
|
|
|
|
if(minPitch >= maxPitch)
|
|
|
|
return;
|
|
|
|
|
|
|
|
std::array<float, 65> zval; // precompute (z - cz) * some
|
|
|
|
for(size_t i = 0; i < zval.size(); i++)
|
|
|
|
zval[i] = (static_cast<float>(i) - cz);
|
|
|
|
|
|
|
|
float vmax = lineResolution + 0.5f;
|
|
|
|
auto transform = [&zval, &transOffset, vmax, &transScale](float invDist, int z) {
|
|
|
|
float p = ToSpecialTan(invDist * zval[z]) * transScale + transOffset;
|
|
|
|
p = std::max(p, 0.f);
|
|
|
|
p = std::min(p, vmax);
|
|
|
|
return static_cast<int>(p);
|
|
|
|
};
|
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
float zscale; // travel distance -> view Z value factor
|
|
|
|
zscale = Vector3::Dot(line.horizonDir, sceneDef.viewAxis[2]);
|
|
|
|
|
|
|
|
float heightScale; // Z value -> view Z value factor
|
|
|
|
heightScale = sceneDef.viewAxis[2].z;
|
|
|
|
|
|
|
|
std::array<float, 65> heightScaleVal; // precompute (heightScale * z)
|
|
|
|
for(size_t i = 0; i < zval.size(); i++)
|
|
|
|
heightScaleVal[i] = (static_cast<float>(i) * heightScale);
|
|
|
|
|
|
|
|
float depthBias;
|
|
|
|
depthBias = -cz * heightScale;
|
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
RleData *lastRle;
|
|
|
|
{
|
|
|
|
auto ref = rle[(irx & w-1) + ((iry & h-1) * w)];
|
|
|
|
lastRle = rleHeap.Dereference<RleData>(ref);
|
|
|
|
}
|
|
|
|
|
2013-12-22 04:55:30 +09:00
|
|
|
int count = 1;
|
2013-12-24 04:29:39 +09:00
|
|
|
int cnt2 = static_cast<int>(fogDist * 8.f);
|
2013-12-22 04:55:30 +09:00
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
while(distance < fogDist && (--cnt2) > 0) {
|
|
|
|
int nextIRX, nextIRY;
|
2013-12-22 04:55:30 +09:00
|
|
|
int oirx = irx, oiry = iry;
|
2013-12-24 04:29:39 +09:00
|
|
|
|
|
|
|
// DDE
|
2013-12-22 04:55:30 +09:00
|
|
|
Face wallFace;
|
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
if(signX > 0){
|
|
|
|
nextIRX = irx + 1;
|
|
|
|
if(signY > 0) {
|
|
|
|
nextIRY = iry + 1;
|
|
|
|
|
|
|
|
int timeToNextX = (512 - (rx & 511)) * invDirXI;
|
|
|
|
int timeToNextY = (512 - (ry & 511)) * invDirYI;
|
|
|
|
|
|
|
|
if(timeToNextX < timeToNextY) {
|
|
|
|
// go across x plane
|
|
|
|
irx = nextIRX;
|
|
|
|
rx = irx << 9;
|
|
|
|
ry += (dirYI * timeToNextX) >> 17;
|
|
|
|
distance += static_cast<float>(timeToNextX) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::NegX;
|
|
|
|
}else{
|
|
|
|
// go across y plane
|
|
|
|
iry = nextIRY;
|
|
|
|
rx += (dirXI * timeToNextY) >> 17;
|
|
|
|
ry = iry << 9;
|
|
|
|
distance += static_cast<float>(timeToNextY) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::NegY;
|
|
|
|
}
|
|
|
|
}else /* (signY < 0) */{
|
|
|
|
nextIRY = iry - 1;
|
|
|
|
|
|
|
|
int timeToNextX = (512 - (rx & 511)) * invDirXI;
|
|
|
|
int timeToNextY = (ry & 511) * invDirYI;
|
|
|
|
|
|
|
|
if(timeToNextX < timeToNextY) {
|
|
|
|
// go across x plane
|
|
|
|
irx = nextIRX;
|
|
|
|
rx = irx << 9;
|
|
|
|
ry -= (dirYI * timeToNextX) >> 17;
|
|
|
|
distance += static_cast<float>(timeToNextX) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::NegX;
|
|
|
|
}else{
|
|
|
|
// go across y plane
|
|
|
|
iry = nextIRY;
|
|
|
|
rx += (dirXI * timeToNextY) >> 17;
|
|
|
|
ry = (iry << 9) - 1;
|
|
|
|
distance += static_cast<float>(timeToNextY) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::PosY;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}else /* signX < 0 */ {
|
|
|
|
nextIRX = irx - 1;
|
|
|
|
if(signY > 0) {
|
|
|
|
nextIRY = iry + 1;
|
|
|
|
|
|
|
|
int timeToNextX = (rx & 511) * invDirXI;
|
|
|
|
int timeToNextY = (512 - (ry & 511)) * invDirYI;
|
|
|
|
|
|
|
|
if(timeToNextX < timeToNextY) {
|
|
|
|
// go across x plane
|
|
|
|
irx = nextIRX;
|
|
|
|
rx = (irx << 9) - 1;
|
|
|
|
ry += (dirYI * timeToNextX) >> 17;
|
|
|
|
distance += static_cast<float>(timeToNextX) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::PosX;
|
|
|
|
}else{
|
|
|
|
// go across y plane
|
|
|
|
iry = nextIRY;
|
|
|
|
rx -= (dirXI * timeToNextY) >> 17;
|
|
|
|
ry = iry << 9;
|
|
|
|
distance += static_cast<float>(timeToNextY) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::NegY;
|
|
|
|
}
|
|
|
|
}else /* (signY < 0) */{
|
|
|
|
nextIRY = iry - 1;
|
|
|
|
|
|
|
|
int timeToNextX = (rx & 511) * invDirXI;
|
|
|
|
int timeToNextY = (ry & 511) * invDirYI;
|
|
|
|
|
|
|
|
if(timeToNextX < timeToNextY) {
|
|
|
|
// go across x plane
|
|
|
|
irx = nextIRX;
|
|
|
|
rx = (irx << 9) - 1;
|
|
|
|
ry -= (dirYI * timeToNextX) >> 17;
|
|
|
|
distance += static_cast<float>(timeToNextX) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::PosX;
|
|
|
|
}else{
|
|
|
|
// go across y plane
|
|
|
|
iry = nextIRY;
|
|
|
|
rx -= (dirXI * timeToNextY) >> 17;
|
|
|
|
ry = (iry << 9) - 1;
|
|
|
|
distance += static_cast<float>(timeToNextY) * (1.f / 512.f / 256.f);
|
|
|
|
wallFace = Face::PosY;
|
|
|
|
}
|
|
|
|
}
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
float oldInvDist = invDist;
|
2013-12-22 04:55:30 +09:00
|
|
|
|
2013-12-24 04:29:39 +09:00
|
|
|
invDist = fastRcp(distance);
|
2013-12-22 04:55:30 +09:00
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
float medDist = distance * zscale + depthBias;//(distance + oldDistance) * 0.5f;
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
// check for new spans
|
2013-12-23 00:44:26 +09:00
|
|
|
|
|
|
|
auto BuildLinePixel = [map](int x, int y, int z,
|
2013-12-23 19:46:56 +09:00
|
|
|
Face face, float dist) {
|
2013-12-23 00:44:26 +09:00
|
|
|
LinePixel px;
|
2013-12-23 19:46:56 +09:00
|
|
|
px.depth = dist;
|
2013-12-23 00:44:26 +09:00
|
|
|
#if ENABLE_SSE
|
|
|
|
if(flevel == SWFeatureLevel::SSE2) {
|
|
|
|
__m128i m;
|
|
|
|
uint32_t col = map->GetColorWrapped(x, y, z);
|
|
|
|
m = _mm_setr_epi32(col, 0,0,0);
|
|
|
|
m = _mm_unpacklo_epi8(m, _mm_setzero_si128());
|
|
|
|
m = _mm_shufflelo_epi16(m, 0xc6);
|
|
|
|
|
|
|
|
switch(face){
|
|
|
|
case Face::PosZ:
|
|
|
|
m = _mm_srli_epi16(m, 1);
|
|
|
|
break;
|
|
|
|
case Face::PosX:
|
|
|
|
case Face::PosY:
|
|
|
|
case Face::NegX:
|
|
|
|
m = _mm_adds_epi16
|
|
|
|
(_mm_srli_epi16(m, 1), _mm_srli_epi16(m, 2));
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if((col>>24)<100) {
|
|
|
|
m = _mm_srli_epi16(m, 1);
|
|
|
|
}
|
|
|
|
m = _mm_packus_epi16(m, m);
|
|
|
|
_mm_store_ss(reinterpret_cast<float *>(&px.combined),
|
|
|
|
_mm_castsi128_ps(m));
|
|
|
|
px.filled = true;
|
|
|
|
}else
|
|
|
|
#endif
|
|
|
|
// non-optimized
|
|
|
|
{
|
|
|
|
uint32_t col;
|
|
|
|
col = map->GetColorWrapped(x, y, z);
|
|
|
|
col = (col & 0xff00) | ((col & 0xff) << 16) | ((col & 0xff0000) >> 16);
|
|
|
|
switch(face){
|
|
|
|
case Face::PosZ:
|
|
|
|
col = (col & 0xfcfcfc) >> 2;
|
|
|
|
break;
|
|
|
|
case Face::PosX:
|
|
|
|
case Face::PosY:
|
|
|
|
case Face::NegX:
|
|
|
|
col = (col & 0xfefefe) >> 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
px.combined = col;
|
|
|
|
px.filled = true;
|
|
|
|
}
|
|
|
|
return px;
|
|
|
|
};
|
|
|
|
|
2013-12-22 04:55:30 +09:00
|
|
|
// floor/ceiling
|
|
|
|
{
|
|
|
|
|
|
|
|
// linear code
|
|
|
|
|
|
|
|
// RLE scan
|
2013-12-24 04:29:39 +09:00
|
|
|
RleData *rle = lastRle;
|
2013-12-22 04:55:30 +09:00
|
|
|
{
|
2013-12-24 04:29:39 +09:00
|
|
|
RleData *ptr = rle + 10;
|
2013-12-22 04:55:30 +09:00
|
|
|
while(*ptr != -1) {
|
|
|
|
int z = *ptr;
|
|
|
|
if(z > icz) {
|
2013-12-24 04:29:39 +09:00
|
|
|
int p1 = transform(invDist, z);
|
|
|
|
int p2 = transform(oldInvDist, z);
|
|
|
|
LinePixel pix = BuildLinePixel(oirx, oiry, z, Face::NegZ,
|
|
|
|
medDist + heightScaleVal[z]);
|
|
|
|
|
|
|
|
for(int j = p1; j < p2; j++) {
|
|
|
|
auto& p = pixels[j];
|
|
|
|
if(!p.IsEmpty()) continue;
|
|
|
|
p.Set(pix);
|
|
|
|
}
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
ptr++;
|
|
|
|
}
|
2013-12-24 04:29:39 +09:00
|
|
|
ptr++;
|
2013-12-22 04:55:30 +09:00
|
|
|
while(*ptr != -1) {
|
|
|
|
int z = *ptr;
|
|
|
|
if(z < icz) {
|
2013-12-24 04:29:39 +09:00
|
|
|
int p1 = transform(invDist, z + 1);
|
|
|
|
int p2 = transform(oldInvDist, z + 1);
|
|
|
|
LinePixel pix = BuildLinePixel(oirx, oiry, z, Face::PosZ,
|
|
|
|
medDist + heightScaleVal[z]);
|
|
|
|
|
|
|
|
for(int j = p2; j < p1; j++) {
|
|
|
|
auto& p = pixels[j];
|
|
|
|
if(!p.IsEmpty()) continue;
|
|
|
|
p.Set(pix);
|
|
|
|
}
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
ptr++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} // done: floor/ceiling
|
|
|
|
|
|
|
|
// add walls
|
2013-12-24 04:29:39 +09:00
|
|
|
{
|
2013-12-23 03:35:00 +09:00
|
|
|
// by RLE map
|
|
|
|
auto ref = rle[(irx & w-1) + ((iry & h-1) * w)];
|
|
|
|
RleData *rle = rleHeap.Dereference<RleData>(ref);
|
2013-12-24 04:29:39 +09:00
|
|
|
lastRle = rle;
|
2013-12-23 19:46:56 +09:00
|
|
|
auto *ptr = rle;
|
2013-12-24 04:29:39 +09:00
|
|
|
ptr += reinterpret_cast<unsigned short *>(rle)
|
|
|
|
[1 + static_cast<int>(wallFace)];
|
2013-12-23 03:35:00 +09:00
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
while(*ptr != -1) {
|
|
|
|
int z = *(ptr++);
|
2013-12-23 03:35:00 +09:00
|
|
|
|
|
|
|
int p1 = transform(invDist, z);
|
|
|
|
int p2 = transform(invDist, z + 1);
|
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
LinePixel pix = BuildLinePixel(irx, iry, z, wallFace,
|
|
|
|
medDist + heightScaleVal[z]);
|
2013-12-23 03:35:00 +09:00
|
|
|
|
|
|
|
for(int j = p1; j < p2; j++) {
|
|
|
|
auto& p = pixels[j];
|
|
|
|
if(!p.IsEmpty()) continue;
|
|
|
|
p.Set(pix);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} // add wall - end
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
// check pitch cull
|
|
|
|
if((--count) == 0){
|
|
|
|
if((transform(invDist, 0) >= lineResolution - 1 && icz >= 0) ||
|
|
|
|
transform(invDist, 63) <= 0)
|
|
|
|
break;
|
|
|
|
count = 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
// let's go to next voxel!
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Lookup tables for fastATan(), holding angles as 16-bit fixed point where
// a full turn (2*pi) corresponds to 65536.
//   sm[i]  = atan(i / 4096)                 (arguments 0 .. ~1.22)
//   lg[i]  = atan(1 / ((i + 0.5) / 4096))   (indexed by 4096 / argument)
//   smN/lgN hold the negated angles.
struct AtanTable {
    std::array<uint16_t, 5000> sm;
    std::array<uint16_t, 5000> lg;
    std::array<uint16_t, 5000> smN;
    std::array<uint16_t, 5000> lgN;

    // [0, 2pi] -> [0, 65536]
    // The value is truncated toward zero and then wrapped to 16 bits, so
    // negative angles wrap around.
    static uint16_t ToFixed(float v) {
        const float turns = static_cast<float>(v / (M_PI * 2.f));
        const float scaled = turns * 65536.f;
        const int whole = static_cast<int>(scaled);
        return static_cast<uint16_t>(whole & 65535);
    }

    AtanTable() {
        for(int i = 0; i < 5000; i++) {
            const float small = atanf(i / 4096.f);
            const float large = atanf(1.f / ((i + .5f) / 4096.f));
            sm[i] = ToFixed(small);
            lg[i] = ToFixed(large);
            smN[i] = ToFixed(-small);
            lgN[i] = ToFixed(-large);
        }
    }
};

// single shared instance, filled in during static initialisation
static AtanTable atanTable;
|
2013-12-23 00:44:26 +09:00
|
|
|
static inline uint16_t fastATan(float v){
|
2013-12-22 04:55:30 +09:00
|
|
|
if(v < 0.f) {
|
|
|
|
if(v > -1.f) {
|
|
|
|
v *= -4096.f;
|
|
|
|
int idx = static_cast<int>(v);
|
|
|
|
//v -= idx;
|
2013-12-23 00:44:26 +09:00
|
|
|
auto ret = atanTable.smN[idx];
|
2013-12-22 04:55:30 +09:00
|
|
|
return ret;
|
|
|
|
}else{
|
|
|
|
v = fastDiv(-4096.f, v);
|
|
|
|
int idx = static_cast<int>(v);
|
|
|
|
//v -= idx;
|
2013-12-23 00:44:26 +09:00
|
|
|
auto ret = atanTable.lgN[idx];
|
2013-12-22 04:55:30 +09:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}else{
|
|
|
|
if(v < 1.f) {
|
|
|
|
v *= 4096.f;
|
|
|
|
int idx = static_cast<int>(v);
|
|
|
|
//v -= idx;
|
2013-12-23 00:44:26 +09:00
|
|
|
auto ret = atanTable.sm[idx];
|
2013-12-22 04:55:30 +09:00
|
|
|
return ret;
|
|
|
|
//ret += (atanTable.sm[idx + 1] - ret) * v;
|
|
|
|
//return ret;
|
|
|
|
}else{
|
|
|
|
v = fastDiv(4096.f, v);
|
|
|
|
int idx = static_cast<int>(v);
|
|
|
|
//v -= idx;
|
2013-12-23 00:44:26 +09:00
|
|
|
auto ret = atanTable.lg[idx];
|
2013-12-22 04:55:30 +09:00
|
|
|
return ret;
|
|
|
|
//ret += (atanTable.lg[idx + 1] - ret) * v;
|
|
|
|
//return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-23 00:44:26 +09:00
|
|
|
static inline uint16_t fastATan2(float y, float x) {
|
2013-12-22 04:55:30 +09:00
|
|
|
if(x == 0.f) {
|
2013-12-23 00:44:26 +09:00
|
|
|
return y > 0.f ? 16384 : -16384;
|
|
|
|
//y > 0.f ? (pi * 0.5f) : (-pi * 0.5f);
|
2013-12-22 04:55:30 +09:00
|
|
|
}else if(x > 0.f) {
|
|
|
|
return fastATan(fastDiv(y, x));
|
|
|
|
}else{
|
2013-12-23 00:44:26 +09:00
|
|
|
return fastATan(fastDiv(y, x)) + 32768;
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template<SWFeatureLevel flevel, int under>
|
|
|
|
void SWMapRenderer::RenderFinal(float yawMin, float yawMax,
|
2013-12-23 00:44:26 +09:00
|
|
|
unsigned int numLines,
|
|
|
|
unsigned int threadId,
|
|
|
|
unsigned int numThreads) {
|
2013-12-22 04:55:30 +09:00
|
|
|
float fovX = tanf(sceneDef.fovX * 0.5f);
|
|
|
|
float fovY = tanf(sceneDef.fovY * 0.5f);
|
|
|
|
Vector3 front = sceneDef.viewAxis[2];
|
|
|
|
Vector3 right = sceneDef.viewAxis[0];
|
|
|
|
Vector3 down = sceneDef.viewAxis[1];
|
|
|
|
|
2013-12-23 00:44:26 +09:00
|
|
|
unsigned int fw = frameBuf->GetWidth();
|
|
|
|
unsigned int fh = frameBuf->GetHeight();
|
2013-12-22 04:55:30 +09:00
|
|
|
uint32_t *fb = frameBuf->GetPixels();
|
2013-12-23 19:46:56 +09:00
|
|
|
float *depthBuf = this->depthBuf;
|
2013-12-22 04:55:30 +09:00
|
|
|
Vector3 v1 = front - right * fovX + down * fovY;
|
|
|
|
Vector3 deltaDown = -down * (fovY * 2.f / static_cast<float>(fh));
|
|
|
|
Vector3 deltaRight = right * (fovX * 2.f / static_cast<float>(fw) * under);
|
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
Vector2 screenPos = {-fovX, -fovY};
|
|
|
|
float deltaScreenPosRight = fovX * 2.f / static_cast<float>(fw);
|
|
|
|
float deltaScreenPosDown = fovY * 2.f / static_cast<float>(fh);
|
|
|
|
|
2013-12-22 15:00:56 +09:00
|
|
|
static const float pi = M_PI;
|
2013-12-22 04:55:30 +09:00
|
|
|
float yawScale = 65536.f / (pi * 2.f);
|
|
|
|
int yawScale2 = static_cast<int>(pi * 2.f / (yawMax - yawMin) * 65536.f);
|
2013-12-23 00:44:26 +09:00
|
|
|
int yawMin2 = static_cast<int>(yawMin * yawScale);
|
2013-12-22 04:55:30 +09:00
|
|
|
auto& lineList = this->lines;
|
|
|
|
|
2013-12-22 15:00:56 +09:00
|
|
|
enum {
|
2013-12-22 16:15:10 +09:00
|
|
|
blockSize = 8,
|
2013-12-22 15:00:56 +09:00
|
|
|
hBlock = blockSize / under
|
|
|
|
};
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
Vector3 deltaDownLarge = deltaDown * blockSize;
|
|
|
|
Vector3 deltaRightLarge = deltaRight * hBlock;
|
|
|
|
|
2013-12-23 00:44:26 +09:00
|
|
|
unsigned int startX = threadId * fw / numThreads;
|
|
|
|
unsigned int endX = (threadId + 1) * fw / numThreads;
|
|
|
|
|
|
|
|
startX = (startX / blockSize) * blockSize;
|
|
|
|
endX = (endX / blockSize) * blockSize;
|
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
float deltaScreenPosRightSmall = deltaScreenPosRight * under;
|
|
|
|
float deltaScreenPosDownSmall = deltaScreenPosDown;
|
|
|
|
|
|
|
|
deltaScreenPosRight *= static_cast<float>(blockSize);
|
|
|
|
deltaScreenPosDown *= static_cast<float>(blockSize);
|
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
v1 += deltaRight * static_cast<float>(startX / under);
|
2013-12-23 19:46:56 +09:00
|
|
|
screenPos.x += deltaScreenPosRight * static_cast<float>(startX / blockSize);
|
2013-12-23 01:21:43 +09:00
|
|
|
|
2013-12-23 00:44:26 +09:00
|
|
|
for(unsigned int fx = startX; fx < endX; fx+=blockSize){
|
2013-12-22 04:55:30 +09:00
|
|
|
Vector3 v2 = v1;
|
2013-12-23 19:46:56 +09:00
|
|
|
screenPos.y = -fovY;
|
2013-12-23 00:44:26 +09:00
|
|
|
for(unsigned int fy = 0; fy < fh; fy+=blockSize){
|
2013-12-23 03:35:00 +09:00
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
|
|
|
|
uint32_t *fb2 = fb + fx + fy * fw;
|
|
|
|
float *db2 = depthBuf + fx + fy * fw;
|
|
|
|
|
|
|
|
if(v2.z > 0.99f || v2.z < -0.99f) {
|
2013-12-23 03:35:00 +09:00
|
|
|
// near to pole. cannot be approximated by piecewise
|
|
|
|
goto SlowBlockPath;
|
|
|
|
}
|
|
|
|
|
|
|
|
FastBlockPath:
|
|
|
|
{
|
|
|
|
|
|
|
|
// Use bi-linear interpolation for faster yaw/pitch
|
|
|
|
// computation.
|
|
|
|
|
|
|
|
auto calcYawindex = [yawScale2, numLines, yawMin2](Vector3 v) {
|
|
|
|
int yawIndex;
|
2013-12-22 04:55:30 +09:00
|
|
|
{
|
2013-12-23 03:35:00 +09:00
|
|
|
float x = v.x, y = v.y;
|
2013-12-23 00:44:26 +09:00
|
|
|
int yaw;
|
2013-12-22 04:55:30 +09:00
|
|
|
yaw = fastATan2(y, x);
|
2013-12-23 00:44:26 +09:00
|
|
|
yaw -= yawMin2;
|
2013-12-23 03:35:00 +09:00
|
|
|
yawIndex = static_cast<int>
|
2013-12-23 00:44:26 +09:00
|
|
|
(yaw & 0xffff);
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
2013-12-23 03:35:00 +09:00
|
|
|
yawIndex <<= 8;
|
|
|
|
return yawIndex;
|
|
|
|
};
|
|
|
|
auto calcPitch = [] (Vector3 vv) {
|
|
|
|
float pitch;
|
|
|
|
pitch = vv.z * fastRSqrt(vv.x*vv.x+vv.y*vv.y);
|
|
|
|
pitch = ToSpecialTan(pitch);
|
|
|
|
return static_cast<int>(pitch * (65536.f * 8192.f));
|
|
|
|
};
|
|
|
|
int yawIndex1 = calcYawindex(v2);
|
|
|
|
int pitch1 = calcPitch(v2);
|
|
|
|
int yawIndex2 = calcYawindex(v2 + deltaRightLarge);
|
|
|
|
int pitch2 = calcPitch(v2 + deltaRightLarge);
|
|
|
|
int yawIndex3 = calcYawindex(v2 + deltaDownLarge);
|
|
|
|
int pitch3 = calcPitch(v2 + deltaDownLarge);
|
|
|
|
int yawIndex4 = calcYawindex(v2 + deltaRightLarge + deltaDownLarge);
|
|
|
|
int pitch4 = calcPitch(v2 + deltaRightLarge + deltaDownLarge);
|
|
|
|
|
|
|
|
// note: `<<8>>8` is phase unwrapping
|
|
|
|
int yawDiff1 = ((yawIndex2 - yawIndex1)<<8>>8) / hBlock;
|
|
|
|
int yawDiff2 = ((yawIndex4 - yawIndex3)<<8>>8) / hBlock;
|
|
|
|
int pitchDiff1 = (pitch2 - pitch1) / hBlock;
|
|
|
|
int pitchDiff2 = (pitch4 - pitch3) / hBlock;
|
|
|
|
|
|
|
|
int yawIndexA = yawIndex1;
|
|
|
|
int yawIndexB = yawIndex3;
|
|
|
|
int pitchA = pitch1;
|
|
|
|
int pitchB = pitch3;
|
|
|
|
|
|
|
|
for(unsigned int x = 0; x < blockSize; x+=under) {
|
|
|
|
uint32_t *fb3 = fb2 + x;
|
2013-12-23 19:46:56 +09:00
|
|
|
auto *db3 = db2 + x;
|
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
int yawIndexC = yawIndexA;
|
|
|
|
int yawDelta = ((yawIndexB - yawIndexA)<<8>>8) / blockSize;
|
|
|
|
int pitchC = pitchA;
|
|
|
|
int pitchDelta = (pitchB - pitchA) / blockSize;
|
2013-12-22 04:55:30 +09:00
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
for(unsigned int y = 0; y < blockSize; y++) {
|
2013-12-23 19:46:56 +09:00
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
unsigned int yawIndex = static_cast<unsigned int>(yawIndexC<<8>>16);
|
|
|
|
yawIndex = (yawIndex * yawScale2) >> 16;
|
|
|
|
yawIndex = (yawIndex * numLines) >> 16;
|
|
|
|
auto& line = lineList[yawIndex];
|
2013-12-23 00:44:26 +09:00
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
// solve pitch
|
|
|
|
int pitchIndex;
|
|
|
|
|
|
|
|
{
|
|
|
|
pitchIndex = pitchC >> 13;
|
|
|
|
pitchIndex -= line.pitchTanMinI;
|
|
|
|
pitchIndex = static_cast<int>
|
|
|
|
((static_cast<int64_t>(pitchIndex) *
|
|
|
|
static_cast<int64_t>(line.pitchScaleI)) >> 32);
|
|
|
|
//pitch = (pitch - line.pitchTanMin) * line.pitchScale;
|
|
|
|
//pitchIndex = static_cast<int>(pitch);
|
|
|
|
pitchIndex = std::max(pitchIndex, 0);
|
|
|
|
pitchIndex = std::min(pitchIndex, lineResolution - 1);
|
2013-12-23 00:44:26 +09:00
|
|
|
}
|
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
auto& pix = line.pixels[pitchIndex];
|
2013-12-23 00:44:26 +09:00
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
// write color.
|
|
|
|
// NOTE: combined contains both color and other information,
|
|
|
|
// though this isn't a problem as long as the color comes
|
|
|
|
// in the LSB's
|
|
|
|
#if ENABLE_SSE
|
|
|
|
if(flevel == SWFeatureLevel::SSE2) {
|
|
|
|
__m128i m;
|
|
|
|
|
|
|
|
if(under == 1) {
|
|
|
|
_mm_stream_si32(reinterpret_cast<int *>(fb3),
|
|
|
|
static_cast<int>(pix.combined));
|
2013-12-23 19:46:56 +09:00
|
|
|
*db3 = pix.depth; // FIXME: stream
|
|
|
|
/*
|
|
|
|
_mm_stream_si32(reinterpret_cast<int *>(db3),
|
|
|
|
reinterpret_cast<int>(pix.depth * distScale));*/
|
2013-12-23 03:35:00 +09:00
|
|
|
}else if(under == 2){
|
2013-12-23 19:46:56 +09:00
|
|
|
m = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double *>(&pix)));
|
2013-12-23 03:35:00 +09:00
|
|
|
_mm_store_sd(reinterpret_cast<double *>(fb3),
|
2013-12-23 19:46:56 +09:00
|
|
|
_mm_castsi128_pd(_mm_shuffle_epi32(m, 0x00)));
|
|
|
|
_mm_store_sd(reinterpret_cast<double *>(db3),
|
|
|
|
_mm_castsi128_pd(_mm_shuffle_epi32(m, 0x55)));
|
2013-12-23 03:35:00 +09:00
|
|
|
}else if(under == 4){
|
2013-12-23 19:46:56 +09:00
|
|
|
m = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double *>(&pix)));
|
2013-12-23 03:35:00 +09:00
|
|
|
_mm_stream_si128(reinterpret_cast<__m128i *>(fb3),
|
2013-12-23 19:46:56 +09:00
|
|
|
_mm_shuffle_epi32(m, 0x00));
|
|
|
|
_mm_stream_si128(reinterpret_cast<__m128i *>(db3),
|
|
|
|
_mm_shuffle_epi32(m, 0x55));
|
2013-12-23 03:35:00 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
}else
|
|
|
|
#endif
|
|
|
|
// non-optimized
|
|
|
|
{
|
|
|
|
uint32_t col = pix.combined;
|
2013-12-23 19:46:56 +09:00
|
|
|
float d = pix.depth;
|
2013-12-23 03:35:00 +09:00
|
|
|
|
|
|
|
for(int k = 0; k < under; k++){
|
|
|
|
fb3[k] = col;
|
2013-12-23 19:46:56 +09:00
|
|
|
db3[k] = d;
|
2013-12-23 03:35:00 +09:00
|
|
|
}
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
2013-12-23 03:35:00 +09:00
|
|
|
|
|
|
|
|
|
|
|
fb3 += fw;
|
2013-12-23 19:46:56 +09:00
|
|
|
db3 += fw;
|
2013-12-23 03:35:00 +09:00
|
|
|
|
|
|
|
yawIndexC += yawDelta;
|
|
|
|
pitchC += pitchDelta;
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
yawIndexA += yawDiff1;
|
|
|
|
yawIndexB += yawDiff2;
|
|
|
|
pitchA += pitchDiff1;
|
|
|
|
pitchB += pitchDiff2;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
goto Converge;
|
|
|
|
|
|
|
|
SlowBlockPath:
|
|
|
|
{
|
|
|
|
Vector3 v3 = v2;
|
2013-12-23 19:46:56 +09:00
|
|
|
Vector2 screenPos2 = screenPos;
|
2013-12-23 03:35:00 +09:00
|
|
|
for(unsigned int x = 0; x < blockSize; x+=under) {
|
|
|
|
Vector3 v4 = v3;
|
|
|
|
uint32_t *fb3 = fb2 + x;
|
2013-12-23 19:46:56 +09:00
|
|
|
auto *db3 = db2 + x;
|
|
|
|
screenPos2.y = screenPos.y;
|
|
|
|
|
2013-12-23 03:35:00 +09:00
|
|
|
for(unsigned int y = 0; y < blockSize; y++) {
|
|
|
|
Vector3 vv = v4;
|
|
|
|
|
|
|
|
// solve yaw
|
|
|
|
unsigned int yawIndex;
|
|
|
|
{
|
|
|
|
float x = vv.x, y = vv.y;
|
|
|
|
int yaw;
|
|
|
|
yaw = fastATan2(y, x);
|
|
|
|
yaw -= yawMin2;
|
|
|
|
yawIndex = static_cast<unsigned int>
|
|
|
|
(yaw & 0xffff);
|
|
|
|
}
|
|
|
|
yawIndex = (yawIndex * yawScale2) >> 16;
|
|
|
|
yawIndex = (yawIndex * numLines) >> 16;
|
|
|
|
|
|
|
|
auto& line = lineList[yawIndex];
|
|
|
|
|
|
|
|
// solve pitch
|
|
|
|
int pitchIndex;
|
|
|
|
|
|
|
|
{
|
|
|
|
float pitch;
|
|
|
|
pitch = vv.z * fastRSqrt(vv.x*vv.x+vv.y*vv.y);
|
|
|
|
pitch = ToSpecialTan(pitch);
|
|
|
|
pitch = (pitch - line.pitchTanMin) * line.pitchScale;
|
|
|
|
pitchIndex = static_cast<int>(pitch);
|
|
|
|
pitchIndex = std::max(pitchIndex, 0);
|
|
|
|
pitchIndex = std::min(pitchIndex, lineResolution - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto& pix = line.pixels[pitchIndex];
|
|
|
|
|
|
|
|
// write color.
|
|
|
|
// NOTE: combined contains both color and other information,
|
|
|
|
// though this isn't a problem as long as the color comes
|
|
|
|
// in the LSB's
|
2013-12-23 19:46:56 +09:00
|
|
|
#if ENABLE_SSE
|
2013-12-23 03:35:00 +09:00
|
|
|
if(flevel == SWFeatureLevel::SSE2) {
|
|
|
|
__m128i m;
|
|
|
|
|
|
|
|
if(under == 1) {
|
|
|
|
_mm_stream_si32(reinterpret_cast<int *>(fb3),
|
|
|
|
static_cast<int>(pix.combined));
|
2013-12-23 19:46:56 +09:00
|
|
|
*db3 = pix.depth; // FIXME: stream
|
|
|
|
/*
|
|
|
|
_mm_stream_si32(reinterpret_cast<int *>(db3),
|
|
|
|
reinterpret_cast<int>(pix.depth * distScale));*/
|
2013-12-23 03:35:00 +09:00
|
|
|
}else if(under == 2){
|
2013-12-23 19:46:56 +09:00
|
|
|
m = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double *>(&pix)));
|
2013-12-23 03:35:00 +09:00
|
|
|
_mm_store_sd(reinterpret_cast<double *>(fb3),
|
2013-12-23 19:46:56 +09:00
|
|
|
_mm_castsi128_pd(_mm_shuffle_epi32(m, 0x00)));
|
|
|
|
_mm_store_sd(reinterpret_cast<double *>(db3),
|
|
|
|
_mm_castsi128_pd(_mm_shuffle_epi32(m, 0x55)));
|
2013-12-23 03:35:00 +09:00
|
|
|
}else if(under == 4){
|
2013-12-23 19:46:56 +09:00
|
|
|
m = _mm_castpd_si128(_mm_load_sd(reinterpret_cast<const double *>(&pix)));
|
2013-12-23 03:35:00 +09:00
|
|
|
_mm_stream_si128(reinterpret_cast<__m128i *>(fb3),
|
2013-12-23 19:46:56 +09:00
|
|
|
_mm_shuffle_epi32(m, 0x00));
|
|
|
|
_mm_stream_si128(reinterpret_cast<__m128i *>(db3),
|
|
|
|
_mm_shuffle_epi32(m, 0x55));
|
2013-12-23 03:35:00 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
}else
|
2013-12-23 19:46:56 +09:00
|
|
|
#endif
|
2013-12-23 03:35:00 +09:00
|
|
|
// non-optimized
|
|
|
|
{
|
|
|
|
uint32_t col = pix.combined;
|
2013-12-23 19:46:56 +09:00
|
|
|
float d = pix.depth;
|
2013-12-23 03:35:00 +09:00
|
|
|
|
|
|
|
for(int k = 0; k < under; k++){
|
|
|
|
fb3[k] = col;
|
2013-12-23 19:46:56 +09:00
|
|
|
db3[k] = d;
|
2013-12-23 03:35:00 +09:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fb3 += fw;
|
2013-12-23 19:46:56 +09:00
|
|
|
db3 += fw;
|
2013-12-23 03:35:00 +09:00
|
|
|
|
|
|
|
v4 += deltaDown;
|
2013-12-23 19:46:56 +09:00
|
|
|
screenPos2.y += deltaScreenPosDownSmall;
|
2013-12-23 03:35:00 +09:00
|
|
|
} // y
|
|
|
|
v3 += deltaRight;
|
2013-12-23 19:46:56 +09:00
|
|
|
screenPos2.x += deltaScreenPosRightSmall;
|
2013-12-23 03:35:00 +09:00
|
|
|
} // x
|
|
|
|
|
|
|
|
} // end SlowBlockPath
|
|
|
|
|
|
|
|
Converge:
|
|
|
|
|
2013-12-22 04:55:30 +09:00
|
|
|
v2 += deltaDownLarge;
|
2013-12-23 19:46:56 +09:00
|
|
|
screenPos.y += deltaScreenPosDown;
|
2013-12-22 04:55:30 +09:00
|
|
|
} // fy
|
|
|
|
v1 += deltaRightLarge;
|
2013-12-23 19:46:56 +09:00
|
|
|
screenPos.x += deltaScreenPosRight;
|
2013-12-22 04:55:30 +09:00
|
|
|
} // fx
|
|
|
|
|
|
|
|
}
|
2013-12-23 01:21:43 +09:00
|
|
|
|
2013-12-22 04:55:30 +09:00
|
|
|
template<SWFeatureLevel flevel>
|
|
|
|
void SWMapRenderer::RenderInner(const client::SceneDefinition &def,
|
|
|
|
Bitmap *frame, float *depthBuffer) {
|
|
|
|
|
|
|
|
sceneDef = def;
|
|
|
|
frameBuf = frame;
|
|
|
|
depthBuf = depthBuffer;
|
|
|
|
|
|
|
|
// calculate line density.
|
|
|
|
float yawMin, yawMax;
|
|
|
|
float pitchMin, pitchMax;
|
|
|
|
size_t numLines;
|
|
|
|
{
|
|
|
|
float fovX = tanf(def.fovX * 0.5f);
|
|
|
|
float fovY = tanf(def.fovY * 0.5f);
|
|
|
|
float fovDiag = sqrtf(fovX * fovX + fovY * fovY);
|
|
|
|
float fovDiagAng = atanf(fovDiag);
|
|
|
|
float pitch = asinf(def.viewAxis[2].z);
|
2013-12-22 15:00:56 +09:00
|
|
|
static const float pi = M_PI;
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
//pitch = 0.f;
|
|
|
|
|
|
|
|
if(fabsf(pitch) >= pi * 0.49f - fovDiagAng) {
|
|
|
|
// pole is visible
|
|
|
|
yawMin = 0.f;
|
|
|
|
yawMax = pi * 2.f;
|
|
|
|
}else{
|
|
|
|
float yaw = atan2l(def.viewAxis[2].y, def.viewAxis[2].x);
|
|
|
|
// TODO: incorrect!
|
|
|
|
yawMin = yaw - pi * .5f; //fovDiagAng;
|
|
|
|
yawMax = yaw + pi * .5f; //fovDiagAng;
|
|
|
|
}
|
|
|
|
|
|
|
|
pitchMin = pitch - fovDiagAng;
|
|
|
|
pitchMax = pitch + fovDiagAng;
|
|
|
|
if(pitchMin < -pi * 0.5f) {
|
|
|
|
pitchMax = std::max(pitchMax, -pi-pitchMin);
|
|
|
|
pitchMin = -pi * 0.5f;
|
|
|
|
}
|
|
|
|
if(pitchMax > pi * 0.5f) {
|
|
|
|
pitchMin = std::min(pitchMin, pi - pitchMax);
|
|
|
|
pitchMax = pi * 0.5f;
|
|
|
|
}
|
|
|
|
|
|
|
|
// pitch of PI/2 will make tan(x) infinite
|
|
|
|
pitchMin = std::max(pitchMin, -pi * 0.4999f);
|
|
|
|
pitchMax = std::min(pitchMax, pi * 0.4999f);
|
|
|
|
|
|
|
|
float interval = static_cast<float>(frame->GetHeight());
|
|
|
|
interval = fovY * 2.f / interval;
|
|
|
|
lineResolution = static_cast<int>((pitchMax - pitchMin) / interval * 1.5f);
|
|
|
|
lineResolution = frame->GetHeight();
|
|
|
|
if(pitchMin > 0.f) {
|
|
|
|
//interval /= cosf(pitchMin);
|
|
|
|
}else if(pitchMax < 0.f){
|
|
|
|
//interval /= cosf(pitchMax);
|
|
|
|
}
|
|
|
|
|
|
|
|
numLines = static_cast<size_t>((yawMax - yawMin) / interval);
|
|
|
|
|
|
|
|
int under = r_swUndersampling;
|
|
|
|
under = std::max(std::min(under, 4), 1);
|
|
|
|
numLines /= under;
|
|
|
|
|
|
|
|
if(numLines < 8) numLines = 8;
|
|
|
|
if(numLines > 65536) {
|
|
|
|
SPRaise("Too many lines emit: %d", static_cast<int>(numLines));
|
|
|
|
}
|
|
|
|
lines.resize(std::max(numLines, lines.size()));
|
2013-12-24 04:29:39 +09:00
|
|
|
/*
|
2013-12-22 04:55:30 +09:00
|
|
|
SPLog("numlines: %d, each %f deg, and %d res",
|
|
|
|
static_cast<int>(numLines),
|
|
|
|
interval * 180.f / pi,
|
2013-12-24 04:29:39 +09:00
|
|
|
static_cast<int>(lineResolution));*/
|
2013-12-22 04:55:30 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
// calculate vector for each lines
|
|
|
|
{
|
|
|
|
float scl = (yawMax - yawMin) / numLines;
|
|
|
|
Vector3 horiz = Vector3::Make(cosf(yawMin), sinf(yawMin), 0.f);
|
|
|
|
float c = cosf(scl);
|
|
|
|
float s = sinf(scl);
|
|
|
|
for(size_t i = 0; i < numLines; i++) {
|
|
|
|
Line& l = lines[i];
|
|
|
|
l.horizonDir = horiz;
|
|
|
|
|
|
|
|
float x = horiz.x * c - horiz.y * s;
|
|
|
|
float y = horiz.x * s + horiz.y * c;
|
|
|
|
horiz.x = x;
|
|
|
|
horiz.y = y;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-23 01:21:43 +09:00
|
|
|
{
|
|
|
|
unsigned int nlines = static_cast<unsigned int>(numLines);
|
2013-12-23 19:46:56 +09:00
|
|
|
InvokeParallel2([&](unsigned int th, unsigned int numThreads) {
|
2013-12-23 01:21:43 +09:00
|
|
|
unsigned int start = th * nlines / numThreads;
|
|
|
|
unsigned int end = (th+1) * nlines / numThreads;
|
|
|
|
|
|
|
|
for(size_t i = start; i < end; i++) {
|
|
|
|
BuildLine<flevel>(lines[i], pitchMin, pitchMax);
|
|
|
|
}
|
2013-12-23 19:46:56 +09:00
|
|
|
});
|
2013-12-23 01:21:43 +09:00
|
|
|
}
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
int under = r_swUndersampling;
|
2013-12-23 01:21:43 +09:00
|
|
|
|
2013-12-23 19:46:56 +09:00
|
|
|
InvokeParallel2([&](unsigned int th, unsigned int numThreads) {
|
2013-12-23 01:21:43 +09:00
|
|
|
|
|
|
|
if(under <= 1){
|
|
|
|
RenderFinal<flevel, 1>(yawMin, yawMax,
|
|
|
|
static_cast<unsigned int>(numLines),
|
|
|
|
th, numThreads);
|
|
|
|
}else if(under <= 2){
|
|
|
|
RenderFinal<flevel, 2>(yawMin, yawMax,
|
|
|
|
static_cast<unsigned int>(numLines),
|
|
|
|
th, numThreads);
|
|
|
|
}else{
|
|
|
|
RenderFinal<flevel, 4>(yawMin, yawMax,
|
|
|
|
static_cast<unsigned int>(numLines),
|
|
|
|
th, numThreads);
|
|
|
|
}
|
2013-12-23 19:46:56 +09:00
|
|
|
});
|
2013-12-23 01:21:43 +09:00
|
|
|
|
2013-12-22 04:55:30 +09:00
|
|
|
|
|
|
|
|
|
|
|
frameBuf = nullptr;
|
|
|
|
depthBuf = nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Entry point for rendering the map.
 *
 * Reports a null `frame` or `depthBuffer` through SPInvalidArgument, then
 * forwards to the RenderInner instantiation selected by `level`: the SSE2
 * one when it is compiled in (ENABLE_SSE2) and `level` is at least
 * SWFeatureLevel::SSE2, otherwise the SWFeatureLevel::None one.
 *
 * @param def          scene definition to render.
 * @param frame        output bitmap; must not be null.
 * @param depthBuffer  output depth buffer; must not be null.
 */
void SWMapRenderer::Render(const client::SceneDefinition &def,
						   Bitmap *frame, float *depthBuffer) {
	if(frame == nullptr) SPInvalidArgument("frame");
	if(depthBuffer == nullptr) SPInvalidArgument("depthBuffer");

#if ENABLE_SSE2
	const bool sse2Usable =
		static_cast<int>(level) >= static_cast<int>(SWFeatureLevel::SSE2);
	if(sse2Usable) {
		RenderInner<SWFeatureLevel::SSE2>(def, frame, depthBuffer);
		return;
	}
#endif

	// generic fallback
	RenderInner<SWFeatureLevel::None>(def, frame, depthBuffer);
}
|
|
|
|
}
|
|
|
|
}
|