/*
    This file is part of Iceball.

    Iceball is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Iceball is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Iceball.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "common.h"

// debug switch: walk the block list back to front in render_vxl_face_raycast
#if 0
#define DEBUG_INVERT_DRAW_DIR
#endif

// debug switches: emit marker blocks at each column's y1/y2 and hide the real blocks
#if 0
#define DEBUG_SHOW_TOP_BOTTOM
#define DEBUG_HIDE_MAIN
#endif

// only referenced by the disabled (#if 0) path in render_vxl_cube_sides
#define CUBESUX_MARKER 20

// capacity of the rayc_data queue filled by render_vxl_redraw
#define RAYC_MAX ((int)((FOG_MAX_DISTANCE+1)*(FOG_MAX_DISTANCE+1)*8+10))

// direction flags (not referenced elsewhere in this file)
#define DF_NX 0x01
#define DF_NY 0x02
#define DF_NZ 0x04
#define DF_PX 0x08
#define DF_PY 0x10
#define DF_PZ 0x20
#define DF_SPREAD 0x3F

// cubemap face indices: negative/positive X, Y, Z
enum
{
	CM_NX = 0,
	CM_NY,
	CM_NZ,
	CM_PX,
	CM_PY,
	CM_PZ,
	CM_MAX
};

// for each face being drawn, the face whose shade is used for the four side
// trapezoids; render_vxl_cube_sides uses [0] and [1] for the vertical
// trapezoids and [2] and [3] for the horizontal ones
int cam_shading_map[6][4] = {
	{CM_PZ, CM_NZ, CM_PY, CM_NY},
	{CM_NX, CM_PX, CM_NZ, CM_PZ},
	{CM_NX, CM_PX, CM_PY, CM_NY},
	{CM_NZ, CM_PZ, CM_PY, CM_NY},
	{CM_PX, CM_NX, CM_PZ, CM_NZ},
	{CM_PX, CM_NX, CM_PY, CM_NY},
};

// one colour and one depth buffer per cube face, each cubemap_size^2 entries
// (allocated in render_init); cubemap_size == 1<<cubemap_shift
uint32_t *cubemap_color[CM_MAX];
float *cubemap_depth[CM_MAX];
int cubemap_size;
int cubemap_shift;

float fog_distance = FOG_INIT_DISTANCE;
uint32_t fog_color = 0xD0E0FF;

// render target and scene stashed by the entry points for the helpers to use
uint32_t *rtmp_pixels;
int rtmp_width, rtmp_height, rtmp_pitch;
camera_t *rtmp_camera;
map_t *rtmp_map;

// per-face brightness multiplier used by render_shade (applied as value/256)
uint32_t cam_shading[6] = {
	0x000C0, 0x000A0, 0x000D0, 0x000E0, 0x00FF, 0x000D0,
};

// one queued map column for the flood fill in render_vxl_redraw
typedef struct raydata
{
	int16_t x,y,z;   // column position (y is only set for the start column)
	int8_t gx,gz;    // direction the fill travelled to reach this column
	float y1,y2;     // vertical range considered visible in this column
	float sx,sy,sz;  // camera sub-block offset (copied, not read back in this file)
} raydata_t;

// one visible block collected by render_vxl_redraw
typedef struct rayblock
{
	uint32_t color;
	float x,y,z;
} rayblock_t;

// per-scanline left (x1..) and right (x2..) edge values for the cubemap quad drawer
typedef struct edgebit
{
	int x1,x2;
	float z1,u1,v1;
	float z2,u2,v2;
} edgebit_t;

int elist_y1;
int elist_y2;
edgebit_t *elist = NULL;
int elist_len = 0;

int rayc_block_len, rayc_block_head;
int rayc_data_len, rayc_data_head;
raydata_t rayc_data[RAYC_MAX];
rayblock_t *rayc_block = NULL;
#ifdef RENDER_CUBES_MULTITHREADED
int *rayc_stack_len = NULL;
int *rayc_stack_ordlen = NULL;
#endif
int *rayc_mark = NULL;
int rayc_block_size = 0;
int rayc_mark_size = 0;

// screen-sized depth buffer (width*height floats, allocated in render_init)
float *dbuf;

#ifdef RENDER_FACE_COUNT
int render_face_current = 0;
int render_face_remain = 0;
#endif

/*
 * REFERENCE IMPLEMENTATION
 *
 */

/*
 * Blends the low three bytes of color towards fog_color.
 * fog is clamped to [0,1]; 1 keeps the colour, 0 yields fog_color.
 * The top byte of color is passed through unchanged.
 */
static uint32_t render_fog_blend(uint32_t color, float fog)
{
	int b = color&255;
	int g = (color>>8)&255;
	int r = (color>>16)&255;
	// kept unsigned so the <<24 below cannot overflow a signed int
	uint32_t t = (color>>24)&255;

	if(fog > 1.0f)
		fog = 1.0f;
	if(fog < 0.0f)
		fog = 0.0f;

	r = (r*fog+((fog_color>>16)&0xFF)*(1.0-fog)+0.5f);
	g = (g*fog+((fog_color>>8)&0xFF)*(1.0-fog)+0.5f);
	b = (b*fog+((fog_color)&0xFF)*(1.0-fog)+0.5f);

	uint32_t fcol = (uint32_t)b|((uint32_t)g<<8)|((uint32_t)r<<16);
	return fcol|(t<<24);
}

/*
 * Fogs color using depth measured against fog_distance squared.
 * The caller in this file (render_vxl_cube_sides) passes a squared distance.
 * Depths below 0.001 are treated as 0.001.
 */
uint32_t render_fog_apply_new(uint32_t color, float depth)
{
	//float fog = (fog_distance*fog_distance/depth)/256.0f;
	float fog = (fog_distance*fog_distance-(depth < 0.001f ? 0.001f : depth))
		/(fog_distance*fog_distance);

	return render_fog_blend(color, fog);
}

/*
 * Fogs color linearly: full colour at depth 0, fog_color at fog_distance.
 * Depths below 0.001 are treated as 0.001.
 */
uint32_t render_fog_apply(uint32_t color, float depth)
{
	float fog = (fog_distance-(depth < 0.001f ? 0.001f : depth))/fog_distance;

	return render_fog_blend(color, fog);
}

/*
 * Shared body of render_rect_clip / render_rect_clip_screen:
 * fogs *color, orders the corners so *1 <= *2, then clamps the
 * rectangle to [0,xmax] x [0,ymax].
 */
static void render_rect_clip_to(uint32_t *color, int *x1, int *y1, int *x2, int *y2, float depth, int xmax, int ymax)
{
	*color = render_fog_apply(*color, depth);

	// arrange *1 <= *2
	if(*x1 > *x2)
	{
		int t = *x1;
		*x1 = *x2;
		*x2 = t;
	}
	if(*y1 > *y2)
	{
		int t = *y1;
		*y1 = *y2;
		*y2 = t;
	}

	// clip
	if(*x1 < 0)
		*x1 = 0;
	if(*y1 < 0)
		*y1 = 0;
	if(*x2 > xmax)
		*x2 = xmax;
	if(*y2 > ymax)
		*y2 = ymax;
}

/* Fogs *color and clips the rectangle against the cubemap face (cubemap_size square). */
void render_rect_clip(uint32_t *color, int *x1, int *y1, int *x2, int *y2, float depth)
{
	render_rect_clip_to(color, x1, y1, x2, y2, depth, cubemap_size, cubemap_size);
}

/* Fogs *color and clips the rectangle against the screen (rtmp_width x rtmp_height). */
void render_rect_clip_screen(uint32_t *color, int *x1, int *y1, int *x2, int *y2, float depth)
{
	render_rect_clip_to(color, x1, y1, x2, y2, depth, rtmp_width, rtmp_height);
}

/*
 * Draws a fogged, depth-tested rectangle into a screen-sized buffer pair.
 * ccolor is addressed with rtmp_pitch and cdepth with rtmp_width (both in
 * elements). A pixel is written only where the stored depth is greater
 * than depth.
 */
void render_rect_zbuf(uint32_t *ccolor, float *cdepth, int x1, int y1, int x2, int y2, uint32_t color, float depth)
{
	int x,y;

	// clip
	render_rect_clip_screen(&color, &x1, &y1, &x2, &y2, depth);
	//uint32_t dummy;
	//render_rect_clip_screen(&dummy, &x1, &y1, &x2, &y2, depth);

	if(x2 <= 0)
		return;
	if(x1 >= rtmp_width)
		return;
	if(y2 <= 0)
		return;
	if(y1 >= rtmp_height)
		return;
	if(x1 == x2)
		return;
	if(y1 == y2)
		return;

	// render
	uint32_t *cptr = &ccolor[y1*rtmp_pitch+x1];
	float *dptr = &cdepth[y1*rtmp_width+x1];
	int stride = x2-x1;
	int pitch = rtmp_pitch - stride;
	int dpitch = rtmp_width - stride;

#ifdef __SSE__
	if(x2-x1 >= 16)
	{
		// The prefetch cursors run one row ahead of the write cursors.
		// Each row advances them by the span rounded up to 8 elements,
		// so the remainder to the next row is (row pitch - rounded span).
		// The colour and depth buffers have different row pitches here.
		int fround = (((x2-x1)+7)&~7);
		int cfpitch = rtmp_pitch - fround;
		int dfpitch = rtmp_width - fround;
		uint32_t *cfptr = cptr;
		float *dfptr = dptr;
		int xs;

		for(x = x1; x < x2; x += 8)
		{
			_mm_prefetch(cfptr, _MM_HINT_NTA);
			cfptr += 8;
			_mm_prefetch(dfptr, _MM_HINT_NTA);
			dfptr += 8;
		}
		cfptr += cfpitch;
		dfptr += dfpitch;

		for(y = y1; y < y2-1; y++)
		{
			for(x = x1; x < x2-8; x += 8)
			{
				_mm_prefetch(cfptr, _MM_HINT_NTA);

				for(xs = 0; xs < 8; xs++)
				{
					if(*dptr > depth)
					{
						*dptr = depth;
						*cptr = color;
					}
					cptr++;
					dptr++;
				}

				_mm_prefetch(dfptr, _MM_HINT_NTA);
				cfptr += 8;
				dfptr += 8;
			}

			_mm_prefetch(cfptr, _MM_HINT_NTA);
			cfptr += 8;

			for(; x < x2; x++)
			{
				if(*dptr > depth)
				{
					*dptr = depth;
					*cptr = color;
				}
				cptr++;
				dptr++;
			}

			_mm_prefetch(dfptr, _MM_HINT_NTA);
			dfptr += 8;

			cfptr += cfpitch;
			dfptr += dfpitch;
			cptr += pitch;
			// the depth buffer is rtmp_width wide, not rtmp_pitch
			dptr += dpitch;
		}

		// last row: nothing further to prefetch
		{
			for(x = x1; x < x2; x++)
			{
				if(*dptr > depth)
				{
					*dptr = depth;
					*cptr = color;
				}
				cptr++;
				dptr++;
			}
			dptr += dpitch;
			cptr += pitch;
		}
	} else {
		for(y = y1; y < y2; y++)
		{
			for(x = x1; x < x2; x++)
			{
				if(*dptr > depth)
				{
					*dptr = depth;
					*cptr = color;
				}
				cptr++;
				dptr++;
			}
			dptr += dpitch;
			cptr += pitch;
		}
	}
#else
	for(y = y1; y < y2; y++)
	{
		for(x = x1; x < x2; x++)
		{
			if(*dptr > depth)
			{
				*dptr = depth;
				*cptr = color;
			}
			cptr++;
			dptr++;
		}
		dptr += dpitch;
		cptr += pitch;
	}
#endif
}

/*
 * Front-to-back rectangle fill into a cubemap face: a pixel is written
 * (colour and depth) only while it still holds fog_color, i.e. has not
 * been drawn since the face was cleared.
 */
// TODO: fast ver?
void render_vxl_rect_ftb_fast(uint32_t *ccolor, float *cdepth, int x1, int y1, int x2, int y2, uint32_t color, float depth)
//void render_vxl_rect_ftb_slow(uint32_t *ccolor, float *cdepth, int x1, int y1, int x2, int y2, uint32_t color, float depth)
{
	int x,y;

	// TODO: stop using this bloody function
	// (alternatively, switch to the fast FTB as used in Doom and Quake)
	//
	// NOTE: this approach seems to be faster than render_vxl_rect_btf.

	// clip
	// render_rect_clip reads *color before overwriting it, so it must be initialised
	uint32_t dummy = 0;
	render_rect_clip(&dummy, &x1, &y1, &x2, &y2, depth);

	if(x2 <= 0)
		return;
	if(x1 >= cubemap_size)
		return;
	if(y2 <= 0)
		return;
	if(y1 >= cubemap_size)
		return;
	if(x1 >= x2)
		return;
	if(y1 >= y2)
		return;

	// render
	// NOTE(review): the text between "(y1<" and "= 16)" is missing from this
	// copy of the file (presumably the rest of the cptr initialiser, the
	// dptr/pitch declarations and an "#ifdef __SSE__ if(x2-x1 >" prefix);
	// restore it from the upstream source before building.
	uint32_t *cptr = &ccolor[(y1<= 16) {
		int fpitch = cubemap_size - (((x2-x1)+7)&~7);
		uint32_t *cfptr = cptr;
		float *dfptr = dptr;
		int xs;

		for(x = x1; x < x2; x += 8)
		{
			_mm_prefetch(cfptr, _MM_HINT_NTA);
			cfptr += 8;
			_mm_prefetch(dfptr, _MM_HINT_NTA);
			dfptr += 8;
		}
		cfptr += fpitch;
		dfptr += fpitch;

		for(y = y1; y < y2-1; y++)
		{
			for(x = x1; x < x2-8; x += 8)
			{
				_mm_prefetch(cfptr, _MM_HINT_NTA);

				for(xs = 0; xs < 8; xs++)
				{
					if(*cptr == fog_color)
					{
						*cptr = color;
						*dptr = depth;
					}
					cptr++;
					dptr++;
				}

				_mm_prefetch(dfptr, _MM_HINT_NTA);
				cfptr += 8;
				dfptr += 8;
			}

			_mm_prefetch(cfptr, _MM_HINT_NTA);
			cfptr += 8;

			for(x = x; x < x2; x++)
			{
				if(*cptr == fog_color)
				{
					*cptr = color;
					*dptr = depth;
				}
				cptr++;
				dptr++;
			}

			_mm_prefetch(dfptr, _MM_HINT_NTA);
			dfptr += 8;

			cfptr += fpitch;
			dfptr += fpitch;
			cptr += pitch;
			dptr += pitch;
		}

		{
			for(x = x1; x < x2; x++)
			{
				if(*cptr == fog_color)
				{
					*cptr = color;
					*dptr = depth;
				}
				cptr++;
				dptr++;
			}
		}
	} else {
		for(y = y1; y < y2; y++)
		{
			for(x = x1; x < x2; x++)
			{
				if(*cptr == fog_color)
				{
					*cptr = color;
					*dptr = depth;
				}
				cptr++;
				dptr++;
			}
			cptr += pitch;
			dptr += pitch;
		}
	}
#else
	for(y = y1; y < y2; y++)
	{
		for(x = x1; x < x2; x++)
		{
			if(*cptr == fog_color)
			{
				*cptr = color;
				*dptr = depth;
			}
			cptr++;
			dptr++;
		}
		cptr += pitch;
		dptr += pitch;
	}
#endif
}

/*
 * Draws a trapezoid with horizontal top and bottom edges into a cubemap
 * face: the top edge spans x1a..x1b at row y1, the bottom x2a..x2b at row
 * y2. The two side edges are stepped per row in 16.16 fixed point.
 */
void render_vxl_cube_htrap(uint32_t *ccolor, float *cdepth, int x1a, int x1b, int y1, int x2a, int x2b, int y2, uint32_t color, float depth)
{
	// dropout
	if(x1b <= 0 && x2b <= 0)
		return;
	if(x1a >= cubemap_size && x2a >= cubemap_size)
		return;
	if(y2 <= 0)
		return;
	if(y1 >= cubemap_size)
		return;
	if(x1a >= x1b || x2a >= x2b)
		return;
	if(y1 >= y2)
		return;

	// calc gradients
	int m_x1a = (((x2a-x1a)<<16)+0x8000)/(y2-y1);
	int m_x1b = (((x2b-x1b)<<16)+0x8000)/(y2-y1);
	int sub_x1a = 0;
	int sub_x1b = 0;

	// Y clamp
	// TODO: clamp y1 properly
	if(y2 >= cubemap_size)
		y2 = cubemap_size;

	// render
	// NOTE(review): two stretches of this function are missing from this copy
	// of the file (after "(y1<" and after "(y<"): the row loop header, the
	// pixel writes and the gradient accumulation are not visible. Restore
	// them from the upstream source before building.
	//uint32_t *cptr = &ccolor[(y1<= 0) {
			int rx1a = (x1a < 0 ? 0 : x1a);
			int rx1b = (x1b >= cubemap_size ? cubemap_size : x1b);

			cptr = &ccolor[(y<>16);
		x1b += (sub_x1b>>16);
		sub_x1a &= 0xFFFF;
		sub_x1b &= 0xFFFF;
	}
}

/*
 * Draws a trapezoid with vertical left and right edges into a cubemap
 * face: the left edge spans y1a..y1b at column x1, the right y2a..y2b at
 * column x2. The top and bottom edges are stepped per column in 16.16
 * fixed point.
 */
void render_vxl_cube_vtrap(uint32_t *ccolor, float *cdepth, int x1, int y1a, int y1b, int x2, int y2a, int y2b, uint32_t color, float depth)
{
	// TODO: make this not so bloody horrible for the cache

	// dropout
	if(y1b <= 0 && y2b <= 0)
		return;
	if(y1a >= cubemap_size && y2a >= cubemap_size)
		return;
	if(x2 <= 0)
		return;
	if(x1 >= cubemap_size)
		return;
	if(y1a >= y1b || y2a >= y2b)
		return;
	if(x1 >= x2)
		return;

	// calc gradients
	int m_y1a = ((y2a-y1a)<<16);
	int m_y1b = ((y2b-y1b)<<16);
	m_y1a /= (x2-x1);
	m_y1b /= (x2-x1);
	int sub_y1a = 0;
	int sub_y1b = 0;

	// X clamp
	// TODO: clamp x1 properly
	// TODO: fix the leaks properly
	x2++;
	y1b++;
	y2b++;
	if(x2 >= cubemap_size)
		x2 = cubemap_size;

	// render
	// NOTE(review): two stretches of this function are missing from this copy
	// of the file (after "(y1<" and after "(ry1a<"): the column loop header,
	// the pixel writes and the gradient accumulation are not visible.
	// Restore them from the upstream source before building.
	//uint32_t *cptr = &ccolor[(y1<= 0) {
			int ry1a = (y1a < 0 ? 0 : y1a);
			int ry1b = (y1b >= cubemap_size ? cubemap_size : y1b);

			cptr = &ccolor[(ry1a<>16);
		y1b += (sub_y1b>>16);
		sub_y1a &= 0xFFFF;
		sub_y1b &= 0xFFFF;
	}
}

/*
 * Scales each of the four bytes of color by cam_shading[face]/256
 * (two bytes at a time, in 16-bit lanes) and sets bit 24 of the result.
 */
uint32_t render_shade(uint32_t color, int face)
{
	uint32_t fc = cam_shading[face];
	return (((((color&0x00FF00FF)*fc)>>8)&0x00FF00FF))
		|((((((color>>8)&0x00FF00FF)*fc))&0xFF00FF00))|0x01000000;
}

/*
 * Draws one cube into a cubemap face: the facing square (x1,y1)-(x2,y2)
 * at depth, plus up to one horizontal and one vertical side trapezoid
 * joining it to the square projected one unit further away.
 * fdist is the distance value handed to render_fog_apply_new.
 */
void render_vxl_cube_sides(uint32_t *ccolor, float *cdepth, int x1, int y1, int x2, int y2, uint32_t color, float depth, int face, float fdist)
{
	int hsize = (cubemap_size>>1);

#if 0
	if(depth > CUBESUX_MARKER)
	{
		int x3 = ((x1-hsize)*depth)/(depth+1.0f)+hsize;
		int y3 = ((y1-hsize)*depth)/(depth+1.0f)+hsize;
		int x4 = ((x2-hsize+1)*depth)/(depth+1.0f)+hsize;
		int y4 = ((y2-hsize+1)*depth)/(depth+1.0f)+hsize;

		if(x1 > x3) x1 = x3;
		if(y1 > y3) y1 = y3;
		if(x2 < x4) x2 = x4;
		if(y2 < y4) y2 = y4;

		render_vxl_rect_ftb_fast(ccolor, cdepth, x1, y1, x2, y2, render_shade(color, face), depth+0.5f);
		return;
	}
#endif

	// corners of the square at depth+1, scaled about the face centre
	int x3 = ((x1-hsize)*depth)/(depth+1.0f)+hsize;
	int y3 = ((y1-hsize)*depth)/(depth+1.0f)+hsize;
	int x4 = ((x2-hsize)*depth)/(depth+1.0f)+hsize;
	int y4 = ((y2-hsize)*depth)/(depth+1.0f)+hsize+1;

	render_vxl_rect_ftb_fast(ccolor, cdepth, x1, y1, x2, y2, render_fog_apply_new(render_shade(color, face), fdist), depth);

	depth += 0.5f;

	if(y3 < y1)
		render_vxl_cube_htrap(ccolor, cdepth, x3, x4, y3, x1, x2, y1, render_fog_apply_new(render_shade(color, cam_shading_map[face][2]), fdist), depth+1.0f);
	else if(y2 < y4)
		render_vxl_cube_htrap(ccolor, cdepth, x1, x2, y2, x3, x4, y4, render_fog_apply_new(render_shade(color, cam_shading_map[face][3]), fdist), depth+1.0f);

	if(x3 < x1)
		render_vxl_cube_vtrap(ccolor, cdepth, x3, y3, y4, x1, y1, y2, render_fog_apply_new(render_shade(color, cam_shading_map[face][0]), fdist), depth+1.0f);
	else if(x2 < x4)
		render_vxl_cube_vtrap(ccolor, cdepth, x2, y1, y2, x4, y3, y4, render_fog_apply_new(render_shade(color, cam_shading_map[face][1]), fdist), depth+1.0f);
}

/* Draws one cube into a cubemap face; forwards to render_vxl_cube_sides. */
void render_vxl_cube(uint32_t *ccolor, float *cdepth, int x1, int y1, int x2, int y2, uint32_t color, float depth, int face, float fdist)
{
	render_vxl_cube_sides(ccolor, cdepth, x1, y1, x2, y2, color, depth, face, fdist);
}

/*
 * Clears cubemap face `face` and draws every collected block
 * (rayc_block[0..rayc_block_len)) into it.
 * blk* and sub* are the camera's block position and sub-block offset.
 * (gx,gy,gz) is the axis the face looks along.
 */
void render_vxl_face_raycast(int blkx, int blky, int blkz, float subx, float suby, float subz, int face, int gx, int gy, int gz)
{
	int i;

	float tracemul = cubemap_size/2;
	float traceadd = tracemul;

	// get cubemaps
	uint32_t *ccolor = cubemap_color[face];
	float *cdepth = cubemap_depth[face];

	// clear cubemap
	for(i = 0; i < cubemap_size*cubemap_size; i++)
	{
		ccolor[i] = fog_color;
		cdepth[i] = fog_distance;
	}

	// get X cube direction
	int xgx = gz+gy;
	int xgy = 0;
	int xgz = -gx;

	// get Y cube direction
	int ygx = 0;
	int ygy = fabsf(gx+gz);
	int ygz = gy;

	// get base pos
	float bx = blkx+subx;
	float by = blky+suby;
	float bz = blkz+subz;

	if(xgx+xgy+xgz < 0)
	{
		bx += xgx;
		by += xgy;
		bz += xgz;
	}

	if(ygx+ygy+ygz < 0)
	{
		bx += ygx;
		by += ygy;
		bz += ygz;
	}

	if(gx+gy+gz < 0)
	{
		bx += gx;
		by += gy;
		bz += gz;
	}

	// now crawl through the block list
	// (the three variants below open the same two scopes and supply the
	//  loop header for the shared body after the #endifs)
#ifdef DEBUG_INVERT_DRAW_DIR
	{
	{
		int bctr;
		for(bctr = rayc_block_len-1; bctr >= 0; bctr--)
#else
#ifndef RENDER_CUBES_MULTITHREADED
	{
	{
		int bctr;
		// valid entries are [0, rayc_block_len); index rayc_block_len is not filled
		for(bctr = 0; bctr < rayc_block_len; bctr++)
#else
	int ord_accum_start = 0;
	int ord_accum_end = 0;
	int ord_idx;
	//printf("start\n");
	for(ord_idx = 0; rayc_stack_ordlen[ord_idx] != -1; ord_idx++)
	{
		ord_accum_start = ord_accum_end;
		ord_accum_end += rayc_stack_ordlen[ord_idx];
		//printf("%i\n", ord_accum_end);
	int pil_idx;
	#pragma omp parallel for
	for(pil_idx = ord_accum_start; pil_idx < ord_accum_end; pil_idx++)
	{
		int bctr;
		int bc_start = rayc_stack_len[pil_idx];
		int bc_end = rayc_stack_len[pil_idx+1];
		for(bctr = bc_start; bctr < bc_end && bctr < rayc_block_len; bctr++)
#endif
#endif
		{
			rayblock_t *b = &rayc_block[bctr];

			// get block delta
			float dx = b->x - bx;
			float dy = b->y - by;
			float dz = b->z - bz;

			// get correct screen positions
			float sx = dx*xgx+dy*xgy+dz*xgz;
			float sy = dx*ygx+dy*ygy+dz*ygz;
			float sz = dx* gx+dy* gy+dz* gz;

			// check distance
			if(sz < 0.001f || sz >= fog_distance)
				continue;

			// frustum cull
			if(fabsf(sx) > fabsf(sz+2.0f) || fabsf(sy) > fabsf(sz+2.0f))
				continue;

			// draw
			float boxsize = tracemul/fabsf(sz);
			float px1 = sx*boxsize+traceadd;
			float py1 = sy*boxsize+traceadd;
			float px2 = px1+boxsize;
			float py2 = py1+boxsize;

			// the last argument is the squared distance, used for fogging
			render_vxl_cube(ccolor, cdepth,
				(int)px1, (int)py1, (int)px2, (int)py2,
				b->color, sz, face,
				sx*sx+sy*sy+sz*sz);
		}
	}
	}
}

/*
 * Rebuilds the list of visible blocks around the camera and redraws the
 * cubemap faces from it. With map == NULL all six faces are just cleared
 * to fog.
 *
 * The visible set is found by a breadth-first flood fill over map columns
 * starting at the camera's column. rayc_data is the queue, rayc_mark holds
 * per column 0 (unseen), queue index+1 (queued) or -1 (processed), and
 * each processed column appends its blocks to rayc_block.
 *
 * NOTE(review): the `& (xlen-1)` / `& (zlen-1)` wrapping presumably relies
 * on xlen and zlen being powers of two - confirm against the map loader.
 */
void render_vxl_redraw(camera_t *camera, map_t *map)
{
	// if there isn't a map, clear screen and return
	if(map == NULL)
	{
		int face,i;

		for(face = 0; face < 6; face++)
		{
			// get cubemaps
			uint32_t *ccolor = cubemap_color[face];
			float *cdepth = cubemap_depth[face];

			// clear cubemap
			for(i = 0; i < cubemap_size*cubemap_size; i++)
			{
				ccolor[i] = fog_color;
				cdepth[i] = fog_distance;
			}
		}

		return;
	}

	int i;

	// stash stuff in globals to prevent spamming the stack too much
	// (and in turn thrashing the cache)
	rtmp_camera = camera;
	rtmp_map = map;

	// stash x/y/zlen
	int xlen = map->xlen;
	int ylen = map->ylen;
	int zlen = map->zlen;

	// get block pos
	int blkx = ((int)floor(camera->mpx)) & (xlen-1);
	int blky = ((int)floor(camera->mpy));// & (ylen-1);
	int blkz = ((int)floor(camera->mpz)) & (zlen-1);

	// get block subpos
	float subx = (camera->mpx - floor(camera->mpx));
	float suby = (camera->mpy - floor(camera->mpy));
	float subz = (camera->mpz - floor(camera->mpz));

	// get centre (base) pos
	float bx = blkx + subx;
	float by = blky + suby;
	float bz = blkz + subz;
	int byi = blky;

	// check if we need to reallocate the mark table and block list
	// NOTE(review): the realloc results are not checked; a failed allocation
	// is dereferenced below and the previous buffer is lost.
	{
		int markbase = xlen * zlen;
		int blockbase = markbase * ylen;

		if(rayc_mark_size != markbase)
		{
			rayc_mark_size = markbase;
			rayc_mark = (int*)realloc(rayc_mark, rayc_mark_size*sizeof(int));
#ifdef RENDER_CUBES_MULTITHREADED
			rayc_stack_len = realloc(rayc_stack_len, rayc_mark_size*8*sizeof(int));
			rayc_stack_ordlen = realloc(rayc_stack_ordlen, rayc_mark_size*8*sizeof(int));
#endif
		}

		if(rayc_block_size != blockbase)
		{
			rayc_block_size = blockbase;
			rayc_block = (rayblock_t*)realloc(rayc_block, rayc_block_size*sizeof(rayblock_t));
		}
	}

	// clear the mark table
	memset(rayc_mark, 0, rayc_mark_size*sizeof(int));

	// prep the starting block
	rayc_block_len = 0;
	rayc_block_head = 0;
	rayc_data_len = 1;
	rayc_data_head = 0;

	rayc_data[0].x = blkx;
	rayc_data[0].y = blky;
	rayc_data[0].z = blkz;
	rayc_data[0].gx = 0;
	rayc_data[0].gz = 0;
	rayc_data[0].y1 = blky+suby;
	rayc_data[0].y2 = blky+suby;
	rayc_data[0].sx = subx;
	rayc_data[0].sy = suby;
	rayc_data[0].sz = subz;
	rayc_mark[blkx + blkz*xlen] = 1;

#ifdef RENDER_CUBES_MULTITHREADED
	// get the block order stack set up for multiprocessing stuff
	int stack_ordidx = 0;
	int stack_pilidx = 0;
	int stack_ordrem = 1;
	int stack_ordptr = 0;
	int stack_pilptr = 0;
#endif

	// build your way up
	while(rayc_data_head < rayc_data_len)
	{
		raydata_t *rd = &(rayc_data[rayc_data_head++]);

		// back this up so we can flip the top
		rayblock_t *b_pstart = &(rayc_block[rayc_block_len]);
		rayblock_t *b_pmid = b_pstart;

		// get delta
		float dx = rd->x - bx;
		float dz = rd->z - bz;

		if(rd->gx < 0) dx++;
		else if(rd->gx == 0) dx = 0;
		if(rd->gz < 0) dz++;
		else if(rd->gz == 0) dz = 0;

		// skip this if it's in the fog
		// NOTE(review): this also skips the RENDER_CUBES_MULTITHREADED
		// bookkeeping at the bottom of the loop - confirm that is intended.
		if(dx*dx+dz*dz >= fog_distance*fog_distance)
			continue;

		// true only for the first queue entry, i.e. the camera's own column
		int near_cast = (rayc_data_head == 1);

		// find where we are
		int idx = (((int)(rd->z)) & (zlen-1))*xlen + (((int)rd->x) & (xlen-1));
		// skip the 4-byte column header; p then points at a span header
		// read below as p[0] = chunk length in 4-byte units (0 = last),
		// p[1]..p[2] = y range of the stored colours, p[3] = air start
		uint8_t *p = map->pillars[idx]+4;
		rayc_mark[idx] = -1;

		// walk down to the span containing ysearch
		int lastn = 0;
		int topcount = 0;
		int lasttop = 0;
		float ysearch = rd->y1;

		while(p[0] != 0)
		{
			if(ysearch < p[2] && (lastn == 0 || ysearch >= lasttop))
				break;

			lastn = p[0];
			lasttop = p[1];
			topcount = p[0] - (p[2]-p[1]+1);
			p += p[0]*4;
		}

		int spreadflag = 1;

		// advance y1/y2
		float y1 = rd->y1;
		float y2 = rd->y2;

		if(near_cast)
		{
			y1 = (lastn == 0 ? 0.0f : p[3]);
			if(y1 > rd->y1)
				y1 = rd->y1;
			rd->y1 = y1;
			rd->y2 = y2 = p[1];
		} else {
			float dist1 = sqrtf(dx*dx+dz*dz);
			float dist2 = dist1 + 1.0f;

			// approx max dist this can travel
			float travel = dist2/dist1;

			if(y1 < by)
				y1 = by + (y1-by)*travel;
			if(y2 > by)
				y2 = by + (y2-by)*travel;
		}

		int iy1 = floor(y1);
		int iy2 = floor(y2);
		float by1 = y1;
		float by2 = y2;

		// TODO: get the order right!

#ifdef DEBUG_SHOW_TOP_BOTTOM
		{
			rayblock_t *b = &rayc_block[rayc_block_len++];
			b->x = rd->x;
			b->z = rd->z;
			b->y = iy1;
			b->color = 0xFFFF0000;
		}
		{
			rayblock_t *b = &rayc_block[rayc_block_len++];
			b->x = rd->x;
			b->z = rd->z;
			b->y = iy2;
			b->color = 0xFF0000FF;
		}
		b_pstart += 2;
		b_pmid += 2;
#endif

		// add the top blocks (if they exist and we can see them)
		if(lastn == 0)
		{
			if(y1 > 0.0f)
				y1 = 0;
			y2 = p[1];
		} else if(rayc_data_head == 1) {
			y1 = p[3];
			y2 = p[1];

			// just the immediate ceiling, thanks.
#ifndef DEBUG_HIDE_MAIN
			{
				rayblock_t *b = &rayc_block[rayc_block_len++];
				b->x = rd->x;
				b->z = rd->z;
				b->y = p[3]-1;
				b->color = *(uint32_t *)(&p[-4]);
			}
#endif
		} else if(p[3] >= rd->y1-1) {
			y1 = p[3];
			y2 = p[1];

			uint32_t *c = (uint32_t *)(&p[-4*topcount]);
#ifndef DEBUG_HIDE_MAIN
			for(i = p[3]-topcount; i <= p[3]-1; i++)
			{
				if(i < iy1)
				{
					c++;
					continue;
				}

				rayblock_t *b = &rayc_block[rayc_block_len++];
				b->x = rd->x;
				b->z = rd->z;
				b->y = i;
				b->color = *(c++);
			}
#endif
		}

		// sneak your way down
		while(p[1] <= iy2)
		{
			if(p[1] != p[3])
				y2 = p[1];

			//printf("%i %i %i %i [%i, %i]\n", p[0],p[1],p[2],p[3],iy1,iy2);
			uint32_t *c = (uint32_t *)(&p[4]);
#ifndef DEBUG_HIDE_MAIN
			for(i = p[1]; i <= p[2] && i <= iy2; i++)
			{
				rayblock_t *b = &rayc_block[rayc_block_len++];
				b->x = rd->x;
				b->z = rd->z;
				b->y = i;
				b->color = *(c++);
			}
#endif

			if(p[0] == 0)
				break;

			lastn = p[0];
			lasttop = p[1];
			topcount = p[0] - (p[2]-p[1]+1);
			p += 4*p[0];

			if(p[1] != p[3] && rd->y2 >= p[3])
				y2 = p[1];

			c = (uint32_t *)(&p[-4*topcount]);
#ifndef DEBUG_HIDE_MAIN
			for(i = p[3]-topcount; i <= p[3]-1 && i <= iy2; i++)
			{
				rayblock_t *b = &rayc_block[rayc_block_len++];
				b->x = rd->x;
				b->z = rd->z;
				b->y = i;
				b->color = *(c++);
			}
#endif
		}

		// find the y middle
		while(b_pmid < &rayc_block[rayc_block_len] && b_pmid->y <= byi)
			b_pmid++;

		b_pmid--;

		// flip!
		// (reverses the blocks at or above the camera's row so they are
		//  listed starting from the one closest to it)
		while(b_pstart < b_pmid)
		{
			rayblock_t t;

			t = *b_pstart;
			*b_pstart = *b_pmid;
			*b_pmid = t;

			b_pstart++;
			b_pmid--;
		}

#ifdef RENDER_CUBES_MULTITHREADED
		// add blockdata to span length table
		rayc_stack_len[stack_pilidx++] = rayc_block_len - stack_pilptr;
		stack_pilptr = rayc_block_len;
#endif

		// correct the y spread
		if(y1 < by1)
			y1 = by1;
		if(y2 > by2)
			y2 = by2;

		spreadflag = spreadflag && (y1 < y2);
		//spreadflag = 1;

		// spread out
		// (ofx,ofz) cycles through (1,0), (0,1), (-1,0), (0,-1)
		// NOTE(review): rayc_data_len is not checked against RAYC_MAX here.
		int ofx = 1;
		int ofz = 0;
		if(spreadflag) do
		{
			int idx2 = ((ofx + (int)rd->x) & (xlen-1))
				+ xlen * ((ofz + (int)rd->z) & (zlen-1));

			if(ofx * rd->gx < 0 || ofz * rd->gz < 0)
			{
				// do nothing
			} else if(rayc_mark[idx2] == 0) {
				rayc_mark[idx2] = rayc_data_len+1;
				raydata_t *rd2 = &(rayc_data[rayc_data_len++]);
				rd2->x = ofx + (int)rd->x;
				rd2->z = ofz + (int)rd->z;
				rd2->y1 = y1;
				rd2->y2 = y2;
				rd2->sx = subx;
				rd2->sy = suby;
				rd2->sz = subz;
				rd2->gx = (ofx == 0 ? rd->gx : ofx);
				rd2->gz = (ofz == 0 ? rd->gz : ofz);
			} else if(rayc_mark[idx2] != -1) {
				// already queued: widen its visible range
				raydata_t *rd2 = &(rayc_data[rayc_mark[idx2]-1]);
				if(y1 < rd2->y1)
					rd2->y1 = y1;
				if(y2 > rd2->y2)
					rd2->y2 = y2;
				if(rd2->gx == 0)
					rd2->gx = (ofx == 0 ? rd->gx : ofx);
				if(rd2->gz == 0)
					rd2->gz = (ofz == 0 ? rd->gz : ofz);
			}

			{
				int t = ofx;
				ofx = -ofz;
				ofz = t;
			}
		} while(ofx != 1);

#ifdef RENDER_CUBES_MULTITHREADED
		// add data to ordered span count table if necessary
		if(--stack_ordrem <= 0)
		{
			rayc_stack_ordlen[stack_ordidx++] = stack_ordrem = rayc_data_len - stack_ordptr;
			stack_ordptr = rayc_data_len;
		}
#endif
	}

#ifdef RENDER_CUBES_MULTITHREADED
	// terminate stack lists
	rayc_stack_len[stack_pilidx++] = 0;
	rayc_stack_len[stack_pilidx] = -1;
	rayc_stack_ordlen[stack_ordidx++] = rayc_data_len - stack_ordptr;
	rayc_stack_ordlen[stack_ordidx] = -1;

	// apply running sum to stack lists
	{
		int rsum = 0;
		for(i = 0; rayc_stack_len[i] != -1; i++)
		{
			rsum += rayc_stack_len[i];
			rayc_stack_len[i] = rsum;
		}
	}
#endif

	//printf("%i %i %i\n", stack_pilidx, stack_ordidx, stack_ordrem);

	// render each face
#ifdef RENDER_FACE_COUNT
	for(i = 0; i < RENDER_FACE_COUNT && render_face_remain > 0; i++)
	{
		switch(render_face_current)
		{
			default:
				render_face_current = 0;
				/* FALL THROUGH */
			case 0:
				render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NX, -1,  0,  0);
				break;
			case 1:
				render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NY,  0, -1,  0);
				break;
			case 2:
				render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NZ,  0,  0, -1);
				break;
			case 3:
				render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PX,  1,  0,  0);
				break;
			case 4:
				render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PY,  0,  1,  0);
				break;
			case 5:
				render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PZ,  0,  0,  1);
				break;
		}

		render_face_current++;
		render_face_remain--;
	}
#else
#if 1
	render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NX, -1,  0,  0);
	render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NY,  0, -1,  0);
	render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NZ,  0,  0, -1);
	render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PX,  1,  0,  0);
	render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PY,  0,  1,  0);
	render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PZ,  0,  0,  1);
#else
	#pragma omp sections
	{
		#pragma omp section
		render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NX, -1,  0,  0);
		#pragma omp section
		render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NY,  0, -1,  0);
		#pragma omp section
		render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_NZ,  0,  0, -1);
		#pragma omp section
		render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PX,  1,  0,  0);
		#pragma omp section
		render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PY,  0,  1,  0);
		#pragma omp section
		render_vxl_face_raycast(blkx, blky, blkz, subx, suby, subz, CM_PZ,  0,  0,  1);
	}
#endif
#endif
}

/*
 * Walks one polygon edge from (x1,y1) to (x2,y2) and records its per-row
 * x/z/u/v in elist. A downward edge (y1 < y2) fills the right-hand fields
 * (x2,z2,u2,v2); an upward edge fills the left-hand fields (x1,z1,u1,v1).
 * elist_y1/elist_y2 are widened to cover the rows touched.
 * `face` is not used.
 *
 * NOTE(review): the z/u/v step per row is the full endpoint difference
 * (not divided by the row count), which looks unfinished - this drawer is
 * only reached from render_cubemap_new, which is marked TODO.
 */
void render_cubemap_edge(
	int face,
	int x1, int y1, float z1, float u1, float v1,
	int x2, int y2, float z2, float u2, float v2)
{
	int y;

	// if out of Y range, drop out early.
	if(y1 < 0 && y2 < 0)
		return;
	if(y1 >= rtmp_height && y2 >= rtmp_height)
		return;

	// if perfectly horizontal, drop out early.
	if(y1 == y2)
		return;

	// prep line drawer
	int dx = x2-x1;
	int dy = y2-y1;
	int xadd = 0;
	int xinc = 1;
	int dc = 0;

	// ensure dy is positive
	if(dy < 0)
	{
		dx = -dx;
		dy = -dy;
	}

	// ensure dx is positive
	if(dx < 0)
	{
		dx = -dx;
		xinc = -1;
	}

	// calculate correct xadd,dx,dy
	xadd = dx/dy;
	xadd *= xinc;
	dx %= dy;

	// we are going clockwise.
	if(y1 < y2)
	{
		// right side

		// clip for y
		if(y1 < 0)
		{
			y1 = -y1;
			dc = dx*y1;
			z1 += (z2-z1)*y1;
			u1 += (u2-u1)*y1;
			v1 += (v2-v1)*y1;
			x1 += xinc*(dc/dy) + xadd*y1;
			dc %= dy;
			y1 = 0;
		}

		if(y2 > rtmp_height)
			y2 = rtmp_height;

		// expand list top/bottom
		if(y1 < elist_y1)
			elist_y1 = y1;
		if(y2 > elist_y2)
			elist_y2 = y2;

		// calc deltas
		float dz = z2-z1;
		float du = u2-u1;
		float dv = v2-v1;

		// apply
		edgebit_t *eb = &elist[y1];
		for(y = y1; y < y2; y++)
		{
			eb->x2 = x1;
			eb->z2 = z1;
			eb->u2 = u1;
			eb->v2 = v1;

			z1 += dz;
			u1 += du;
			v1 += dv;

			x1 += xadd;
			dc += dx;
			if(dc >= dy)
			{
				x1 += xinc;
				dc -= dy;
			}

			eb++;
		}
	} else {
		// left side

		// clip for y
		if(y2 < 0)
		{
			y2 = -y2;
			dc = dx*y2;
			z2 += (z1-z2)*y2;
			u2 += (u1-u2)*y2;
			v2 += (v1-v2)*y2;
			// the number of rows skipped is y2 here, not y1
			x2 += xinc*(dc/dy) + xadd*y2;
			dc %= dy;
			y2 = 0;
		}

		if(y1 > rtmp_height)
			y1 = rtmp_height;

		// expand list top/bottom
		if(y2 < elist_y1)
			elist_y1 = y2;
		if(y1 > elist_y2)
			elist_y2 = y1;

		// calc deltas
		float dz = z1-z2;
		float du = u1-u2;
		float dv = v1-v2;

		// apply
		// the walk runs from row y2 up to (not including) y1, so it must
		// start at elist[y2]; starting at elist[y1] writes past the list
		edgebit_t *eb = &elist[y2];
		for(y = y2; y < y1; y++)
		{
			eb->x1 = x2;
			eb->z1 = z2;
			eb->u1 = u2;
			eb->v1 = v2;

			z2 += dz;
			u2 += du;
			v2 += dv;

			x2 += xadd;
			dc += dx;
			if(dc >= dy)
			{
				x2 += xinc;
				dc -= dy;
			}

			eb++;
		}
	}

	// clamp y1,y2 to screen size
	// NOTE: shouldn't be necessary if the algo is correct
	if(elist_y1 < 0)
		elist_y1 = 0;
	if(elist_y2 > rtmp_height)
		elist_y2 = rtmp_height;
}

/*
 * Feeds the four edges of a quad (corners 1..4) to render_cubemap_edge.
 * The "a"/"b" copies of each corner are placeholders for clipping, which
 * is not implemented yet, so the a->b edges are currently never emitted.
 *
 * NOTE(review): all four u/v pairs are derived from z1 only; z2..z4 are
 * presumably intended for corners 2..4 - confirm before relying on this.
 */
void render_cubemap_quad(
	int face,
	float x1, float y1, float z1,
	float x2, float y2, float z2,
	float x3, float y3, float z3,
	float x4, float y4, float z4)
{
	float u1,u2,u3,u4;
	float v1,v2,v3,v4;

	// precalc 1/z
	z1 = 1.0f/z1;
	z2 = 1.0f/z2;
	z3 = 1.0f/z3;
	z4 = 1.0f/z4;

	// prep u/v values
	u1 = -z1; v1 = -z1;
	u2 =  z1; v2 = -z1;
	u3 =  z1; v3 =  z1;
	u4 = -z1; v4 =  z1;

	// copy to some "unclipped" things
	float x1a,x1b,y1a,y1b,z1a,z1b,u1a,u1b,v1a,v1b;
	float x2a,x2b,y2a,y2b,z2a,z2b,u2a,u2b,v2a,v2b;
	float x3a,x3b,y3a,y3b,z3a,z3b,u3a,u3b,v3a,v3b;
	float x4a,x4b,y4a,y4b,z4a,z4b,u4a,u4b,v4a,v4b;

	x1a=x1b=x1; y1a=y1b=y1; z1a=z1b=z1; u1a=u1b=u1; v1a=v1b=v1;
	x2a=x2b=x2; y2a=y2b=y2; z2a=z2b=z2; u2a=u2b=u2; v2a=v2b=v2;
	x3a=x3b=x3; y3a=y3b=y3; z3a=z3b=z3; u3a=u3b=u3; v3a=v3b=v3;
	x4a=x4b=x4; y4a=y4b=y4; z4a=z4b=z4; u4a=u4b=u4; v4a=v4b=v4;

	// TODO: clip stuff

	// render edges
	if(x1a != x1b || y1a != y1b)
		render_cubemap_edge(face,
			x1a,y1a,z1a,u1a,v1a,
			x1b,y1b,z1b,u1b,v1b);
	render_cubemap_edge(face,
		x1b,y1b,z1b,u1b,v1b,
		x2a,y2a,z2a,u2a,v2a);

	if(x2a != x2b || y2a != y2b)
		render_cubemap_edge(face,
			x2a,y2a,z2a,u2a,v2a,
			x2b,y2b,z2b,u2b,v2b);
	render_cubemap_edge(face,
		x2b,y2b,z2b,u2b,v2b,
		x3a,y3a,z3a,u3a,v3a);

	if(x3a != x3b || y3a != y3b)
		render_cubemap_edge(face,
			x3a,y3a,z3a,u3a,v3a,
			x3b,y3b,z3b,u3b,v3b);
	render_cubemap_edge(face,
		x3b,y3b,z3b,u3b,v3b,
		x4a,y4a,z4a,u4a,v4a);

	if(x4a != x4b || y4a != y4b)
		render_cubemap_edge(face,
			x4a,y4a,z4a,u4a,v4a,
			x4b,y4b,z4b,u4b,v4b);
	render_cubemap_edge(face,
		x4b,y4b,z4b,u4b,v4b,
		x1a,y1a,z1a,u1a,v1a);
}

/*
 * Work in progress: builds the edge list for one cubemap face and walks
 * its scanlines. The per-pixel fetch and plot are still TODO, so nothing
 * is drawn. All four corners are currently set to (gx,gy,gz).
 */
void render_cubemap_face(int face, int gx, int gy, int gz)
{
	int x,y;

	// reset edge list
	elist_y1 = rtmp_height;
	elist_y2 = 0;

	// calculate corners
	float cx1 = gx, cx2 = gx, cx3 = gx, cx4 = gx;
	float cy1 = gy, cy2 = gy, cy3 = gy, cy4 = gy;
	float cz1 = gz, cz2 = gz, cz3 = gz, cz4 = gz;

	// populate edge list
	render_cubemap_quad(face,
		cx1,cy1,cz1,
		cx2,cy2,cz2,
		cx3,cy3,cz3,
		cx4,cy4,cz4);

	// render edge list
	uint32_t *pb = rtmp_pixels + (rtmp_pitch*elist_y1);
	float *db = dbuf + (rtmp_width*elist_y1);
	for(y = elist_y1; y < elist_y2; y++)
	{
		edgebit_t *eb = &elist[y];

		// get start/end
		int x1 = eb->x1;
		int x2 = eb->x2;

		// get start z/u/v
		float zi = eb->z1;
		float ui = eb->u1;
		float vi = eb->v1;

		// get delta z/u/v
		float dzi = eb->z2-eb->z1;
		float dui = eb->u2-eb->u1;
		float dvi = eb->v2-eb->v1;

		uint32_t *p = &pb[x1];
		float *d = &db[x1];

		for(x = x1; x < x2; x++)
		{
			// invert z
			float z = 1/zi;

			// calculate u,v
			float u = ui*z;
			float v = vi*z;

			// TODO: fetch

			// TODO: plot
			//*(p++);
			//*(d++);
		}

		pb += rtmp_pitch;
		db += rtmp_width;
	}
}

/*
 * Work-in-progress replacement for render_cubemap: stashes the target in
 * the rtmp_* globals, sizes the edge list to `height` rows and runs
 * render_cubemap_face for all six faces.
 */
// TODO: get this working
void render_cubemap_new(uint32_t *pixels, int width, int height, int pitch, camera_t *camera, map_t *map)
{
	// stash stuff in globals to prevent spamming the stack too much
	// (and in turn thrashing the cache)
	rtmp_pixels = pixels;
	rtmp_width = width;
	rtmp_height = height;
	rtmp_pitch = pitch;
	rtmp_camera = camera;
	rtmp_map = map;

	// prep edge list
	if(elist_len != height)
	{
		free(elist);
		elist_len = height;
		elist = (edgebit_t*)malloc(sizeof(edgebit_t)*elist_len);

		if(elist == NULL && elist_len > 0)
		{
			// the face drawer would write through a NULL list; give up
			// for this frame and force a fresh attempt on the next call
			fprintf(stderr, "render_cubemap_new: could not allocate edge list\n");
			elist_len = 0;
			return;
		}
	}

	// do each face
	// TODO? backface cull?
	render_cubemap_face(CM_NX, -1,  0,  0);
	render_cubemap_face(CM_NY,  0, -1,  0);
	render_cubemap_face(CM_NZ,  0,  0, -1);
	render_cubemap_face(CM_PX,  1,  0,  0);
	render_cubemap_face(CM_PY,  0,  1,  0);
	render_cubemap_face(CM_PZ,  0,  0,  1);
}

/*
 * Projects the six cubemap faces onto the screen. A view direction is
 * interpolated across the screen from four corner directions built from
 * the camera matrix; per pixel, the dominant axis of that direction
 * selects the face and the other two components select the texel.
 * Rows are split between OpenMP threads.
 */
void render_cubemap(uint32_t *pixels, int width, int height, int pitch, camera_t *camera, map_t *map)
{
	int x,y;

	// stash stuff in globals to prevent spamming the stack too much
	// (and in turn thrashing the cache)
	rtmp_pixels = pixels;
	rtmp_width = width;
	rtmp_height = height;
	rtmp_pitch = pitch;
	rtmp_camera = camera;
	rtmp_map = map;

	// get corner traces
	float tracemul = cubemap_size/2;
	float traceadd = tracemul;
	float ctrx1 = (camera->mzx+camera->mxx-camera->myx);
	float ctry1 = (camera->mzy+camera->mxy-camera->myy);
	float ctrz1 = (camera->mzz+camera->mxz-camera->myz);
	float ctrx2 = (camera->mzx-camera->mxx-camera->myx);
	float ctry2 = (camera->mzy-camera->mxy-camera->myy);
	float ctrz2 = (camera->mzz-camera->mxz-camera->myz);
	float ctrx3 = (camera->mzx+camera->mxx+camera->myx);
	float ctry3 = (camera->mzy+camera->mxy+camera->myy);
	float ctrz3 = (camera->mzz+camera->mxz+camera->myz);
	float ctrx4 = (camera->mzx-camera->mxx+camera->myx);
	float ctry4 = (camera->mzy-camera->mxy+camera->myy);
	float ctrz4 = (camera->mzz-camera->mxz+camera->myz);

	// calculate deltas
	float fbxq = ctrx1, fbyq = ctry1, fbzq = ctrz1; // base
	float fexq = ctrx2, feyq = ctry2, fezq = ctrz2; // end
	float flx = ctrx3-fbxq, fly = ctry3-fbyq, flz = ctrz3-fbzq; // left side
	float frx = ctrx4-fexq, fry = ctry4-feyq, frz = ctrz4-fezq; // right side
	flx /= (float)width; fly /= (float)width; flz /= (float)width;
	frx /= (float)width; fry /= (float)width; frz /= (float)width;

	// scale cubemap correctly
	fbxq += flx*((float)(width-height))/2.0f;
	fbyq += fly*((float)(width-height))/2.0f;
	fbzq += flz*((float)(width-height))/2.0f;
	fexq += frx*((float)(width-height))/2.0f;
	feyq += fry*((float)(width-height))/2.0f;
	fezq += frz*((float)(width-height))/2.0f;

	// raytrace it
	// TODO: find some faster method
	int hwidth = width/2;
	int hheight = height/2;
#if 0
	uint32_t *p = pixels;
	float *d = dbuf;
	for(y = -hheight; y < hheight; y++)
#else
	#pragma omp parallel
	{
	int x,y,z;
	float fex = fexq;
	float fey = feyq;
	float fez = fezq;
	float fbx = fbxq;
	float fby = fbyq;
	float fbz = fbzq;
	uint32_t *p = pixels;
	float *d = dbuf;
	// each thread takes a contiguous band of rows
	int t_count = omp_get_num_threads();
	int t_idx = omp_get_thread_num();
	int y_start = (height*t_idx)/t_count-hheight;
	int y_end = (height*(t_idx+1))/t_count-hheight;
	p += (y_start+hheight)*pitch;
	d += (y_start+hheight)*width;
	fbx += flx*(y_start+hheight);
	fby += fly*(y_start+hheight);
	fbz += flz*(y_start+hheight);
	fex += frx*(y_start+hheight);
	fey += fry*(y_start+hheight);
	fez += frz*(y_start+hheight);
	for(y = y_start; y < y_end; y++)
#endif
	{
		float fx = fbx;
		float fy = fby;
		float fz = fbz;

		float fdx = (fex-fbx)/(float)width;
		float fdy = (fey-fby)/(float)width;
		float fdz = (fez-fbz)/(float)width;

		for(x = -hwidth; x < hwidth; x++)
		{
			int pidx, pmap;

			// get correct cube map + pos
			float tx,ty,tz,atz;
			if(fabsf(fx) > fabsf(fy) && fabsf(fx) > fabsf(fz))
			{
				tx = -fz;
				ty = fy;
				tz = fx;
				atz = fabs(tz);
				pmap = fx >= 0.0f ? CM_PX : CM_NX;
			} else if(fabsf(fz) > fabsf(fy) && fabsf(fz) > fabsf(fx)) {
				tx = fx;
				ty = fy;
				tz = fz;
				atz = fabs(tz);
				pmap = fz >= 0.0f ? CM_PZ : CM_NZ;
			} else {
				tx = fx;
				ty = fz;
				tz = fy;
				atz = tz;
				pmap = fy >= 0.0f ? CM_PY : CM_NY;
			}

			// NOTE(review): the text after "+traceadd))<" is missing from
			// this copy of the file: the rest of the pidx expression, the
			// pixel/depth writes, the loop tails and the opening of a
			// commented-out block are not visible. Restore them from the
			// upstream source before building.
			pidx = ((cubemap_size-1)&(int)(tx*tracemul/tz+traceadd))
				|(((cubemap_size-1)&(int)(ty*tracemul/atz+traceadd))<pillars[y*map->xlen+x][8]);
		//pixels[y*pitch+x] = cubemap_color[CM_PZ][y*cubemap_size+x];
	}*/
}

/*
 * Draws one model point as a screen-space square. (x,y,z) is the camera-
 * space centre and r the half-size; both are divided by z and scaled by
 * half the screen width. Points with z < 0.001 are skipped.
 * depth is the value used for the depth test and fog.
 */
void render_pmf_box(float x, float y, float z, float depth, float r, uint32_t color)
{
	// check Z straight away
	if(z < 0.001f)
		return;

	// get box
	int x1 = (( x-r)/z)*rtmp_width/2+rtmp_width/2;
	int y1 = (( y-r)/z)*rtmp_width/2+rtmp_height/2;
	int x2 = (( x+r)/z)*rtmp_width/2+rtmp_width/2;
	int y2 = (( y+r)/z)*rtmp_width/2+rtmp_height/2;

	// render
	render_rect_zbuf(rtmp_pixels, dbuf, x1, y1, x2, y2, color, depth);
}

/*
 * Draws every point of a model bone onto the screen.
 * Each point is rotated in the XZ plane by ry, then in the YZ plane by
 * rx, then in the XZ plane by ry2, scaled by scale/256 and offset by
 * (px,py,pz). Unless islocal is set, the point is then made relative to
 * the camera position and transformed by the camera axes.
 */
void render_pmf_bone(uint32_t *pixels, int width, int height, int pitch, camera_t *cam_base,
	model_bone_t *bone, int islocal,
	float px, float py, float pz, float ry, float rx, float ry2, float scale)
{
	// stash stuff in globals to prevent spamming the stack too much
	// (and in turn thrashing the cache)
	rtmp_pixels = pixels;
	rtmp_width = width;
	rtmp_height = height;
	rtmp_pitch = pitch;
	rtmp_camera = cam_base;

	// get zoom factor
	float bzoom = (cam_base->mzx*cam_base->mzx
		+ cam_base->mzy*cam_base->mzy
		+ cam_base->mzz*cam_base->mzz);
	float unzoom = 1.0f/bzoom;
	// only referenced by the commented-out depth scaling below
	float rezoom = sqrtf(bzoom);

	scale /= 256.0f;

	// the rotation is the same for every point, so evaluate it once
	float sry = sin(ry);
	float cry = cos(ry);
	float srx = sin(rx);
	float crx = cos(rx);
	float sry2 = sin(ry2);
	float cry2 = cos(ry2);

	int i;
	for(i = 0; i < bone->ptlen; i++)
	{
		model_point_t *pt = &(bone->pts[i]);

		// get color
		uint32_t color = (pt->b)|(pt->g<<8)|(pt->r<<16)|(1<<24);

		// get position
		float x = pt->x;
		float y = pt->y;
		float z = pt->z;

		// rotate
		float tx = (x*cry+z*sry);
		float ty = y;
		float tz = (z*cry-x*sry);

		y = (ty*crx-tz*srx);
		tz = (tz*crx+ty*srx);

		x = (tx*cry2+tz*sry2);
		z = (tz*cry2-tx*sry2);

		// scalinate
		x *= scale;
		y *= scale;
		z *= scale;

		// offsettate
		x += px;
		y += py;
		z += pz;

		if(!islocal)
		{
			x -= cam_base->mpx;
			y -= cam_base->mpy;
			z -= cam_base->mpz;
		}

		// get correct centre depth
		// (largest axis distance, taken before the camera transform)
		float m = fabsf(x);
		if(m < fabsf(y))
			m = fabsf(y);
		if(m < fabsf(z))
			m = fabsf(z);

		//float dlen2 = x*x + y*y + z*z;
		//float dlen = sqrtf(dlen2);
		//float depth = sqrtf(2*m*m - dlen2);
		float depth = m;

		// cameranananinate
		if(!islocal)
		{
			float nx = x*cam_base->mxx+y*cam_base->mxy+z*cam_base->mxz;
			float ny = x*cam_base->myx+y*cam_base->myy+z*cam_base->myz;
			float nz = x*cam_base->mzx*unzoom+y*cam_base->mzy*unzoom+z*cam_base->mzz*unzoom;

			x = nx;
			y = ny;
			z = nz;
		}

		//depth *= z*rezoom;

		// plotinate
		render_pmf_box(-x, y, z, depth, pt->radius*scale, color);
	}
}

/* Frees all cubemap face buffers and resets their pointers to NULL. */
static void render_free_cubemaps(void)
{
	int i;

	for(i = 0; i < CM_MAX; i++)
	{
		free(cubemap_color[i]);
		free(cubemap_depth[i]);
		cubemap_color[i] = NULL;
		cubemap_depth[i] = NULL;
	}
}

/*
 * Allocates the six cubemap faces and the screen depth buffer.
 * The face size is the larger of width/height rounded up to a power of
 * two, then halved. Sets cubemap_size, cubemap_shift and dbuf.
 * Returns 0 on success, 1 if an allocation failed (anything allocated
 * here is released again in that case).
 */
int render_init(int width, int height)
{
	int i;
	int size = (width > height ? width : height);

	// get nearest power of 2
	size = (size-1);
	size |= size>>1;
	size |= size>>2;
	size |= size>>4;
	size |= size>>8;
	size |= size>>16; // no effect below 65536, keeps the rounding right above it
	size++;

	// reduce quality a little bit
	// 800x600 -> 1024^2 -> 512^2 ends up as 1MB x 6 textures = 6MB
	size >>= 1;

	// allocate cubemaps
	for(i = 0; i < CM_MAX; i++)
	{
		cubemap_color[i] = (uint32_t*)malloc((size_t)size*size*sizeof(uint32_t));
		cubemap_depth[i] = (float*)malloc((size_t)size*size*sizeof(float));
		if(cubemap_color[i] == NULL || cubemap_depth[i] == NULL)
		{
			// Can't allocate :. Can't continue
			// Clean up like a boss
			fprintf(stderr, "render_init: could not allocate cubemap %i\n", i);
			render_free_cubemaps();
			return 1;
		}
	}

	// we might as well set this, too!
	cubemap_size = size;

	// calculate shift factor
	cubemap_shift = -1;
	while(size != 0)
	{
		cubemap_shift++;
		size >>= 1;
	}

	// allocate space for depth buffer
	dbuf = (float*)malloc((size_t)width*height*sizeof(float));
	if(dbuf == NULL && width > 0 && height > 0)
	{
		fprintf(stderr, "render_init: could not allocate depth buffer\n");
		render_free_cubemaps();
		return 1;
	}

	return 0;
}

/* Releases the cubemaps, the edge list and the depth buffer. Safe to call repeatedly. */
void render_deinit(void)
{
	// deallocate cubemaps
	render_free_cubemaps();

	// deallocate edgelist
	free(elist);
	elist = NULL;
	elist_len = 0;

	// deallocate depth buffer
	free(dbuf);
	dbuf = NULL;
}