Merge remote-tracking branch 'origin/master'

master
jp9000 2013-05-10 20:00:24 -07:00
commit afade3d007
8 changed files with 371 additions and 197 deletions

.gitignore

@@ -33,6 +33,7 @@ ipch/
*.zip
*.lnk
*.chm
*~
!OBSHelp/*.css
!OBSHelp/*.htm


@@ -51,13 +51,15 @@ struct BASE_EXPORT ProfileNodeInfo
DWORD numCalls;
DWORD avgTimeElapsed;
DWORD avgCpuTime;
double avgPercentage;
double childPercentage;
double unaccountedPercentage;
bool bSingular;
QWORD totalTimeElapsed;
QWORD totalTimeElapsed,
cpuTimeElapsed;
ProfileNodeInfo *parent;
List<ProfileNodeInfo*> Children;
@@ -65,6 +67,8 @@ struct BASE_EXPORT ProfileNodeInfo
void calculateProfileData(int rootCallCount)
{
avgTimeElapsed = (DWORD)(totalTimeElapsed/(QWORD)rootCallCount);
avgCpuTime = (DWORD)(cpuTimeElapsed/(QWORD)rootCallCount);
if(parent) avgPercentage = (double(avgTimeElapsed)/double(parent->avgTimeElapsed))*parent->avgPercentage;
else avgPercentage = 100.0f;
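(With this propagation, a node averaging 5 ms under a parent that averages 10 ms at 50% is reported as (5/10)*50 = 25% of the root.)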
@@ -100,13 +104,15 @@ struct BASE_EXPORT ProfileNodeInfo
int perFrameCalls = numCalls/rootCallCount;
float fTimeTaken = (float)MicroToMS(avgTimeElapsed);
float cpuTime = (float)MicroToMS(avgCpuTime);
float totalCpuTime = (float)cpuTimeElapsed*0.001;
if(avgPercentage >= minPercentage && fTimeTaken >= minTime)
{
if(Children.Num())
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms] [avg calls per frame: %d] [children: %.3g%%] [unaccounted: %.3g%%]"), lpIndent, lpName, avgPercentage, fTimeTaken, perFrameCalls, childPercentage, unaccountedPercentage);
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms (cpu time: avg %g ms, total %g ms)] [avg calls per frame: %d] [children: %.3g%%] [unaccounted: %.3g%%]"), lpIndent, lpName, avgPercentage, fTimeTaken, cpuTime, totalCpuTime, perFrameCalls, childPercentage, unaccountedPercentage);
else
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms] [avg calls per frame: %d]"), lpIndent, lpName, avgPercentage, fTimeTaken, perFrameCalls);
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms (cpu time: avg %g ms, total %g ms)] [avg calls per frame: %d]"), lpIndent, lpName, avgPercentage, fTimeTaken, cpuTime, totalCpuTime, perFrameCalls);
}
for(unsigned int i=0; i<Children.Num(); i++)
@@ -226,6 +232,8 @@ ProfilerNode::ProfilerNode(CTSTR lpName, bool bSingularize)
this->lpName = lpName;
startTime = OSGetTimeMicroseconds();
MonitorThread(OSGetCurrentThread());
}
ProfilerNode::~ProfilerNode()
@@ -237,8 +245,18 @@ ProfilerNode::~ProfilerNode()
{
DWORD curTime = (DWORD)(newTime-startTime);
info->totalTimeElapsed += curTime;
if(thread)
info->cpuTimeElapsed += (DWORD)(OSGetThreadTime(thread) - cpuStartTime);
}
if(!bSingularNode)
__curProfilerNode = parent;
}
void ProfilerNode::MonitorThread(HANDLE thread_)
{
if(!thread_)
return;
thread = thread_;
cpuStartTime = OSGetThreadTime(thread);
}
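A minimal usage sketch (hypothetical caller, not part of this commit): the constructor now calls MonitorThread(OSGetCurrentThread()), so a scoped node records CPU time alongside wall time by default, and MonitorThread() can re-target another thread's handle, as the capture loop below does for its Convert444 workers.

void SomeHotPath() // hypothetical function for illustration
{
    ProfilerNode node(TEXT("SomeHotPath")); // starts the wall clock and, via MonitorThread, the CPU clock
    DoExpensiveWork();                      // hypothetical workload
}   // ~ProfilerNode accumulates elapsed wall time and GetThreadTimes-based CPU time into the node's info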


@@ -26,7 +26,9 @@ struct ProfileNodeInfo;
class BASE_EXPORT ProfilerNode
{
CTSTR lpName;
QWORD startTime;
QWORD startTime,
cpuStartTime;
HANDLE thread;
ProfilerNode *parent;
bool bSingularNode;
ProfileNodeInfo *info;
@@ -34,6 +36,7 @@ class BASE_EXPORT ProfilerNode
public:
ProfilerNode(CTSTR name, bool bSingularize=false);
~ProfilerNode();
void MonitorThread(HANDLE thread);
};
BASE_EXPORT extern ProfilerNode *__curProfilerNode;


@@ -138,6 +138,7 @@ BASE_EXPORT int STDCALL OSGetVersion();
BASE_EXPORT int STDCALL OSGetTotalCores();
BASE_EXPORT int STDCALL OSGetLogicalCores();
BASE_EXPORT HANDLE STDCALL OSCreateThread(XTHREAD lpThreadFunc, LPVOID param);
BASE_EXPORT HANDLE STDCALL OSGetCurrentThread();
BASE_EXPORT BOOL STDCALL OSWaitForThread(HANDLE hThread, LPDWORD ret);
BASE_EXPORT BOOL STDCALL OSCloseThread(HANDLE hThread);
BASE_EXPORT BOOL STDCALL OSTerminateThread(HANDLE hThread, DWORD waitMS=100);
@@ -152,6 +153,7 @@ BASE_EXPORT void STDCALL OSSetMainAppWindow(HANDLE window);
BASE_EXPORT DWORD STDCALL OSGetTime();
BASE_EXPORT QWORD STDCALL OSGetTimeMicroseconds();
BASE_EXPORT QWORD STDCALL OSGetThreadTime(HANDLE hThread);
BASE_EXPORT void __cdecl OSMessageBoxva(const TCHAR *format, va_list argptr);
BASE_EXPORT void __cdecl OSMessageBox(const TCHAR *format, ...);


@@ -655,6 +655,16 @@ double STDCALL OSGetTimeDoubleMS()
return double(elapsedTime) * 1000.0 / double(clockFreq.QuadPart);
}
QWORD STDCALL OSGetThreadTime(HANDLE thread)
{
#define TO_QWORD(t) (((QWORD)t.dwHighDateTime)<<32 | (QWORD)t.dwLowDateTime)
FILETIME user, create, exit, kernel;
if(!GetThreadTimes(thread, &create, &exit, &kernel, &user))
return -1;
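// GetThreadTimes reports 100 ns FILETIME ticks, so (user + kernel)/10 yields
// microseconds, matching OSGetTimeMicroseconds(); -1 above wraps to QWORD max on failure.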
return (TO_QWORD(user) + TO_QWORD(kernel))/10;
#undef TO_QWORD
}
UINT STDCALL OSGetProcessorCount()
{
@@ -667,6 +677,11 @@ HANDLE STDCALL OSCreateThread(XTHREAD lpThreadFunc, LPVOID param)
return CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)lpThreadFunc, param, 0, &dummy);
}
HANDLE STDCALL OSGetCurrentThread()
{
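// Note: GetCurrentThread() returns a pseudo-handle that only refers to the
// calling thread, which is sufficient for the profiler's same-thread CPU timing.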
return GetCurrentThread();
}
BOOL STDCALL OSWaitForThread(HANDLE hThread, LPDWORD ret)
{
BOOL bRet = (WaitForSingleObjectEx(hThread, INFINITE, 0) == WAIT_OBJECT_0);


@@ -71,6 +71,7 @@ namespace
{
pnFrameRateExtN = fr;
pnFrameRateExtD = 1;
return;
}
fr = (mfxU32)(dFrameRate * 1.001 + .5);
@@ -79,6 +80,7 @@ namespace
{
pnFrameRateExtN = fr * 1000;
pnFrameRateExtD = 1001;
return;
}
pnFrameRateExtN = (mfxU32)(dFrameRate * 10000 + .5);
@@ -119,11 +121,19 @@ class QSVEncoder : public VideoEncoder
std::unique_ptr<MFXVideoENCODE> enc;
mfxEncodeCtrl ctrl;
mfxFrameSurface1 enc_surf;
List<mfxU8> surf_buff;
List<mfxU8> bs_buff;
mfxBitstream bs;
struct encode_task
{
mfxFrameSurface1 surf;
mfxBitstream bs;
mfxSyncPoint sp;
bool keyframe;
};
List<encode_task> encode_tasks;
unsigned oldest, insert, encode, in_use;
int fps;
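The single enc_surf/bs pair becomes a fixed ring of encode tasks, each owning its own surface and bitstream. A hypothetical distillation of the index discipline used in the hunks below (names mirror the diff; not part of the commit):

struct TaskRing
{
    unsigned size;       // == encode_tasks.Num()
    unsigned oldest = 0; // next task to sync and drain
    unsigned insert = 0; // next task the capture loop fills
    unsigned encode = 0; // next task handed to EncodeFrameAsync
    unsigned in_use = 0; // occupied slots between oldest and insert
    bool Full() const { return in_use == size; }
    void Advance(unsigned &i) { i = (i + 1) % size; }
};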
@@ -134,6 +144,7 @@ class QSVEncoder : public VideoEncoder
bool bFirstFrameProcessed;
bool bUseCBR, bUseCFR, bDupeFrames;
unsigned deferredFrames;
List<VideoPacket> CurrentPackets;
List<BYTE> HeaderPacket, SEIData;
@@ -167,6 +178,8 @@ public:
}
}
session.SetPriority(MFX_PRIORITY_HIGH);
fps = fps_;
bUseCBR = AppConfig->GetInt(TEXT("Video Encoding"), TEXT("UseCBR")) != 0;
@@ -174,14 +187,20 @@ public:
bDupeFrames = bDupeFrames_;
memset(&params, 0, sizeof(params));
params.AsyncDepth = 1;
//params.AsyncDepth = 0;
params.mfx.CodecId = MFX_CODEC_AVC;
params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY;
params.mfx.TargetKbps = maxBitrate;
params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY;//SPEED;
params.mfx.TargetKbps = (mfxU16)(maxBitrate*0.9);
params.mfx.MaxKbps = maxBitrate;
params.mfx.InitialDelayInKB = 1;
//params.mfx.InitialDelayInKB = 1;
//params.mfx.GopRefDist = 1;
//params.mfx.NumRefFrame = 0;
params.mfx.GopPicSize = 61;
params.mfx.GopRefDist = 3;
params.mfx.GopOptFlag = 2;
params.mfx.IdrInterval = 2;
params.mfx.NumSlice = 1;
params.mfx.RateControlMethod = bUseCBR ? MFX_RATECONTROL_CBR : MFX_RATECONTROL_VBR;
params.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;
@@ -206,22 +225,53 @@ public:
enc.reset(new MFXVideoENCODE(session));
enc->Close();
auto result = enc->Init(&params);
mfxFrameAllocRequest req;
memset(&req, 0, sizeof(req));
enc->QueryIOSurf(&params, &req);
memset(&enc_surf, 0, sizeof(enc_surf));
memcpy(&enc_surf.Info, &params.mfx.FrameInfo, sizeof(enc_surf.Info));
enc->Init(&params);
decltype(params) query;
memcpy(&query, &params, sizeof(params));
enc->GetVideoParam(&query);
unsigned size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8);
bs_buff.SetSize(size+31);//.resize(size+31);
bs.Data = (mfxU8*)(((size_t)bs_buff.Array() + 31) / 32 * 32);
bs.MaxLength = size;
params.mfx.BufferSizeInKB = size/1000;
unsigned num_surf = max(6, req.NumFrameSuggested+1);
encode_tasks.SetSize(num_surf);
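// bufferSize is configured in kbit, so *1024/8 converts it to bytes for
// comparison with the byte count QSV suggests (BufferSizeInKB*1000).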
const unsigned bs_size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8);
bs_buff.SetSize(bs_size * num_surf + 31);
params.mfx.BufferSizeInKB = bs_size/1000;
const unsigned surf_size = width*height*2;
surf_buff.SetSize(surf_size * num_surf + 15);
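// Round each start pointer up to the next 32-byte (bitstream) and 16-byte
// (NV12 surface) boundary; the +31/+15 slack allocated above absorbs the shift.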
mfxU8* bs_start = (mfxU8*)(((size_t)bs_buff.Array() + 31)/32*32);
mfxU8* surf_start = (mfxU8*)(((size_t)surf_buff.Array() + 15)/16*16);
for(unsigned i = 0; i < encode_tasks.Num(); i++)
{
encode_tasks[i].sp = nullptr;
mfxFrameSurface1& surf = encode_tasks[i].surf;
memset(&surf, 0, sizeof(mfxFrameSurface1));
memcpy(&surf.Info, &params.mfx.FrameInfo, sizeof(params.mfx.FrameInfo));
surf.Data.Y = surf_start + i*surf_size;
surf.Data.UV = surf_start + i*surf_size + width*height;
mfxBitstream& bs = encode_tasks[i].bs;
memset(&bs, 0, sizeof(mfxBitstream));
bs.Data = bs_start + i*bs_size;
bs.MaxLength = bs_size;
}
oldest = 0;
insert = 0;
encode = 0;
in_use = 0;
Log(TEXT("Using %u encode tasks"), encode_tasks.Num());
Log(TEXT("Buffer size: %u configured, %u suggested by QSV; using %u"),
bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, size*8/1024);
bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, params.mfx.BufferSizeInKB*1000*8/1024);
Log(TEXT("------------------------------------------"));
Log(TEXT("%s"), GetInfoString().Array());
@@ -230,6 +280,8 @@ public:
memset(&ctrl, 0, sizeof(ctrl));
ctrl.FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | MFX_FRAMETYPE_IDR;
deferredFrames = 0;
DataPacket packet;
GetHeaders(packet);
}
@@ -239,197 +291,264 @@ public:
ClearPackets();
}
bool Encode(LPVOID picInPtr, List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD outputTimestamp, int &ctsOffset)
virtual void RequestBuffers(LPVOID buffers)
{
bs.DataLength = 0;
bs.DataOffset = 0;
mfxFrameSurface1& pic = *(mfxFrameSurface1*)picInPtr;
enc_surf.Data.Y = pic.Data.Y;
enc_surf.Data.UV = pic.Data.UV;
enc_surf.Data.Pitch = pic.Data.Pitch;
enc_surf.Data.TimeStamp = pic.Data.TimeStamp*90;
mfxSyncPoint sp = nullptr;
auto sts = enc->EncodeFrameAsync(bRequestKeyframe ? &ctrl : nullptr, &enc_surf, &bs, &sp);
if(!buffers)
return;
mfxFrameData& data = encode_tasks[insert].surf.Data;
mfxFrameData& buff = *(mfxFrameData*)buffers;
buff.Y = data.Y;
buff.UV = data.UV;
}
sts = MFXVideoCORE_SyncOperation(session, sp, INFINITE);
List<x264_nal_t> nalOut;
mfxU8 *start = bs.Data + bs.DataOffset,
*end = bs.Data + bs.DataOffset + bs.DataLength;
static mfxU8 start_seq[] = {0, 0, 1};
start = std::search(start, end, start_seq, start_seq+3);
while(start != end)
void ProcessEncodedFrame(List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD outputTimestamp, int &ctsOffset, mfxU32 wait=0)
{
if(!in_use)
return;
unsigned pending_tasks = in_use;
while(in_use)
{
decltype(start) next = std::search(start+1, end, start_seq, start_seq+3);
x264_nal_t nal;
nal.i_ref_idc = start[3]>>5;
nal.i_type = start[3]&0x1f;
if(nal.i_type == NAL_SLICE_IDR)
nal.i_ref_idc = NAL_PRIORITY_HIGHEST;
nal.p_payload = start;
nal.i_payload = int(next-start);
nalOut << nal;
start = next;
}
size_t nalNum = nalOut.Num();
encode_task& task = encode_tasks[oldest];
auto& sp = task.sp;
mfxStatus sts;
if((sts = MFXVideoCORE_SyncOperation(session, sp, wait)) != MFX_ERR_NONE)
return;
sp = nullptr;
in_use -= 1;
packets.Clear();
ClearPackets();
oldest = (oldest+1)%encode_tasks.Num();
if(bRequestKeyframe)
bRequestKeyframe = false;
mfxBitstream& bs = task.bs;
if(!bFirstFrameProcessed && nalNum)
{
//delayOffset = -picOut.i_dts;
bFirstFrameProcessed = true;
}
INT64 ts = INT64(outputTimestamp);
int timeOffset = 0;//int((picOut.i_pts+delayOffset)-ts);
if(bDupeFrames)
{
//if frame duplication is being used, the shift will be insignificant, so just don't bother adjusting audio
timeOffset += frameShift;
if(nalNum && timeOffset < 0)
List<x264_nal_t> nalOut;
mfxU8 *start = bs.Data + bs.DataOffset,
*end = bs.Data + bs.DataOffset + bs.DataLength;
static mfxU8 start_seq[] = {0, 0, 1};
start = std::search(start, end, start_seq, start_seq+3);
while(start != end)
{
frameShift -= timeOffset;
timeOffset = 0;
decltype(start) next = std::search(start+1, end, start_seq, start_seq+3);
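// start points at a 00 00 01 start code, so start[3] is the NAL header byte:
// nal_ref_idc lives in bits 6..5, nal_unit_type in bits 4..0.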
x264_nal_t nal;
nal.i_ref_idc = start[3]>>5;
nal.i_type = start[3]&0x1f;
if(nal.i_type == NAL_SLICE_IDR)
nal.i_ref_idc = NAL_PRIORITY_HIGHEST;
nal.p_payload = start;
nal.i_payload = int(next-start);
nalOut << nal;
start = next;
}
}
else
{
timeOffset += ctsOffset;
size_t nalNum = nalOut.Num();
//dynamically adjust the CTS for the stream if it gets lower than the current value
//(thanks to cyrus for suggesting to do this instead of a single shift)
if(nalNum && timeOffset < 0)
packets.Clear();
ClearPackets();
if(!bFirstFrameProcessed && nalNum)
{
ctsOffset -= timeOffset;
timeOffset = 0;
delayOffset = 0;//bs.TimeStamp/90;
bFirstFrameProcessed = true;
}
}
timeOffset = htonl(timeOffset);
INT64 ts = INT64(outputTimestamp);
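// bs.TimeStamp comes back on the 90 kHz MPEG clock; /90 converts it to ms so
// it is comparable with outputTimestamp.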
int timeOffset = int(bs.TimeStamp/90+delayOffset-ts);//int((picOut.i_pts+delayOffset)-ts);
BYTE *timeOffsetAddr = ((BYTE*)&timeOffset)+1;
VideoPacket *newPacket = NULL;
PacketType bestType = PacketType_VideoDisposable;
bool bFoundFrame = false;
for(int i=0; i<nalNum; i++)
{
x264_nal_t &nal = nalOut[i];
if(nal.i_type == NAL_SEI)
if(bDupeFrames)
{
//if frame duplication is being used, the shift will be insignificant, so just don't bother adjusting audio
timeOffset += frameShift;
if(nalNum && timeOffset < 0)
{
frameShift -= timeOffset;
timeOffset = 0;
}
}
else
{
timeOffset += ctsOffset;
//dynamically adjust the CTS for the stream if it gets lower than the current value
//(thanks to cyrus for suggesting to do this instead of a single shift)
if(nalNum && timeOffset < 0)
{
ctsOffset -= timeOffset;
timeOffset = 0;
}
}
timeOffset = htonl(timeOffset);
BYTE *timeOffsetAddr = ((BYTE*)&timeOffset)+1;
VideoPacket *newPacket = NULL;
PacketType bestType = PacketType_VideoDisposable;
bool bFoundFrame = false;
for(int i=0; i<nalNum; i++)
{
x264_nal_t &nal = nalOut[i];
if(nal.i_type == NAL_SEI)
{
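// Advance past the Annex B start-code prefix (00 00 01) to the NAL payload.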
BYTE *skip = nal.p_payload;
while(*(skip++) != 0x1);
int skipBytes = (int)(skip-nal.p_payload);
int newPayloadSize = (nal.i_payload-skipBytes);
if (nal.p_payload[skipBytes+1] == 0x5) {
SEIData.Clear();
BufferOutputSerializer packetOut(SEIData);
packetOut.OutputDword(htonl(newPayloadSize));
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
} else {
if (!newPacket)
newPacket = CurrentPackets.CreateNew();
BufferOutputSerializer packetOut(newPacket->Packet);
packetOut.OutputDword(htonl(newPayloadSize));
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
}
}
/*else if(nal.i_type == NAL_FILLER) //QSV does not produce NAL_FILLER
{
BYTE *skip = nal.p_payload;
while(*(skip++) != 0x1);
int skipBytes = (int)(skip-nal.p_payload);
int newPayloadSize = (nal.i_payload-skipBytes);
if (nal.p_payload[skipBytes+1] == 0x5) {
SEIData.Clear();
BufferOutputSerializer packetOut(SEIData);
packetOut.OutputDword(htonl(newPayloadSize));
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
} else {
if (!newPacket)
newPacket = CurrentPackets.CreateNew();
BufferOutputSerializer packetOut(newPacket->Packet);
packetOut.OutputDword(htonl(newPayloadSize));
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
}
}
/*else if(nal.i_type == NAL_FILLER) //QSV does not produce NAL_FILLER
{
BYTE *skip = nal.p_payload;
while(*(skip++) != 0x1);
int skipBytes = (int)(skip-nal.p_payload);
int newPayloadSize = (nal.i_payload-skipBytes);
if (!newPacket)
newPacket = CurrentPackets.CreateNew();
BufferOutputSerializer packetOut(newPacket->Packet);
packetOut.OutputDword(htonl(newPayloadSize));
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
}*/
else if(nal.i_type == NAL_SLICE_IDR || nal.i_type == NAL_SLICE)
{
BYTE *skip = nal.p_payload;
while(*(skip++) != 0x1);
int skipBytes = (int)(skip-nal.p_payload);
if (!newPacket)
newPacket = CurrentPackets.CreateNew();
newPacket = CurrentPackets.CreateNew();
if (!bFoundFrame)
{
newPacket->Packet.Insert(0, (nal.i_type == NAL_SLICE_IDR) ? 0x17 : 0x27);
newPacket->Packet.Insert(1, 1);
newPacket->Packet.InsertArray(2, timeOffsetAddr, 3);
bFoundFrame = true;
}
int newPayloadSize = (nal.i_payload-skipBytes);
BufferOutputSerializer packetOut(newPacket->Packet);
packetOut.OutputDword(htonl(newPayloadSize));
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
switch(nal.i_ref_idc)
}*/
else if(nal.i_type == NAL_SLICE_IDR || nal.i_type == NAL_SLICE)
{
case NAL_PRIORITY_DISPOSABLE: bestType = MAX(bestType, PacketType_VideoDisposable); break;
case NAL_PRIORITY_LOW: bestType = MAX(bestType, PacketType_VideoLow); break;
case NAL_PRIORITY_HIGH: bestType = MAX(bestType, PacketType_VideoHigh); break;
case NAL_PRIORITY_HIGHEST: bestType = MAX(bestType, PacketType_VideoHighest); break;
BYTE *skip = nal.p_payload;
while(*(skip++) != 0x1);
int skipBytes = (int)(skip-nal.p_payload);
if (!newPacket)
newPacket = CurrentPackets.CreateNew();
if (!bFoundFrame)
{
newPacket->Packet.Insert(0, (nal.i_type == NAL_SLICE_IDR) ? 0x17 : 0x27);
newPacket->Packet.Insert(1, 1);
newPacket->Packet.InsertArray(2, timeOffsetAddr, 3);
bFoundFrame = true;
}
int newPayloadSize = (nal.i_payload-skipBytes);
BufferOutputSerializer packetOut(newPacket->Packet);
packetOut.OutputDword(htonl(newPayloadSize));
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
switch(nal.i_ref_idc)
{
case NAL_PRIORITY_DISPOSABLE: bestType = MAX(bestType, PacketType_VideoDisposable); break;
case NAL_PRIORITY_LOW: bestType = MAX(bestType, PacketType_VideoLow); break;
case NAL_PRIORITY_HIGH: bestType = MAX(bestType, PacketType_VideoHigh); break;
case NAL_PRIORITY_HIGHEST: bestType = MAX(bestType, PacketType_VideoHighest); break;
}
}
/*else if(nal.i_type == NAL_SPS)
{
VideoPacket *newPacket = CurrentPackets.CreateNew();
BufferOutputSerializer headerOut(newPacket->Packet);
headerOut.OutputByte(0x17);
headerOut.OutputByte(0);
headerOut.Serialize(timeOffsetAddr, 3);
headerOut.OutputByte(1);
headerOut.Serialize(nal.p_payload+5, 3);
headerOut.OutputByte(0xff);
headerOut.OutputByte(0xe1);
headerOut.OutputWord(htons(nal.i_payload-4));
headerOut.Serialize(nal.p_payload+4, nal.i_payload-4);
x264_nal_t &pps = nalOut[i+1]; //the PPS always comes after the SPS
headerOut.OutputByte(1);
headerOut.OutputWord(htons(pps.i_payload-4));
headerOut.Serialize(pps.p_payload+4, pps.i_payload-4);
}*/
else
continue;
}
/*else if(nal.i_type == NAL_SPS)
packetTypes << bestType;
packets.SetSize(CurrentPackets.Num());
for(UINT i=0; i<packets.Num(); i++)
{
VideoPacket *newPacket = CurrentPackets.CreateNew();
BufferOutputSerializer headerOut(newPacket->Packet);
headerOut.OutputByte(0x17);
headerOut.OutputByte(0);
headerOut.Serialize(timeOffsetAddr, 3);
headerOut.OutputByte(1);
headerOut.Serialize(nal.p_payload+5, 3);
headerOut.OutputByte(0xff);
headerOut.OutputByte(0xe1);
headerOut.OutputWord(htons(nal.i_payload-4));
headerOut.Serialize(nal.p_payload+4, nal.i_payload-4);
x264_nal_t &pps = nalOut[i+1]; //the PPS always comes after the SPS
headerOut.OutputByte(1);
headerOut.OutputWord(htons(pps.i_payload-4));
headerOut.Serialize(pps.p_payload+4, pps.i_payload-4);
}*/
else
continue;
packets[i].lpPacket = CurrentPackets[i].Packet.Array();
packets[i].size = CurrentPackets[i].Packet.Num();
}
}
}
packetTypes << bestType;
packets.SetSize(CurrentPackets.Num());
for(UINT i=0; i<packets.Num(); i++)
bool Encode(LPVOID picInPtr, List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD outputTimestamp, int &ctsOffset)
{
profileIn("ProcessEncodedFrame");
ProcessEncodedFrame(packets, packetTypes, outputTimestamp, ctsOffset);
if(in_use == encode_tasks.Num())
{
packets[i].lpPacket = CurrentPackets[i].Packet.Array();
packets[i].size = CurrentPackets[i].Packet.Num();
Log(TEXT("Error: all encode tasks in use, stalling pipeline"));
ProcessEncodedFrame(packets, packetTypes, outputTimestamp, ctsOffset, INFINITE);
}
profileOut;
encode_task& task = encode_tasks[insert];
insert = (insert+1)%encode_tasks.Num();
in_use += 1;
task.keyframe = bRequestKeyframe;
bRequestKeyframe = false;
mfxBitstream& bs = task.bs;
mfxFrameSurface1& surf = task.surf;
mfxFrameSurface1& pic = *(mfxFrameSurface1*)picInPtr;
profileIn("setup new frame");
bs.DataLength = 0;
bs.DataOffset = 0;
surf.Data.Pitch = pic.Data.Pitch;
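// Media SDK timestamps run on the 90 kHz MPEG clock; the capture loop
// supplies milliseconds, hence the *90.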
surf.Data.TimeStamp = pic.Data.TimeStamp*90;
profileOut;
mfxSyncPoint& sp = task.sp;
mfxStatus sts;
profileIn("EncodeFrameAsync");
unsigned limit = encode < insert ? insert : insert + encode_tasks.Num();
for(unsigned i = encode;i < limit; i++)
{
encode_task& task = encode_tasks[encode];
mfxBitstream& bs = task.bs;
mfxFrameSurface1& surf = task.surf;
mfxSyncPoint& sp = task.sp;
for(;;)
{
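// Per the Media SDK contract, a returned sync point means the frame was
// accepted even if sts carries a warning, so either condition ends the retry loop.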
sts = enc->EncodeFrameAsync(task.keyframe ? &ctrl : nullptr, &surf, &bs, &sp);
if(sts == MFX_ERR_NONE || sp)
break;
if(sts == MFX_WRN_DEVICE_BUSY)
{
deferredFrames += 1;
return false;
}
//if(!sp); //sts == MFX_ERR_MORE_DATA usually; retry the call (see MSDK examples)
//Log(TEXT("returned status %i, %u"), sts, insert);
}
encode = (encode+1)%encode_tasks.Num();
}
profileOut;
return true;
}
@@ -533,5 +652,4 @@ VideoEncoder* CreateQSVEncoder(int fps, int width, int height, int quality, CTST
if(CheckQSVHardwareSupport())
return new QSVEncoder(fps, width, height, quality, preset, bUse444, maxBitRate, bufferSize, bUseCFR, bDupeFrames);
return nullptr;
}
}


@@ -159,6 +159,8 @@ class VideoEncoder
protected:
virtual bool Encode(LPVOID picIn, List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD timestamp, int &ctsOffset)=0;
virtual void RequestBuffers(LPVOID buffers) {}
public:
virtual ~VideoEncoder() {}
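RequestBuffers() is deliberately a no-op in the base class: only the QSV encoder overrides it, handing the capture loop the Y/UV plane pointers of the next free task surface so the NV12 conversion writes directly into encoder-owned memory (replacing the separately allocated qsvBuffer removed below).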


@@ -290,7 +290,6 @@ void OBS::MainCaptureLoop()
EncoderPicture lastPic;
EncoderPicture outPics[NUM_OUT_BUFFERS];
DWORD outTimes[NUM_OUT_BUFFERS] = {0, 0, 0};
List<mfxU8> qsvBuffer;
for(int i=0; i<NUM_OUT_BUFFERS; i++)
{
@@ -317,19 +316,7 @@ void OBS::MainCaptureLoop()
}
else
{
if(bUsingQSV)
{
size_t perBuffer = 2 * outputCX * outputCY;
qsvBuffer.SetSize(unsigned(NUM_OUT_BUFFERS * perBuffer + 15));
mfxU8 *aligned = (mfxU8*)(((size_t)qsvBuffer.Array()+15)/16*16);
for(int i=0; i < NUM_OUT_BUFFERS; i++)
{
outPics[i].mfxOut->Data.Y = aligned;
outPics[i].mfxOut->Data.UV = aligned + outputCX * outputCY;
aligned += perBuffer;
}
}
else
if(!bUsingQSV)
for(int i=0; i<NUM_OUT_BUFFERS; i++)
x264_picture_alloc(outPics[i].picOut, X264_CSP_I420, outputCX, outputCY);
}
@@ -474,6 +461,17 @@ void OBS::MainCaptureLoop()
bool bRenderView = !IsIconic(hwndMain) && bRenderViewEnabled;
profileIn("frame");
List<ProfilerNode> threadedProfilers;
if(bUseThreaded420)
{
threadedProfilers.SetSize(numThreads);
for(int i = 0; i < numThreads; i++)
{
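// ProfilerNode has no default constructor, so construct each profiler in
// place here and destroy it explicitly at the end of the frame (see below).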
::new (&threadedProfilers[i]) ProfilerNode(TEXT("Convert444Threads"), true);
threadedProfilers[i].MonitorThread(h420Threads[i]);
}
}
#ifdef USE_100NS_TIME
QWORD qwTime = renderStartTime/10000;
@@ -819,8 +817,10 @@ void OBS::MainCaptureLoop()
convertInfo[i].pitch = map.RowPitch;
if(bUsingQSV)
{
convertInfo[i].output[0] = nextPicOut.mfxOut->Data.Y;
convertInfo[i].output[1] = nextPicOut.mfxOut->Data.UV;
mfxFrameData& data = nextPicOut.mfxOut->Data;
videoEncoder->RequestBuffers(&data);
convertInfo[i].output[0] = data.Y;
convertInfo[i].output[1] = data.UV;
}
else
{
@@ -838,7 +838,12 @@ void OBS::MainCaptureLoop()
{
outTimes[curOutBuffer] = (DWORD)curStreamTime;
if(bUsingQSV)
Convert444toNV12((LPBYTE)map.pData, outputCX, map.RowPitch, outputCY, 0, outputCY, &picOut.mfxOut->Data.Y);
{
mfxFrameData& data = picOut.mfxOut->Data;
videoEncoder->RequestBuffers(&data);
LPBYTE output[] = {data.Y, data.UV};
Convert444toNV12((LPBYTE)map.pData, outputCX, map.RowPitch, outputCY, 0, outputCY, output);
}
else
Convert444to420((LPBYTE)map.pData, outputCX, map.RowPitch, outputCY, 0, outputCY, picOut.picOut->img.plane);
prevTexture->Unmap(0);
@@ -985,6 +990,13 @@ void OBS::MainCaptureLoop()
}
profileOut;
if(bUseThreaded420)
{
for(int i = 0; i < numThreads; i++)
{
threadedProfilers[i].~ProfilerNode();
}
}
profileOut;
//------------------------------------
@@ -1057,7 +1069,10 @@ void OBS::MainCaptureLoop()
delete outPics[i].mfxOut;
else
for(int i=0; i<NUM_OUT_BUFFERS; i++)
{
x264_picture_clean(outPics[i].picOut);
delete outPics[i].picOut;
}
}
Free(h420Threads);