Merge remote-tracking branch 'origin/master'
commit
afade3d007
|
@ -33,6 +33,7 @@ ipch/
|
|||
*.zip
|
||||
*.lnk
|
||||
*.chm
|
||||
*~
|
||||
|
||||
!OBSHelp/*.css
|
||||
!OBSHelp/*.htm
|
||||
|
|
|
@ -51,13 +51,15 @@ struct BASE_EXPORT ProfileNodeInfo
|
|||
|
||||
DWORD numCalls;
|
||||
DWORD avgTimeElapsed;
|
||||
DWORD avgCpuTime;
|
||||
double avgPercentage;
|
||||
double childPercentage;
|
||||
double unaccountedPercentage;
|
||||
|
||||
bool bSingular;
|
||||
|
||||
QWORD totalTimeElapsed;
|
||||
QWORD totalTimeElapsed,
|
||||
cpuTimeElapsed;
|
||||
|
||||
ProfileNodeInfo *parent;
|
||||
List<ProfileNodeInfo*> Children;
|
||||
|
@ -65,6 +67,8 @@ struct BASE_EXPORT ProfileNodeInfo
|
|||
void calculateProfileData(int rootCallCount)
|
||||
{
|
||||
avgTimeElapsed = (DWORD)(totalTimeElapsed/(QWORD)rootCallCount);
|
||||
avgCpuTime = (DWORD)(cpuTimeElapsed/(QWORD)rootCallCount);
|
||||
|
||||
|
||||
if(parent) avgPercentage = (double(avgTimeElapsed)/double(parent->avgTimeElapsed))*parent->avgPercentage;
|
||||
else avgPercentage = 100.0f;
|
||||
|
@ -100,13 +104,15 @@ struct BASE_EXPORT ProfileNodeInfo
|
|||
int perFrameCalls = numCalls/rootCallCount;
|
||||
|
||||
float fTimeTaken = (float)MicroToMS(avgTimeElapsed);
|
||||
float cpuTime = (float)MicroToMS(avgCpuTime);
|
||||
float totalCpuTime = (float)cpuTimeElapsed*0.001;
|
||||
|
||||
if(avgPercentage >= minPercentage && fTimeTaken >= minTime)
|
||||
{
|
||||
if(Children.Num())
|
||||
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms] [avg calls per frame: %d] [children: %.3g%%] [unaccounted: %.3g%%]"), lpIndent, lpName, avgPercentage, fTimeTaken, perFrameCalls, childPercentage, unaccountedPercentage);
|
||||
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms (cpu time: avg %g ms, total %g ms)] [avg calls per frame: %d] [children: %.3g%%] [unaccounted: %.3g%%]"), lpIndent, lpName, avgPercentage, fTimeTaken, cpuTime, totalCpuTime, perFrameCalls, childPercentage, unaccountedPercentage);
|
||||
else
|
||||
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms] [avg calls per frame: %d]"), lpIndent, lpName, avgPercentage, fTimeTaken, perFrameCalls);
|
||||
Log(TEXT("%s%s - [%.3g%%] [avg time: %g ms (cpu time: avg %g ms, total %g ms)] [avg calls per frame: %d]"), lpIndent, lpName, avgPercentage, fTimeTaken, cpuTime, totalCpuTime, perFrameCalls);
|
||||
}
|
||||
|
||||
for(unsigned int i=0; i<Children.Num(); i++)
|
||||
|
@ -226,6 +232,8 @@ ProfilerNode::ProfilerNode(CTSTR lpName, bool bSingularize)
|
|||
this->lpName = lpName;
|
||||
|
||||
startTime = OSGetTimeMicroseconds();
|
||||
|
||||
MonitorThread(OSGetCurrentThread());
|
||||
}
|
||||
|
||||
ProfilerNode::~ProfilerNode()
|
||||
|
@ -237,8 +245,18 @@ ProfilerNode::~ProfilerNode()
|
|||
{
|
||||
DWORD curTime = (DWORD)(newTime-startTime);
|
||||
info->totalTimeElapsed += curTime;
|
||||
if(thread)
|
||||
info->cpuTimeElapsed += (DWORD)(OSGetThreadTime(thread) - cpuStartTime);
|
||||
}
|
||||
|
||||
if(!bSingularNode)
|
||||
__curProfilerNode = parent;
|
||||
}
|
||||
|
||||
// Starts CPU-time tracking of `thread_` for this profiler node.
//
// thread_: handle of the thread whose CPU time should be attributed to this
//          node. A null handle is ignored, leaving `thread` and
//          `cpuStartTime` exactly as they were.
void ProfilerNode::MonitorThread(HANDLE thread_)
{
    if(thread_)
    {
        thread       = thread_;
        // Baseline sample: ~ProfilerNode subtracts this from a later
        // OSGetThreadTime() reading to obtain the CPU time spent in the node.
        cpuStartTime = OSGetThreadTime(thread_);
    }
}
|
|
@ -26,7 +26,9 @@ struct ProfileNodeInfo;
|
|||
class BASE_EXPORT ProfilerNode
|
||||
{
|
||||
CTSTR lpName;
|
||||
QWORD startTime;
|
||||
QWORD startTime,
|
||||
cpuStartTime;
|
||||
HANDLE thread;
|
||||
ProfilerNode *parent;
|
||||
bool bSingularNode;
|
||||
ProfileNodeInfo *info;
|
||||
|
@ -34,6 +36,7 @@ class BASE_EXPORT ProfilerNode
|
|||
public:
|
||||
ProfilerNode(CTSTR name, bool bSingularize=false);
|
||||
~ProfilerNode();
|
||||
void MonitorThread(HANDLE thread);
|
||||
};
|
||||
|
||||
BASE_EXPORT extern ProfilerNode *__curProfilerNode;
|
||||
|
|
|
@ -138,6 +138,7 @@ BASE_EXPORT int STDCALL OSGetVersion();
|
|||
BASE_EXPORT int STDCALL OSGetTotalCores();
|
||||
BASE_EXPORT int STDCALL OSGetLogicalCores();
|
||||
BASE_EXPORT HANDLE STDCALL OSCreateThread(XTHREAD lpThreadFunc, LPVOID param);
|
||||
BASE_EXPORT HANDLE STDCALL OSGetCurrentThread();
|
||||
BASE_EXPORT BOOL STDCALL OSWaitForThread(HANDLE hThread, LPDWORD ret);
|
||||
BASE_EXPORT BOOL STDCALL OSCloseThread(HANDLE hThread);
|
||||
BASE_EXPORT BOOL STDCALL OSTerminateThread(HANDLE hThread, DWORD waitMS=100);
|
||||
|
@ -152,6 +153,7 @@ BASE_EXPORT void STDCALL OSSetMainAppWindow(HANDLE window);
|
|||
|
||||
BASE_EXPORT DWORD STDCALL OSGetTime();
|
||||
BASE_EXPORT QWORD STDCALL OSGetTimeMicroseconds();
|
||||
BASE_EXPORT QWORD STDCALL OSGetThreadTime(HANDLE hThread);
|
||||
|
||||
BASE_EXPORT void __cdecl OSMessageBoxva(const TCHAR *format, va_list argptr);
|
||||
BASE_EXPORT void __cdecl OSMessageBox(const TCHAR *format, ...);
|
||||
|
|
|
@ -655,6 +655,16 @@ double STDCALL OSGetTimeDoubleMS()
|
|||
return double(elapsedTime) * 1000.0 / double(clockFreq.QuadPart);
|
||||
}
|
||||
|
||||
// Returns the combined user-mode + kernel-mode CPU time consumed by `thread`.
//
// The two FILETIME values filled in by GetThreadTimes are each assembled into
// a 64-bit tick count, summed and divided by 10. Per the Win32 documentation
// FILETIME ticks are 100 ns, so the result is in microseconds.
//
// On failure of GetThreadTimes the function returns (QWORD)-1 (all bits set).
// NOTE(review): the ProfilerNode callers visible in this change do not check
// for that sentinel before subtracting two samples - confirm that is intended.
QWORD STDCALL OSGetThreadTime(HANDLE thread)
{
    FILETIME creationTime, exitTime, kernelTime, userTime;

    if(!GetThreadTimes(thread, &creationTime, &exitTime, &kernelTime, &userTime))
        return (QWORD)-1;

    // Stitch the high/low 32-bit halves of each FILETIME into one 64-bit value.
    const QWORD userTicks   = (((QWORD)userTime.dwHighDateTime)   << 32) | (QWORD)userTime.dwLowDateTime;
    const QWORD kernelTicks = (((QWORD)kernelTime.dwHighDateTime) << 32) | (QWORD)kernelTime.dwLowDateTime;

    return (userTicks + kernelTicks) / 10;
}
|
||||
|
||||
|
||||
UINT STDCALL OSGetProcessorCount()
|
||||
{
|
||||
|
@ -667,6 +677,11 @@ HANDLE STDCALL OSCreateThread(XTHREAD lpThreadFunc, LPVOID param)
|
|||
return CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)lpThreadFunc, param, 0, &dummy);
|
||||
}
|
||||
|
||||
// Platform wrapper: returns whatever the Win32 GetCurrentThread() call yields
// for the calling thread.
//
// NOTE(review): per the Win32 documentation GetCurrentThread() returns a
// pseudo-handle that always refers to whichever thread uses it - confirm that
// callers never hand the returned value to a different thread.
HANDLE STDCALL OSGetCurrentThread()
{
    HANDLE hCurrent = GetCurrentThread();
    return hCurrent;
}
|
||||
|
||||
BOOL STDCALL OSWaitForThread(HANDLE hThread, LPDWORD ret)
|
||||
{
|
||||
BOOL bRet = (WaitForSingleObjectEx(hThread, INFINITE, 0) == WAIT_OBJECT_0);
|
||||
|
|
|
@ -71,6 +71,7 @@ namespace
|
|||
{
|
||||
pnFrameRateExtN = fr;
|
||||
pnFrameRateExtD = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
fr = (mfxU32)(dFrameRate * 1.001 + .5);
|
||||
|
@ -79,6 +80,7 @@ namespace
|
|||
{
|
||||
pnFrameRateExtN = fr * 1000;
|
||||
pnFrameRateExtD = 1001;
|
||||
return;
|
||||
}
|
||||
|
||||
pnFrameRateExtN = (mfxU32)(dFrameRate * 10000 + .5);
|
||||
|
@ -119,11 +121,19 @@ class QSVEncoder : public VideoEncoder
|
|||
std::unique_ptr<MFXVideoENCODE> enc;
|
||||
|
||||
mfxEncodeCtrl ctrl;
|
||||
|
||||
mfxFrameSurface1 enc_surf;
|
||||
|
||||
|
||||
List<mfxU8> surf_buff;
|
||||
List<mfxU8> bs_buff;
|
||||
mfxBitstream bs;
|
||||
// One slot of the encoder's ring of in-flight encode operations
// (stored in `encode_tasks`, indexed by `oldest` / `insert` / `encode`).
struct encode_task
|
||||
{
|
||||
// Input frame for this slot. The constructor zeroes it, copies
// params.mfx.FrameInfo into surf.Info and points Data.Y / Data.UV at this
// slot's slice of surf_buff; Encode() fills in Pitch and TimeStamp.
mfxFrameSurface1 surf;
|
||||
// Output bitstream for this slot. The constructor zeroes it and points
// Data at this slot's slice of bs_buff with MaxLength set to the per-slot size.
mfxBitstream bs;
|
||||
// Sync point written by EncodeFrameAsync; nullptr when the slot has no
// pending operation (reset after MFXVideoCORE_SyncOperation succeeds).
mfxSyncPoint sp;
|
||||
// Snapshot of bRequestKeyframe taken when the frame was queued; when true,
// &ctrl is passed to EncodeFrameAsync for this frame.
bool keyframe;
|
||||
};
|
||||
List<encode_task> encode_tasks;
|
||||
|
||||
unsigned oldest, insert, encode, in_use;
|
||||
|
||||
int fps;
|
||||
|
||||
|
@ -134,6 +144,7 @@ class QSVEncoder : public VideoEncoder
|
|||
bool bFirstFrameProcessed;
|
||||
|
||||
bool bUseCBR, bUseCFR, bDupeFrames;
|
||||
unsigned deferredFrames;
|
||||
|
||||
List<VideoPacket> CurrentPackets;
|
||||
List<BYTE> HeaderPacket, SEIData;
|
||||
|
@ -167,6 +178,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
session.SetPriority(MFX_PRIORITY_HIGH);
|
||||
|
||||
fps = fps_;
|
||||
|
||||
bUseCBR = AppConfig->GetInt(TEXT("Video Encoding"), TEXT("UseCBR")) != 0;
|
||||
|
@ -174,14 +187,20 @@ public:
|
|||
bDupeFrames = bDupeFrames_;
|
||||
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.AsyncDepth = 1;
|
||||
//params.AsyncDepth = 0;
|
||||
params.mfx.CodecId = MFX_CODEC_AVC;
|
||||
params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY;
|
||||
params.mfx.TargetKbps = maxBitrate;
|
||||
params.mfx.TargetUsage = MFX_TARGETUSAGE_BEST_QUALITY;//SPEED;
|
||||
params.mfx.TargetKbps = (mfxU16)(maxBitrate*0.9);
|
||||
params.mfx.MaxKbps = maxBitrate;
|
||||
params.mfx.InitialDelayInKB = 1;
|
||||
//params.mfx.InitialDelayInKB = 1;
|
||||
//params.mfx.GopRefDist = 1;
|
||||
//params.mfx.NumRefFrame = 0;
|
||||
params.mfx.GopPicSize = 61;
|
||||
params.mfx.GopRefDist = 3;
|
||||
params.mfx.GopOptFlag = 2;
|
||||
params.mfx.IdrInterval = 2;
|
||||
params.mfx.NumSlice = 1;
|
||||
|
||||
params.mfx.RateControlMethod = bUseCBR ? MFX_RATECONTROL_CBR : MFX_RATECONTROL_VBR;
|
||||
params.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;
|
||||
|
||||
|
@ -206,22 +225,53 @@ public:
|
|||
enc.reset(new MFXVideoENCODE(session));
|
||||
enc->Close();
|
||||
|
||||
auto result = enc->Init(¶ms);
|
||||
mfxFrameAllocRequest req;
|
||||
memset(&req, 0, sizeof(req));
|
||||
enc->QueryIOSurf(¶ms, &req);
|
||||
|
||||
memset(&enc_surf, 0, sizeof(enc_surf));
|
||||
memcpy(&enc_surf.Info, ¶ms.mfx.FrameInfo, sizeof(enc_surf.Info));
|
||||
enc->Init(¶ms);
|
||||
|
||||
decltype(params) query;
|
||||
memcpy(&query, ¶ms, sizeof(params));
|
||||
enc->GetVideoParam(&query);
|
||||
|
||||
unsigned size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8);
|
||||
bs_buff.SetSize(size+31);//.resize(size+31);
|
||||
bs.Data = (mfxU8*)(((size_t)bs_buff.Array() + 31) / 32 * 32);
|
||||
bs.MaxLength = size;
|
||||
params.mfx.BufferSizeInKB = size/1000;
|
||||
unsigned num_surf = max(6, req.NumFrameSuggested+1);
|
||||
|
||||
encode_tasks.SetSize(num_surf);
|
||||
|
||||
const unsigned bs_size = max(query.mfx.BufferSizeInKB*1000, bufferSize*1024/8);
|
||||
bs_buff.SetSize(bs_size * num_surf + 31);
|
||||
params.mfx.BufferSizeInKB = bs_size/1000;
|
||||
|
||||
const unsigned surf_size = width*height*2;
|
||||
surf_buff.SetSize(surf_size * num_surf + 15);
|
||||
|
||||
mfxU8* bs_start = (mfxU8*)(((size_t)bs_buff.Array() + 31)/32*32);
|
||||
mfxU8* surf_start = (mfxU8*)(((size_t)surf_buff.Array() + 15)/16*16);
|
||||
for(unsigned i = 0; i < encode_tasks.Num(); i++)
|
||||
{
|
||||
encode_tasks[i].sp = nullptr;
|
||||
|
||||
mfxFrameSurface1& surf = encode_tasks[i].surf;
|
||||
memset(&surf, 0, sizeof(mfxFrameSurface1));
|
||||
memcpy(&surf.Info, ¶ms.mfx.FrameInfo, sizeof(params.mfx.FrameInfo));
|
||||
surf.Data.Y = surf_start + i*surf_size;
|
||||
surf.Data.UV = surf_start + i*surf_size + width*height;
|
||||
|
||||
mfxBitstream& bs = encode_tasks[i].bs;
|
||||
memset(&bs, 0, sizeof(mfxBitstream));
|
||||
bs.Data = bs_start + i*bs_size;
|
||||
bs.MaxLength = bs_size;
|
||||
}
|
||||
|
||||
oldest = 0;
|
||||
insert = 0;
|
||||
encode = 0;
|
||||
in_use = 0;
|
||||
|
||||
Log(TEXT("Using %u encode tasks"), encode_tasks.Num());
|
||||
Log(TEXT("Buffer size: %u configured, %u suggested by QSV; using %u"),
|
||||
bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, size*8/1024);
|
||||
bufferSize, query.mfx.BufferSizeInKB*1000*8/1024, params.mfx.BufferSizeInKB*1000*8/1024);
|
||||
|
||||
Log(TEXT("------------------------------------------"));
|
||||
Log(TEXT("%s"), GetInfoString().Array());
|
||||
|
@ -230,6 +280,8 @@ public:
|
|||
memset(&ctrl, 0, sizeof(ctrl));
|
||||
ctrl.FrameType = MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | MFX_FRAMETYPE_IDR;
|
||||
|
||||
deferredFrames = 0;
|
||||
|
||||
DataPacket packet;
|
||||
GetHeaders(packet);
|
||||
}
|
||||
|
@ -239,197 +291,264 @@ public:
|
|||
ClearPackets();
|
||||
}
|
||||
|
||||
bool Encode(LPVOID picInPtr, List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD outputTimestamp, int &ctsOffset)
|
||||
virtual void RequestBuffers(LPVOID buffers)
|
||||
{
|
||||
bs.DataLength = 0;
|
||||
bs.DataOffset = 0;
|
||||
mfxFrameSurface1& pic = *(mfxFrameSurface1*)picInPtr;
|
||||
enc_surf.Data.Y = pic.Data.Y;
|
||||
enc_surf.Data.UV = pic.Data.UV;
|
||||
enc_surf.Data.Pitch = pic.Data.Pitch;
|
||||
enc_surf.Data.TimeStamp = pic.Data.TimeStamp*90;
|
||||
mfxSyncPoint sp = nullptr;
|
||||
auto sts = enc->EncodeFrameAsync(bRequestKeyframe ? &ctrl : nullptr, &enc_surf, &bs, &sp);
|
||||
if(!buffers)
|
||||
return;
|
||||
mfxFrameData& data = encode_tasks[insert].surf.Data;
|
||||
mfxFrameData& buff = *(mfxFrameData*)buffers;
|
||||
buff.Y = data.Y;
|
||||
buff.UV = data.UV;
|
||||
}
|
||||
|
||||
sts = MFXVideoCORE_SyncOperation(session, sp, INFINITE);
|
||||
|
||||
List<x264_nal_t> nalOut;
|
||||
mfxU8 *start = bs.Data + bs.DataOffset,
|
||||
*end = bs.Data + bs.DataOffset + bs.DataLength;
|
||||
static mfxU8 start_seq[] = {0, 0, 1};
|
||||
start = std::search(start, end, start_seq, start_seq+3);
|
||||
while(start != end)
|
||||
void ProcessEncodedFrame(List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD outputTimestamp, int &ctsOffset, mfxU32 wait=0)
|
||||
{
|
||||
if(!in_use)
|
||||
return;
|
||||
unsigned pending_tasks = in_use;
|
||||
while(in_use)
|
||||
{
|
||||
decltype(start) next = std::search(start+1, end, start_seq, start_seq+3);
|
||||
x264_nal_t nal;
|
||||
nal.i_ref_idc = start[3]>>5;
|
||||
nal.i_type = start[3]&0x1f;
|
||||
if(nal.i_type == NAL_SLICE_IDR)
|
||||
nal.i_ref_idc = NAL_PRIORITY_HIGHEST;
|
||||
nal.p_payload = start;
|
||||
nal.i_payload = int(next-start);
|
||||
nalOut << nal;
|
||||
start = next;
|
||||
}
|
||||
size_t nalNum = nalOut.Num();
|
||||
encode_task& task = encode_tasks[oldest];
|
||||
auto& sp = task.sp;
|
||||
mfxStatus sts;
|
||||
if((sts = MFXVideoCORE_SyncOperation(session, sp, wait)) != MFX_ERR_NONE)
|
||||
return;
|
||||
sp = nullptr;
|
||||
in_use -= 1;
|
||||
|
||||
packets.Clear();
|
||||
ClearPackets();
|
||||
oldest = (oldest+1)%encode_tasks.Num();
|
||||
|
||||
if(bRequestKeyframe)
|
||||
bRequestKeyframe = false;
|
||||
mfxBitstream& bs = task.bs;
|
||||
|
||||
if(!bFirstFrameProcessed && nalNum)
|
||||
{
|
||||
//delayOffset = -picOut.i_dts;
|
||||
bFirstFrameProcessed = true;
|
||||
}
|
||||
|
||||
INT64 ts = INT64(outputTimestamp);
|
||||
int timeOffset = 0;//int((picOut.i_pts+delayOffset)-ts);
|
||||
|
||||
if(bDupeFrames)
|
||||
{
|
||||
//if frame duplication is being used, the shift will be insignificant, so just don't bother adjusting audio
|
||||
timeOffset += frameShift;
|
||||
|
||||
if(nalNum && timeOffset < 0)
|
||||
List<x264_nal_t> nalOut;
|
||||
mfxU8 *start = bs.Data + bs.DataOffset,
|
||||
*end = bs.Data + bs.DataOffset + bs.DataLength;
|
||||
static mfxU8 start_seq[] = {0, 0, 1};
|
||||
start = std::search(start, end, start_seq, start_seq+3);
|
||||
while(start != end)
|
||||
{
|
||||
frameShift -= timeOffset;
|
||||
timeOffset = 0;
|
||||
decltype(start) next = std::search(start+1, end, start_seq, start_seq+3);
|
||||
x264_nal_t nal;
|
||||
nal.i_ref_idc = start[3]>>5;
|
||||
nal.i_type = start[3]&0x1f;
|
||||
if(nal.i_type == NAL_SLICE_IDR)
|
||||
nal.i_ref_idc = NAL_PRIORITY_HIGHEST;
|
||||
nal.p_payload = start;
|
||||
nal.i_payload = int(next-start);
|
||||
nalOut << nal;
|
||||
start = next;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
timeOffset += ctsOffset;
|
||||
size_t nalNum = nalOut.Num();
|
||||
|
||||
//dynamically adjust the CTS for the stream if it gets lower than the current value
|
||||
//(thanks to cyrus for suggesting to do this instead of a single shift)
|
||||
if(nalNum && timeOffset < 0)
|
||||
packets.Clear();
|
||||
ClearPackets();
|
||||
|
||||
|
||||
if(!bFirstFrameProcessed && nalNum)
|
||||
{
|
||||
ctsOffset -= timeOffset;
|
||||
timeOffset = 0;
|
||||
delayOffset = 0;//bs.TimeStamp/90;
|
||||
bFirstFrameProcessed = true;
|
||||
}
|
||||
}
|
||||
|
||||
timeOffset = htonl(timeOffset);
|
||||
INT64 ts = INT64(outputTimestamp);
|
||||
int timeOffset = int(bs.TimeStamp/90+delayOffset-ts);//int((picOut.i_pts+delayOffset)-ts);
|
||||
|
||||
BYTE *timeOffsetAddr = ((BYTE*)&timeOffset)+1;
|
||||
|
||||
VideoPacket *newPacket = NULL;
|
||||
|
||||
PacketType bestType = PacketType_VideoDisposable;
|
||||
bool bFoundFrame = false;
|
||||
|
||||
for(int i=0; i<nalNum; i++)
|
||||
{
|
||||
x264_nal_t &nal = nalOut[i];
|
||||
|
||||
if(nal.i_type == NAL_SEI)
|
||||
if(bDupeFrames)
|
||||
{
|
||||
//if frame duplication is being used, the shift will be insignificant, so just don't bother adjusting audio
|
||||
timeOffset += frameShift;
|
||||
|
||||
if(nalNum && timeOffset < 0)
|
||||
{
|
||||
frameShift -= timeOffset;
|
||||
timeOffset = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
timeOffset += ctsOffset;
|
||||
|
||||
//dynamically adjust the CTS for the stream if it gets lower than the current value
|
||||
//(thanks to cyrus for suggesting to do this instead of a single shift)
|
||||
if(nalNum && timeOffset < 0)
|
||||
{
|
||||
ctsOffset -= timeOffset;
|
||||
timeOffset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
timeOffset = htonl(timeOffset);
|
||||
|
||||
BYTE *timeOffsetAddr = ((BYTE*)&timeOffset)+1;
|
||||
|
||||
VideoPacket *newPacket = NULL;
|
||||
|
||||
PacketType bestType = PacketType_VideoDisposable;
|
||||
bool bFoundFrame = false;
|
||||
|
||||
for(int i=0; i<nalNum; i++)
|
||||
{
|
||||
x264_nal_t &nal = nalOut[i];
|
||||
|
||||
if(nal.i_type == NAL_SEI)
|
||||
{
|
||||
BYTE *skip = nal.p_payload;
|
||||
while(*(skip++) != 0x1);
|
||||
int skipBytes = (int)(skip-nal.p_payload);
|
||||
|
||||
int newPayloadSize = (nal.i_payload-skipBytes);
|
||||
|
||||
if (nal.p_payload[skipBytes+1] == 0x5) {
|
||||
SEIData.Clear();
|
||||
BufferOutputSerializer packetOut(SEIData);
|
||||
|
||||
packetOut.OutputDword(htonl(newPayloadSize));
|
||||
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
|
||||
} else {
|
||||
if (!newPacket)
|
||||
newPacket = CurrentPackets.CreateNew();
|
||||
|
||||
BufferOutputSerializer packetOut(newPacket->Packet);
|
||||
|
||||
packetOut.OutputDword(htonl(newPayloadSize));
|
||||
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
|
||||
}
|
||||
}
|
||||
/*else if(nal.i_type == NAL_FILLER) //QSV does not produce NAL_FILLER
|
||||
{
|
||||
BYTE *skip = nal.p_payload;
|
||||
while(*(skip++) != 0x1);
|
||||
int skipBytes = (int)(skip-nal.p_payload);
|
||||
|
||||
int newPayloadSize = (nal.i_payload-skipBytes);
|
||||
|
||||
if (nal.p_payload[skipBytes+1] == 0x5) {
|
||||
SEIData.Clear();
|
||||
BufferOutputSerializer packetOut(SEIData);
|
||||
|
||||
packetOut.OutputDword(htonl(newPayloadSize));
|
||||
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
|
||||
} else {
|
||||
if (!newPacket)
|
||||
newPacket = CurrentPackets.CreateNew();
|
||||
|
||||
BufferOutputSerializer packetOut(newPacket->Packet);
|
||||
|
||||
packetOut.OutputDword(htonl(newPayloadSize));
|
||||
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
|
||||
}
|
||||
}
|
||||
/*else if(nal.i_type == NAL_FILLER) //QSV does not produce NAL_FILLER
|
||||
{
|
||||
BYTE *skip = nal.p_payload;
|
||||
while(*(skip++) != 0x1);
|
||||
int skipBytes = (int)(skip-nal.p_payload);
|
||||
|
||||
int newPayloadSize = (nal.i_payload-skipBytes);
|
||||
|
||||
if (!newPacket)
|
||||
newPacket = CurrentPackets.CreateNew();
|
||||
|
||||
BufferOutputSerializer packetOut(newPacket->Packet);
|
||||
|
||||
packetOut.OutputDword(htonl(newPayloadSize));
|
||||
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
|
||||
}*/
|
||||
else if(nal.i_type == NAL_SLICE_IDR || nal.i_type == NAL_SLICE)
|
||||
{
|
||||
BYTE *skip = nal.p_payload;
|
||||
while(*(skip++) != 0x1);
|
||||
int skipBytes = (int)(skip-nal.p_payload);
|
||||
|
||||
if (!newPacket)
|
||||
newPacket = CurrentPackets.CreateNew();
|
||||
newPacket = CurrentPackets.CreateNew();
|
||||
|
||||
if (!bFoundFrame)
|
||||
{
|
||||
newPacket->Packet.Insert(0, (nal.i_type == NAL_SLICE_IDR) ? 0x17 : 0x27);
|
||||
newPacket->Packet.Insert(1, 1);
|
||||
newPacket->Packet.InsertArray(2, timeOffsetAddr, 3);
|
||||
|
||||
bFoundFrame = true;
|
||||
}
|
||||
|
||||
int newPayloadSize = (nal.i_payload-skipBytes);
|
||||
BufferOutputSerializer packetOut(newPacket->Packet);
|
||||
|
||||
packetOut.OutputDword(htonl(newPayloadSize));
|
||||
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
|
||||
|
||||
switch(nal.i_ref_idc)
|
||||
}*/
|
||||
else if(nal.i_type == NAL_SLICE_IDR || nal.i_type == NAL_SLICE)
|
||||
{
|
||||
case NAL_PRIORITY_DISPOSABLE: bestType = MAX(bestType, PacketType_VideoDisposable); break;
|
||||
case NAL_PRIORITY_LOW: bestType = MAX(bestType, PacketType_VideoLow); break;
|
||||
case NAL_PRIORITY_HIGH: bestType = MAX(bestType, PacketType_VideoHigh); break;
|
||||
case NAL_PRIORITY_HIGHEST: bestType = MAX(bestType, PacketType_VideoHighest); break;
|
||||
BYTE *skip = nal.p_payload;
|
||||
while(*(skip++) != 0x1);
|
||||
int skipBytes = (int)(skip-nal.p_payload);
|
||||
|
||||
if (!newPacket)
|
||||
newPacket = CurrentPackets.CreateNew();
|
||||
|
||||
if (!bFoundFrame)
|
||||
{
|
||||
newPacket->Packet.Insert(0, (nal.i_type == NAL_SLICE_IDR) ? 0x17 : 0x27);
|
||||
newPacket->Packet.Insert(1, 1);
|
||||
newPacket->Packet.InsertArray(2, timeOffsetAddr, 3);
|
||||
|
||||
bFoundFrame = true;
|
||||
}
|
||||
|
||||
int newPayloadSize = (nal.i_payload-skipBytes);
|
||||
BufferOutputSerializer packetOut(newPacket->Packet);
|
||||
|
||||
packetOut.OutputDword(htonl(newPayloadSize));
|
||||
packetOut.Serialize(nal.p_payload+skipBytes, newPayloadSize);
|
||||
|
||||
switch(nal.i_ref_idc)
|
||||
{
|
||||
case NAL_PRIORITY_DISPOSABLE: bestType = MAX(bestType, PacketType_VideoDisposable); break;
|
||||
case NAL_PRIORITY_LOW: bestType = MAX(bestType, PacketType_VideoLow); break;
|
||||
case NAL_PRIORITY_HIGH: bestType = MAX(bestType, PacketType_VideoHigh); break;
|
||||
case NAL_PRIORITY_HIGHEST: bestType = MAX(bestType, PacketType_VideoHighest); break;
|
||||
}
|
||||
}
|
||||
/*else if(nal.i_type == NAL_SPS)
|
||||
{
|
||||
VideoPacket *newPacket = CurrentPackets.CreateNew();
|
||||
BufferOutputSerializer headerOut(newPacket->Packet);
|
||||
|
||||
headerOut.OutputByte(0x17);
|
||||
headerOut.OutputByte(0);
|
||||
headerOut.Serialize(timeOffsetAddr, 3);
|
||||
headerOut.OutputByte(1);
|
||||
headerOut.Serialize(nal.p_payload+5, 3);
|
||||
headerOut.OutputByte(0xff);
|
||||
headerOut.OutputByte(0xe1);
|
||||
headerOut.OutputWord(htons(nal.i_payload-4));
|
||||
headerOut.Serialize(nal.p_payload+4, nal.i_payload-4);
|
||||
|
||||
x264_nal_t &pps = nalOut[i+1]; //the PPS always comes after the SPS
|
||||
|
||||
headerOut.OutputByte(1);
|
||||
headerOut.OutputWord(htons(pps.i_payload-4));
|
||||
headerOut.Serialize(pps.p_payload+4, pps.i_payload-4);
|
||||
}*/
|
||||
else
|
||||
continue;
|
||||
}
|
||||
/*else if(nal.i_type == NAL_SPS)
|
||||
|
||||
packetTypes << bestType;
|
||||
|
||||
packets.SetSize(CurrentPackets.Num());
|
||||
for(UINT i=0; i<packets.Num(); i++)
|
||||
{
|
||||
VideoPacket *newPacket = CurrentPackets.CreateNew();
|
||||
BufferOutputSerializer headerOut(newPacket->Packet);
|
||||
|
||||
headerOut.OutputByte(0x17);
|
||||
headerOut.OutputByte(0);
|
||||
headerOut.Serialize(timeOffsetAddr, 3);
|
||||
headerOut.OutputByte(1);
|
||||
headerOut.Serialize(nal.p_payload+5, 3);
|
||||
headerOut.OutputByte(0xff);
|
||||
headerOut.OutputByte(0xe1);
|
||||
headerOut.OutputWord(htons(nal.i_payload-4));
|
||||
headerOut.Serialize(nal.p_payload+4, nal.i_payload-4);
|
||||
|
||||
x264_nal_t &pps = nalOut[i+1]; //the PPS always comes after the SPS
|
||||
|
||||
headerOut.OutputByte(1);
|
||||
headerOut.OutputWord(htons(pps.i_payload-4));
|
||||
headerOut.Serialize(pps.p_payload+4, pps.i_payload-4);
|
||||
}*/
|
||||
else
|
||||
continue;
|
||||
packets[i].lpPacket = CurrentPackets[i].Packet.Array();
|
||||
packets[i].size = CurrentPackets[i].Packet.Num();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
packetTypes << bestType;
|
||||
|
||||
packets.SetSize(CurrentPackets.Num());
|
||||
for(UINT i=0; i<packets.Num(); i++)
|
||||
bool Encode(LPVOID picInPtr, List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD outputTimestamp, int &ctsOffset)
|
||||
{
|
||||
profileIn("ProcessEncodedFrame");
|
||||
ProcessEncodedFrame(packets, packetTypes, outputTimestamp, ctsOffset);
|
||||
if(in_use == encode_tasks.Num())
|
||||
{
|
||||
packets[i].lpPacket = CurrentPackets[i].Packet.Array();
|
||||
packets[i].size = CurrentPackets[i].Packet.Num();
|
||||
Log(TEXT("Error: all encode tasks in use, stalling pipeline"));
|
||||
ProcessEncodedFrame(packets, packetTypes, outputTimestamp, ctsOffset, INFINITE);
|
||||
}
|
||||
profileOut;
|
||||
encode_task& task = encode_tasks[insert];
|
||||
insert = (insert+1)%encode_tasks.Num();
|
||||
in_use += 1;
|
||||
task.keyframe = bRequestKeyframe;
|
||||
bRequestKeyframe = false;
|
||||
mfxBitstream& bs = task.bs;
|
||||
mfxFrameSurface1& surf = task.surf;
|
||||
mfxFrameSurface1& pic = *(mfxFrameSurface1*)picInPtr;
|
||||
profileIn("setup new frame");
|
||||
bs.DataLength = 0;
|
||||
bs.DataOffset = 0;
|
||||
surf.Data.Pitch = pic.Data.Pitch;
|
||||
surf.Data.TimeStamp = pic.Data.TimeStamp*90;
|
||||
profileOut;
|
||||
mfxSyncPoint& sp = task.sp;
|
||||
mfxStatus sts;
|
||||
profileIn("EncodeFrameAsync");
|
||||
unsigned limit = encode < insert ? insert : insert + encode_tasks.Num();
|
||||
for(unsigned i = encode;i < limit; i++)
|
||||
{
|
||||
encode_task& task = encode_tasks[encode];
|
||||
mfxBitstream& bs = task.bs;
|
||||
mfxFrameSurface1& surf = task.surf;
|
||||
mfxSyncPoint& sp = task.sp;
|
||||
for(;;)
|
||||
{
|
||||
sts = enc->EncodeFrameAsync(task.keyframe ? &ctrl : nullptr, &surf, &bs, &sp);
|
||||
|
||||
if(sts == MFX_ERR_NONE || sp)
|
||||
break;
|
||||
if(sts == MFX_WRN_DEVICE_BUSY)
|
||||
{
|
||||
deferredFrames += 1;
|
||||
return false;
|
||||
}
|
||||
//if(!sp); //sts == MFX_ERR_MORE_DATA usually; retry the call (see MSDK examples)
|
||||
//Log(TEXT("returned status %i, %u"), sts, insert);
|
||||
}
|
||||
encode = (encode+1)%encode_tasks.Num();
|
||||
}
|
||||
profileOut;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -533,5 +652,4 @@ VideoEncoder* CreateQSVEncoder(int fps, int width, int height, int quality, CTST
|
|||
if(CheckQSVHardwareSupport())
|
||||
return new QSVEncoder(fps, width, height, quality, preset, bUse444, maxBitRate, bufferSize, bUseCFR, bDupeFrames);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
}
|
|
@ -159,6 +159,8 @@ class VideoEncoder
|
|||
protected:
|
||||
virtual bool Encode(LPVOID picIn, List<DataPacket> &packets, List<PacketType> &packetTypes, DWORD timestamp, int &ctsOffset)=0;
|
||||
|
||||
virtual void RequestBuffers(LPVOID buffers) {}
|
||||
|
||||
public:
|
||||
virtual ~VideoEncoder() {}
|
||||
|
||||
|
|
|
@ -290,7 +290,6 @@ void OBS::MainCaptureLoop()
|
|||
EncoderPicture lastPic;
|
||||
EncoderPicture outPics[NUM_OUT_BUFFERS];
|
||||
DWORD outTimes[NUM_OUT_BUFFERS] = {0, 0, 0};
|
||||
List<mfxU8> qsvBuffer;
|
||||
|
||||
for(int i=0; i<NUM_OUT_BUFFERS; i++)
|
||||
{
|
||||
|
@ -317,19 +316,7 @@ void OBS::MainCaptureLoop()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(bUsingQSV)
|
||||
{
|
||||
size_t perBuffer = 2 * outputCX * outputCY;
|
||||
qsvBuffer.SetSize(unsigned(NUM_OUT_BUFFERS * perBuffer + 15));
|
||||
mfxU8 *aligned = (mfxU8*)(((size_t)qsvBuffer.Array()+15)/16*16);
|
||||
for(int i=0; i < NUM_OUT_BUFFERS; i++)
|
||||
{
|
||||
outPics[i].mfxOut->Data.Y = aligned;
|
||||
outPics[i].mfxOut->Data.UV = aligned + outputCX * outputCY;
|
||||
aligned += perBuffer;
|
||||
}
|
||||
}
|
||||
else
|
||||
if(!bUsingQSV)
|
||||
for(int i=0; i<NUM_OUT_BUFFERS; i++)
|
||||
x264_picture_alloc(outPics[i].picOut, X264_CSP_I420, outputCX, outputCY);
|
||||
}
|
||||
|
@ -474,6 +461,17 @@ void OBS::MainCaptureLoop()
|
|||
bool bRenderView = !IsIconic(hwndMain) && bRenderViewEnabled;
|
||||
|
||||
profileIn("frame");
|
||||
|
||||
List<ProfilerNode> threadedProfilers;
|
||||
if(bUseThreaded420)
|
||||
{
|
||||
threadedProfilers.SetSize(numThreads);
|
||||
for(int i = 0; i < numThreads; i++)
|
||||
{
|
||||
::new (&threadedProfilers[i]) ProfilerNode(TEXT("Convert444Threads"), true);
|
||||
threadedProfilers[i].MonitorThread(h420Threads[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_100NS_TIME
|
||||
QWORD qwTime = renderStartTime/10000;
|
||||
|
@ -819,8 +817,10 @@ void OBS::MainCaptureLoop()
|
|||
convertInfo[i].pitch = map.RowPitch;
|
||||
if(bUsingQSV)
|
||||
{
|
||||
convertInfo[i].output[0] = nextPicOut.mfxOut->Data.Y;
|
||||
convertInfo[i].output[1] = nextPicOut.mfxOut->Data.UV;
|
||||
mfxFrameData& data = nextPicOut.mfxOut->Data;
|
||||
videoEncoder->RequestBuffers(&data);
|
||||
convertInfo[i].output[0] = data.Y;
|
||||
convertInfo[i].output[1] = data.UV;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -838,7 +838,12 @@ void OBS::MainCaptureLoop()
|
|||
{
|
||||
outTimes[curOutBuffer] = (DWORD)curStreamTime;
|
||||
if(bUsingQSV)
|
||||
Convert444toNV12((LPBYTE)map.pData, outputCX, map.RowPitch, outputCY, 0, outputCY, &picOut.mfxOut->Data.Y);
|
||||
{
|
||||
mfxFrameData& data = picOut.mfxOut->Data;
|
||||
videoEncoder->RequestBuffers(&data);
|
||||
LPBYTE output[] = {data.Y, data.UV};
|
||||
Convert444toNV12((LPBYTE)map.pData, outputCX, map.RowPitch, outputCY, 0, outputCY, output);
|
||||
}
|
||||
else
|
||||
Convert444to420((LPBYTE)map.pData, outputCX, map.RowPitch, outputCY, 0, outputCY, picOut.picOut->img.plane);
|
||||
prevTexture->Unmap(0);
|
||||
|
@ -985,6 +990,13 @@ void OBS::MainCaptureLoop()
|
|||
}
|
||||
|
||||
profileOut;
|
||||
if(bUseThreaded420)
|
||||
{
|
||||
for(int i = 0; i < numThreads; i++)
|
||||
{
|
||||
threadedProfilers[i].~ProfilerNode();
|
||||
}
|
||||
}
|
||||
profileOut;
|
||||
|
||||
//------------------------------------
|
||||
|
@ -1057,7 +1069,10 @@ void OBS::MainCaptureLoop()
|
|||
delete outPics[i].mfxOut;
|
||||
else
|
||||
for(int i=0; i<NUM_OUT_BUFFERS; i++)
|
||||
{
|
||||
x264_picture_clean(outPics[i].picOut);
|
||||
delete outPics[i].picOut;
|
||||
}
|
||||
}
|
||||
|
||||
Free(h420Threads);
|
||||
|
|
Loading…
Reference in New Issue