win-dshow: Fix virtual camera CPU usage, add more comments

My previous commits would rescale the placeholder for output on every
frame which was not CPU efficient. Now we store a copy of the scaled
placeholder and update it any time the resolution / FPS / format
changes.

A new function GetOutputBufferSize was added to allow calculating the
output size when needed. The existing code assumed a 12-bit format if
the placeholder didn't load, which could cause grey / green output.

An optimization was made to only load and scale the placeholder once
the virtual output starts, as many applications load and unload the
filter multiple times during enumeration of devices.

Various comments were also added to help clarify how the filter works.
This commit is contained in:
Richard Stanway 2021-04-13 02:07:39 +02:00
parent 92eb756dab
commit 47fcbdb0c9
2 changed files with 103 additions and 35 deletions

View File

@ -25,15 +25,9 @@ VCamFilter::VCamFilter()
AddVideoFormat(VideoFormat::YUY2, DEFAULT_CX, DEFAULT_CY,
DEFAULT_INTERVAL);
/* ---------------------------------------- */
/* load placeholder image */
format = VideoFormat::NV12;
if (initialize_placeholder()) {
placeholder.data = get_placeholder_ptr();
get_placeholder_size(&placeholder.cx, &placeholder.cy);
} else {
placeholder.data = nullptr;
}
placeholder.scaled_data = nullptr;
/* ---------------------------------------- */
/* detect if this filter is within obs */
@ -108,12 +102,6 @@ VCamFilter::VCamFilter()
interval = new_interval;
}
nv12_scale_init(&scaler, TARGET_FORMAT_NV12, cx, cy, cx, cy);
if (placeholder.data)
nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12,
GetCX(), GetCY(), placeholder.cx,
placeholder.cy);
/* ---------------------------------------- */
th = std::thread([this] { Thread(); });
@ -126,6 +114,9 @@ VCamFilter::~VCamFilter()
SetEvent(thread_stop);
th.join();
video_queue_close(vq);
if (placeholder.scaled_data)
free(placeholder.scaled_data);
}
const wchar_t *VCamFilter::FilterName() const
@ -173,12 +164,24 @@ void VCamFilter::Thread()
cy = GetCY();
interval = GetInterval();
nv12_scale_init(&scaler, TARGET_FORMAT_NV12, GetCX(), GetCY(), cx, cy);
/* ---------------------------------------- */
/* load placeholder image */
if (placeholder.data)
nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12,
GetCX(), GetCY(), placeholder.cx,
placeholder.cy);
if (initialize_placeholder()) {
placeholder.source_data = get_placeholder_ptr();
get_placeholder_size(&placeholder.cx, &placeholder.cy);
} else {
placeholder.source_data = nullptr;
}
/* Created dynamically based on output resolution changes */
placeholder.scaled_data = nullptr;
nv12_scale_init(&scaler, TARGET_FORMAT_NV12, cx, cy, cx, cy);
nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12, cx, cy,
placeholder.cx, placeholder.cy);
UpdatePlaceholder();
while (!stopped()) {
Frame(filter_time);
@ -193,6 +196,11 @@ void VCamFilter::Frame(uint64_t ts)
uint32_t new_cy = cy;
uint64_t new_interval = interval;
/* cx, cy and interval are the resolution and frame rate of the
virtual camera _source_, ie OBS' output. Do not confuse cx / cy
with GetCX() / GetCY() / GetInterval() which return the virtualcam
filter output! */
if (!vq) {
vq = video_queue_open();
}
@ -200,6 +208,8 @@ void VCamFilter::Frame(uint64_t ts)
enum queue_state state = video_queue_state(vq);
if (state != prev_state) {
if (state == SHARED_QUEUE_STATE_READY) {
/* The virtualcam output from OBS has started, get
the actual cx / cy of the data stream */
video_queue_get_info(vq, &new_cx, &new_cy,
&new_interval);
} else if (state == SHARED_QUEUE_STATE_STOPPING) {
@ -211,37 +221,54 @@ void VCamFilter::Frame(uint64_t ts)
}
if (state != SHARED_QUEUE_STATE_READY) {
/* Virtualcam output not yet started, assume it's
the same resolution as the filter output */
new_cx = GetCX();
new_cy = GetCY();
new_interval = GetInterval();
}
if (new_cx != cx || new_cy != cy || new_interval != interval) {
/* The res / FPS of the video coming from OBS has
changed, update parameters as needed */
if (in_obs) {
/* If the vcam is being used inside obs, adjust
the format we present to match */
SetVideoFormat(GetVideoFormat(), new_cx, new_cy,
new_interval);
}
nv12_scale_init(&scaler, TARGET_FORMAT_NV12, GetCX(), GetCY(),
/* Re-initialize the main scaler to use the new resolution */
nv12_scale_init(&scaler, scaler.format, GetCX(), GetCY(),
new_cx, new_cy);
if (placeholder.data)
nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12,
GetCX(), GetCY(), placeholder.cx,
placeholder.cy);
cx = new_cx;
cy = new_cy;
interval = new_interval;
UpdatePlaceholder();
}
if (GetVideoFormat() == VideoFormat::I420)
scaler.format = placeholder.scaler.format = TARGET_FORMAT_I420;
else if (GetVideoFormat() == VideoFormat::YUY2)
scaler.format = placeholder.scaler.format = TARGET_FORMAT_YUY2;
else
scaler.format = placeholder.scaler.format = TARGET_FORMAT_NV12;
VideoFormat current_format = GetVideoFormat();
if (current_format != format) {
/* The output format changed, update the scalers */
if (current_format == VideoFormat::I420)
scaler.format = placeholder.scaler.format =
TARGET_FORMAT_I420;
else if (current_format == VideoFormat::YUY2)
scaler.format = placeholder.scaler.format =
TARGET_FORMAT_YUY2;
else
scaler.format = placeholder.scaler.format =
TARGET_FORMAT_NV12;
format = current_format;
UpdatePlaceholder();
}
/* Actual output */
uint8_t *ptr;
if (LockSampleData(&ptr)) {
if (state == SHARED_QUEUE_STATE_READY)
@ -264,9 +291,45 @@ void VCamFilter::ShowOBSFrame(uint8_t *ptr)
void VCamFilter::ShowDefaultFrame(uint8_t *ptr)
{
if (placeholder.data) {
nv12_do_scale(&placeholder.scaler, ptr, placeholder.data);
if (placeholder.scaled_data) {
memcpy(ptr, placeholder.scaled_data, GetOutputBufferSize());
} else {
memset(ptr, 127, GetCX() * GetCY() * 3 / 2);
memset(ptr, 127, GetOutputBufferSize());
}
}
/* Called when the output resolution or format has changed to re-scale
the placeholder graphic into the placeholder.scaled_data buffer. */
void VCamFilter::UpdatePlaceholder(void)
{
if (!placeholder.source_data)
return;
if (placeholder.scaled_data)
free(placeholder.scaled_data);
placeholder.scaled_data = (uint8_t *)malloc(GetOutputBufferSize());
if (!placeholder.scaled_data)
return;
if (placeholder.cx == GetCX() && placeholder.cy == GetCY() &&
placeholder.scaler.format == TARGET_FORMAT_NV12) {
/* No scaling necessary if it matches exactly */
memcpy(placeholder.scaled_data, placeholder.source_data,
GetOutputBufferSize());
} else {
nv12_scale_init(&placeholder.scaler, placeholder.scaler.format,
GetCX(), GetCY(), placeholder.cx,
placeholder.cy);
nv12_do_scale(&placeholder.scaler, placeholder.scaled_data,
placeholder.source_data);
}
}
/* Calculate the size of the output buffer based on the filter's
resolution and format */
const int VCamFilter::GetOutputBufferSize(void)
{
int bits = VFormatBits(format);
return GetCX() * GetCY() * bits / 8;
}

View File

@ -6,6 +6,7 @@
#include "../shared-memory-queue.h"
#include "../tiny-nv12-scale.h"
#include "../libdshowcapture/source/output-filter.hpp"
#include "../libdshowcapture/source/dshow-formats.hpp"
#include "../../../libobs/util/windows/WinHandle.hpp"
#define DEFAULT_CX 1920
@ -16,7 +17,8 @@ typedef struct {
int cx;
int cy;
nv12_scale_t scaler;
const uint8_t *data;
const uint8_t *source_data;
uint8_t *scaled_data;
} placeholder_t;
class VCamFilter : public DShow::OutputFilter {
@ -29,6 +31,7 @@ class VCamFilter : public DShow::OutputFilter {
placeholder_t placeholder;
uint32_t cx = DEFAULT_CX;
uint32_t cy = DEFAULT_CY;
DShow::VideoFormat format;
uint64_t interval = DEFAULT_INTERVAL;
WinHandle thread_start;
WinHandle thread_stop;
@ -46,6 +49,8 @@ class VCamFilter : public DShow::OutputFilter {
void Frame(uint64_t ts);
void ShowOBSFrame(uint8_t *ptr);
void ShowDefaultFrame(uint8_t *ptr);
void UpdatePlaceholder(void);
const int GetOutputBufferSize(void);
protected:
const wchar_t *FilterName() const override;