win-dshow: Fix virtual camera CPU usage, add more comments

My previous commits would rescale the placeholder for output on every frame which was not CPU efficient. Now we store a copy of the scaled placeholder and update it any time the resolution / FPS / format changes. A new function GetOutputBufferSize was added to allow calculating the output size when needed. The existing code assumed a 12-bit format if the placeholder didn't load, which could cause grey / green output. An optimization was made to only load and scale the placeholder once the virtual output starts, as many applications load and unload the filter multiple times during enumeration of devices. Various comments were also added to help clarify how the filter works.
2021-04-13 02:07:39 +02:00 · 2021-04-13 02:07:39 +02:00 · 47fcbdb0c9
commit 47fcbdb0c9
parent 92eb756dab
2 changed files with 103 additions and 35 deletions
--- a/plugins/win-dshow/virtualcam-module/virtualcam-filter.cpp
+++ b/plugins/win-dshow/virtualcam-module/virtualcam-filter.cpp
@ -25,15 +25,9 @@ VCamFilter::VCamFilter()
 	AddVideoFormat(VideoFormat::YUY2, DEFAULT_CX, DEFAULT_CY,
 		       DEFAULT_INTERVAL);

-	/* ---------------------------------------- */
-	/* load placeholder image                   */
+	format = VideoFormat::NV12;

-	if (initialize_placeholder()) {
-		placeholder.data = get_placeholder_ptr();
-		get_placeholder_size(&placeholder.cx, &placeholder.cy);
-	} else {
-		placeholder.data = nullptr;
-	}
+	placeholder.scaled_data = nullptr;

 	/* ---------------------------------------- */
 	/* detect if this filter is within obs      */
@ -108,12 +102,6 @@ VCamFilter::VCamFilter()
 		interval = new_interval;
 	}

-	nv12_scale_init(&scaler, TARGET_FORMAT_NV12, cx, cy, cx, cy);
-	if (placeholder.data)
-		nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12,
-				GetCX(), GetCY(), placeholder.cx,
-				placeholder.cy);
-
 	/* ---------------------------------------- */

 	th = std::thread([this] { Thread(); });
@ -126,6 +114,9 @@ VCamFilter::~VCamFilter()
 	SetEvent(thread_stop);
 	th.join();
 	video_queue_close(vq);
+
+	if (placeholder.scaled_data)
+		free(placeholder.scaled_data);
 }

 const wchar_t *VCamFilter::FilterName() const
@ -173,12 +164,24 @@ void VCamFilter::Thread()
 	cy = GetCY();
 	interval = GetInterval();

-	nv12_scale_init(&scaler, TARGET_FORMAT_NV12, GetCX(), GetCY(), cx, cy);
+	/* ---------------------------------------- */
+	/* load placeholder image                   */

-	if (placeholder.data)
-		nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12,
-				GetCX(), GetCY(), placeholder.cx,
-				placeholder.cy);
+	if (initialize_placeholder()) {
+		placeholder.source_data = get_placeholder_ptr();
+		get_placeholder_size(&placeholder.cx, &placeholder.cy);
+	} else {
+		placeholder.source_data = nullptr;
+	}
+
+	/* Created dynamically based on output resolution changes */
+	placeholder.scaled_data = nullptr;
+
+	nv12_scale_init(&scaler, TARGET_FORMAT_NV12, cx, cy, cx, cy);
+	nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12, cx, cy,
+			placeholder.cx, placeholder.cy);
+
+	UpdatePlaceholder();

 	while (!stopped()) {
 		Frame(filter_time);
@ -193,6 +196,11 @@ void VCamFilter::Frame(uint64_t ts)
 	uint32_t new_cy = cy;
 	uint64_t new_interval = interval;

+	/* cx, cy and interval are the resolution and frame rate of the
+	   virtual camera _source_, ie OBS' output. Do not confuse cx / cy
+	   with GetCX() / GetCY() / GetInterval() which return the virtualcam
+	   filter output! */
+
 	if (!vq) {
 		vq = video_queue_open();
 	}
@ -200,6 +208,8 @@ void VCamFilter::Frame(uint64_t ts)
 	enum queue_state state = video_queue_state(vq);
 	if (state != prev_state) {
 		if (state == SHARED_QUEUE_STATE_READY) {
+			/* The virtualcam output from OBS has started, get
+			   the actual cx / cy of the data stream */
 			video_queue_get_info(vq, &new_cx, &new_cy,
 					     &new_interval);
 		} else if (state == SHARED_QUEUE_STATE_STOPPING) {
@ -211,37 +221,54 @@ void VCamFilter::Frame(uint64_t ts)
 	}

 	if (state != SHARED_QUEUE_STATE_READY) {
+		/* Virtualcam output not yet started, assume it's
+		   the same resolution as the filter output */
 		new_cx = GetCX();
 		new_cy = GetCY();
 		new_interval = GetInterval();
 	}

 	if (new_cx != cx || new_cy != cy || new_interval != interval) {
+		/* The res / FPS of the video coming from OBS has
+		   changed, update parameters as needed */
 		if (in_obs) {
+			/* If the vcam is being used inside obs, adjust
+			   the format we present to match */
 			SetVideoFormat(GetVideoFormat(), new_cx, new_cy,
 				       new_interval);
 		}

-		nv12_scale_init(&scaler, TARGET_FORMAT_NV12, GetCX(), GetCY(),
+		/* Re-initialize the main scaler to use the new resolution */
+		nv12_scale_init(&scaler, scaler.format, GetCX(), GetCY(),
 				new_cx, new_cy);

-		if (placeholder.data)
-			nv12_scale_init(&placeholder.scaler, TARGET_FORMAT_NV12,
-					GetCX(), GetCY(), placeholder.cx,
-					placeholder.cy);
-
 		cx = new_cx;
 		cy = new_cy;
 		interval = new_interval;
+
+		UpdatePlaceholder();
 	}

-	if (GetVideoFormat() == VideoFormat::I420)
-		scaler.format = placeholder.scaler.format = TARGET_FORMAT_I420;
-	else if (GetVideoFormat() == VideoFormat::YUY2)
-		scaler.format = placeholder.scaler.format = TARGET_FORMAT_YUY2;
-	else
-		scaler.format = placeholder.scaler.format = TARGET_FORMAT_NV12;
+	VideoFormat current_format = GetVideoFormat();

+	if (current_format != format) {
+		/* The output format changed, update the scalers */
+		if (current_format == VideoFormat::I420)
+			scaler.format = placeholder.scaler.format =
+				TARGET_FORMAT_I420;
+		else if (current_format == VideoFormat::YUY2)
+			scaler.format = placeholder.scaler.format =
+				TARGET_FORMAT_YUY2;
+		else
+			scaler.format = placeholder.scaler.format =
+				TARGET_FORMAT_NV12;
+
+		format = current_format;
+
+		UpdatePlaceholder();
+	}
+
+	/* Actual output */
 	uint8_t *ptr;
 	if (LockSampleData(&ptr)) {
 		if (state == SHARED_QUEUE_STATE_READY)
@ -264,9 +291,45 @@ void VCamFilter::ShowOBSFrame(uint8_t *ptr)

 void VCamFilter::ShowDefaultFrame(uint8_t *ptr)
 {
-	if (placeholder.data) {
-		nv12_do_scale(&placeholder.scaler, ptr, placeholder.data);
+	if (placeholder.scaled_data) {
+		memcpy(ptr, placeholder.scaled_data, GetOutputBufferSize());
 	} else {
-		memset(ptr, 127, GetCX() * GetCY() * 3 / 2);
+		memset(ptr, 127, GetOutputBufferSize());
 	}
 }
+
+/* Called when the output resolution or format has changed to re-scale
+   the placeholder graphic into the placeholder.scaled_data buffer. */
+void VCamFilter::UpdatePlaceholder(void)
+{
+	if (!placeholder.source_data)
+		return;
+
+	if (placeholder.scaled_data)
+		free(placeholder.scaled_data);
+
+	placeholder.scaled_data = (uint8_t *)malloc(GetOutputBufferSize());
+	if (!placeholder.scaled_data)
+		return;
+
+	if (placeholder.cx == GetCX() && placeholder.cy == GetCY() &&
+	    placeholder.scaler.format == TARGET_FORMAT_NV12) {
+		/* No scaling necessary if it matches exactly */
+		memcpy(placeholder.scaled_data, placeholder.source_data,
+		       GetOutputBufferSize());
+	} else {
+		nv12_scale_init(&placeholder.scaler, placeholder.scaler.format,
+				GetCX(), GetCY(), placeholder.cx,
+				placeholder.cy);
+		nv12_do_scale(&placeholder.scaler, placeholder.scaled_data,
+			      placeholder.source_data);
+	}
+}
+
+/* Calculate the size of the output buffer based on the filter's
+   resolution and format */
+const int VCamFilter::GetOutputBufferSize(void)
+{
+	int bits = VFormatBits(format);
+	return GetCX() * GetCY() * bits / 8;
+}
--- a/plugins/win-dshow/virtualcam-module/virtualcam-filter.hpp
+++ b/plugins/win-dshow/virtualcam-module/virtualcam-filter.hpp
@ -6,6 +6,7 @@
 #include "../shared-memory-queue.h"
 #include "../tiny-nv12-scale.h"
 #include "../libdshowcapture/source/output-filter.hpp"
+#include "../libdshowcapture/source/dshow-formats.hpp"
 #include "../../../libobs/util/windows/WinHandle.hpp"

 #define DEFAULT_CX 1920
@ -16,7 +17,8 @@ typedef struct {
 	int cx;
 	int cy;
 	nv12_scale_t scaler;
-	const uint8_t *data;
+	const uint8_t *source_data;
+	uint8_t *scaled_data;
 } placeholder_t;

 class VCamFilter : public DShow::OutputFilter {
@ -29,6 +31,7 @@ class VCamFilter : public DShow::OutputFilter {
 	placeholder_t placeholder;
 	uint32_t cx = DEFAULT_CX;
 	uint32_t cy = DEFAULT_CY;
+	DShow::VideoFormat format;
 	uint64_t interval = DEFAULT_INTERVAL;
 	WinHandle thread_start;
 	WinHandle thread_stop;
@ -46,6 +49,8 @@ class VCamFilter : public DShow::OutputFilter {
 	void Frame(uint64_t ts);
 	void ShowOBSFrame(uint8_t *ptr);
 	void ShowDefaultFrame(uint8_t *ptr);
+	void UpdatePlaceholder(void);
+	const int GetOutputBufferSize(void);

 protected:
 	const wchar_t *FilterName() const override;