frontend-tools: Add caption generation tool (windows)

Allows generating captions via the Windows speech recognition API
(SAPI).

This is currently marked as experimental due to speech recognition
technology still being less than ideal.  Speech recognition technology
in general is probably never going to be anywhere near perfect.
Microsoft's speech recognition in particular requires a bit of training
via the Windows speech recognition tool to ensure it can dictate better.
Clear speech with a good mic is recognized fairly well, but casual
speech and/or speaking with a poor microphone will have some significant
issues.  Captions can often be way off when speaking casually rather
than with clear diction.
This commit is contained in:
jp9000 2016-11-17 05:30:45 -08:00
parent 2cbb3c2505
commit b8e078f2bf
9 changed files with 1064 additions and 2 deletions

View File

@ -17,8 +17,13 @@ if(WIN32 OR APPLE)
)
endif()
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/frontend-tools-config.h.in"
"${CMAKE_BINARY_DIR}/config/frontend-tools-config.h")
set(frontend-tools_HEADERS
${frontend-tools_HEADERS}
"${CMAKE_BINARY_DIR}/config/frontend-tools-config.h"
output-timer.hpp
tool-helpers.hpp
)
@ -35,6 +40,18 @@ set(frontend-tools_UI
if(WIN32)
set(frontend-tools_PLATFORM_SOURCES
auto-scene-switcher-win.cpp)
if(BUILD_CAPTIONS)
set(frontend-tools_PLATFORM_SOURCES
${frontend-tools_PLATFORM_SOURCES}
captions.cpp
captions-stream.cpp)
set(frontend-tools_PLATFORM_HEADERS
captions.hpp
captions-stream.hpp)
set(frontend-tools_PLATFORM_UI
forms/captions.ui)
endif()
elseif(APPLE)
set(frontend-tools_PLATFORM_SOURCES
auto-scene-switcher-osx.mm)
@ -45,13 +62,16 @@ elseif(APPLE)
${COCOA})
endif()
qt5_wrap_ui(frontend-tools_UI_HEADERS ${frontend-tools_UI})
qt5_wrap_ui(frontend-tools_UI_HEADERS
${frontend-tools_UI}
${frontend-tools_PLATFORM_UI})
add_library(frontend-tools MODULE
${frontend-tools_HEADERS}
${frontend-tools_SOURCES}
${frontend-tools_PLATFORM_SOURCES}
${frontend-tools_UI_HEADERS}
${frontend-tools_PLATFORM_SOURCES}
${frontend-tools_PLATFORM_HEADERS}
)
target_link_libraries(frontend-tools
${frontend-tools_PLATFORM_LIBS}

View File

@ -0,0 +1,418 @@
#include "captions-stream.hpp"
#include <mmreg.h>
#include <util/windows/CoTaskMemPtr.hpp>
#include <util/threading.h>
#include <util/base.h>
using namespace std;
#if 0
#define debugfunc(format, ...) blog(LOG_DEBUG, "[Captions] %s(" format ")", \
__FUNCTION__, ##__VA_ARGS__)
#else
#define debugfunc(format, ...)
#endif
/* Builds the stream with a default 16 kHz, mono, 16-bit PCM output format
 * and creates the resampler that converts OBS audio into that format.
 *
 * samplerate_ is stored in the `samplerate` member; it is not used to pick
 * the output format here. */
CaptionStream::CaptionStream(DWORD samplerate_) :
	/* listed in member declaration order (event precedes samplerate) */
	event(CreateEvent(nullptr, false, false, nullptr)),
	samplerate(samplerate_)
{
	buf_info.ulMsMinNotification = 50;
	buf_info.ulMsBufferSize = 500;
	buf_info.ulMsEventBias = 0;

	format.wFormatTag = WAVE_FORMAT_PCM;
	format.nChannels = 1;
	format.nSamplesPerSec = 16000;
	format.wBitsPerSample = 16;
	/* derive the dependent fields so they cannot drift apart */
	format.nBlockAlign =
		(WORD)(format.nChannels * format.wBitsPerSample / 8);
	format.nAvgBytesPerSec = format.nSamplesPerSec * format.nBlockAlign;
	/* cbSize counts extra bytes appended *after* the WAVEFORMATEX;
	 * plain PCM carries none */
	format.cbSize = 0;

	resampler.Reset(&format);
}
void CaptionStream::Stop()
{
{
lock_guard<mutex> lock(m);
circlebuf_free(buf);
}
cv.notify_one();
}
void CaptionStream::PushAudio(const struct audio_data *data, bool muted)
{
uint8_t *output[MAX_AV_PLANES] = {};
uint32_t frames = data->frames;
uint64_t ts_offset;
bool ready = false;
audio_resampler_resample(resampler, output, &frames, &ts_offset,
data->data, data->frames);
if (output[0]) {
if (muted)
memset(output[0], 0, frames * sizeof(int16_t));
lock_guard<mutex> lock(m);
circlebuf_push_back(buf, output[0], frames * sizeof(int16_t));
write_pos += frames * sizeof(int16_t);
if (wait_size && buf->size >= wait_size)
ready = true;
}
if (ready)
cv.notify_one();
}
// IUnknown methods
/* Returns a pointer to the requested interface and adds a reference.
 *
 * Supported interfaces: IUnknown, IStream, ISpStreamFormat, ISpAudio.
 * Returns E_POINTER when ppv is null and E_NOINTERFACE (with *ppv cleared)
 * for any other interface id. */
STDMETHODIMP CaptionStream::QueryInterface(REFIID riid, void **ppv)
{
	if (!ppv)
		return E_POINTER;

	if (riid == IID_IUnknown) {
		*ppv = this;
	} else if (riid == IID_IStream) {
		*ppv = (IStream*)this;
	} else if (riid == IID_ISpStreamFormat) {
		*ppv = (ISpStreamFormat*)this;
	} else if (riid == IID_ISpAudio) {
		*ppv = (ISpAudio*)this;
	} else {
		*ppv = nullptr;
		return E_NOINTERFACE;
	}

	AddRef();
	return NOERROR;
}
/* Atomically increments the reference count and returns the new value. */
STDMETHODIMP_(ULONG) CaptionStream::AddRef()
{
	const long updated = os_atomic_inc_long(&refs);
	return (ULONG)updated;
}
/* Atomically decrements the reference count, destroying the object when it
 * reaches zero. Returns the new count. */
STDMETHODIMP_(ULONG) CaptionStream::Release()
{
	const ULONG remaining = (ULONG)os_atomic_dec_long(&refs);

	if (remaining == 0)
		delete this;

	return remaining;
}
// ISequentialStream methods
/* Pops up to `bytes` bytes of resampled audio into `data`.
 *
 * Blocks until PushAudio() has buffered at least `bytes` bytes or Stop()
 * wakes the reader. Returns S_OK on a full read and S_FALSE when fewer
 * bytes than requested were available after waking (for example after
 * Stop() emptied the buffer). *read_bytes, when provided, receives the
 * number of bytes actually copied.
 *
 * Returns STG_E_INVALIDPOINTER when `data` is null. */
STDMETHODIMP CaptionStream::Read(void *data, ULONG bytes, ULONG *read_bytes)
{
	HRESULT hr = S_OK;

	debugfunc("data, %lu, read_bytes", bytes);
	if (!data)
		return STG_E_INVALIDPOINTER;

	/* Publish wait_size and test the buffer level under ONE lock
	 * acquisition. Releasing the mutex between the two steps would let
	 * PushAudio()/Stop() notify before we start waiting, losing the
	 * wakeup. */
	unique_lock<mutex> lock(m);
	wait_size = bytes;

	/* Deliberately a single wait without a predicate: Stop() has no
	 * flag of its own and relies on this wait returning so a short read
	 * can be reported. */
	if (bytes > buf->size)
		cv.wait(lock);

	if (bytes > (ULONG)buf->size) {
		bytes = (ULONG)buf->size;
		hr = S_FALSE;
	}
	if (bytes)
		circlebuf_pop_front(buf, data, bytes);
	if (read_bytes)
		*read_bytes = bytes;

	wait_size = 0;
	pos.QuadPart += bytes;
	return hr;
}
/* Writing through the stream interface is not supported; audio only enters
 * via PushAudio(). Always returns STG_E_INVALIDFUNCTION. */
STDMETHODIMP CaptionStream::Write(const void *, ULONG bytes, ULONG*)
{
	debugfunc("data, %lu, written_bytes", bytes);

	/* only referenced by the (normally disabled) debug macro */
	UNUSED_PARAMETER(bytes);

	const HRESULT unsupported = STG_E_INVALIDFUNCTION;
	return unsupported;
}
// IStream methods
/* Reports the current read position.
 *
 * Only the "where am I" query (origin STREAM_SEEK_CUR with a zero offset)
 * is supported; any real repositioning returns E_NOTIMPL. new_pos is
 * optional, as IStream::Seek permits callers to pass null when they do not
 * need the resulting position. */
STDMETHODIMP CaptionStream::Seek(LARGE_INTEGER move, DWORD origin,
		ULARGE_INTEGER *new_pos)
{
	debugfunc("%lld, %lx, new_pos", move, origin);

	if (origin != STREAM_SEEK_CUR || move.QuadPart != 0)
		return E_NOTIMPL;

	if (new_pos) {
		/* pos is advanced by Read() under the same mutex */
		lock_guard<mutex> lock(m);
		*new_pos = pos;
	}
	return S_OK;
}
/* Resizing the stream is not supported. Always returns
 * STG_E_INVALIDFUNCTION. */
STDMETHODIMP CaptionStream::SetSize(ULARGE_INTEGER new_size)
{
	debugfunc("%llu", new_size);

	/* only referenced by the (normally disabled) debug macro */
	UNUSED_PARAMETER(new_size);

	const HRESULT unsupported = STG_E_INVALIDFUNCTION;
	return unsupported;
}
/* Copies up to `bytes` bytes of buffered audio into another stream without
 * consuming them (the data is peeked, not popped).
 *
 * stream:        destination; STG_E_INVALIDPOINTER when null.
 * bytes:         requested amount, clamped to what is buffered.
 * read_bytes:    optional; receives the clamped amount.
 * written_bytes: optional; receives what the destination reported written.
 *
 * Returns the destination's Write() result, or S_OK when there was nothing
 * to copy. */
STDMETHODIMP CaptionStream::CopyTo(IStream *stream, ULARGE_INTEGER bytes,
		ULARGE_INTEGER *read_bytes,
		ULARGE_INTEGER *written_bytes)
{
	HRESULT hr = S_OK;

	debugfunc("stream, %llu, read_bytes, written_bytes", bytes);

	if (!stream)
		return STG_E_INVALIDPOINTER;

	ULONG written = 0;

	/* take the lock before looking at buf->size; PushAudio() changes it
	 * from another thread */
	lock_guard<mutex> lock(m);

	if (bytes.QuadPart > (ULONGLONG)buf->size)
		bytes.QuadPart = (ULONGLONG)buf->size;

	const size_t byte_count = (size_t)bytes.QuadPart;
	if (byte_count) {
		/* temp_buf holds int16_t elements: round the byte count up
		 * to whole samples */
		temp_buf.resize((byte_count + sizeof(int16_t) - 1) /
				sizeof(int16_t));
		circlebuf_peek_front(buf, temp_buf.data(), byte_count);
		hr = stream->Write(temp_buf.data(), (ULONG)byte_count,
				&written);
	}

	if (read_bytes)
		*read_bytes = bytes;
	if (written_bytes)
		written_bytes->QuadPart = written;

	return hr;
}
/* No-op: there is nothing to flush. Always reports success. */
STDMETHODIMP CaptionStream::Commit(DWORD commit_flags)
{
	debugfunc("%lx", commit_flags);

	/* only referenced by the (normally disabled) debug macro */
	UNUSED_PARAMETER(commit_flags);

	/* TODO? */
	const HRESULT result = S_OK;
	return result;
}
/* No-op: the stream has no transactional state. Always reports success. */
STDMETHODIMP CaptionStream::Revert(void)
{
	debugfunc("");

	const HRESULT result = S_OK;
	return result;
}
/* Region locking is not supported. Always returns
 * STG_E_INVALIDFUNCTION. */
STDMETHODIMP CaptionStream::LockRegion(ULARGE_INTEGER offset,
		ULARGE_INTEGER size, DWORD type)
{
	debugfunc("%llu, %llu, %ld", offset, size, type);

	/* only referenced by the (normally disabled) debug macro */
	UNUSED_PARAMETER(offset);
	UNUSED_PARAMETER(size);
	UNUSED_PARAMETER(type);

	/* TODO? */
	const HRESULT unsupported = STG_E_INVALIDFUNCTION;
	return unsupported;
}
/* Region unlocking is not supported. Always returns
 * STG_E_INVALIDFUNCTION. */
STDMETHODIMP CaptionStream::UnlockRegion(ULARGE_INTEGER offset,
		ULARGE_INTEGER size, DWORD type)
{
	debugfunc("%llu, %llu, %ld", offset, size, type);

	/* only referenced by the (normally disabled) debug macro */
	UNUSED_PARAMETER(offset);
	UNUSED_PARAMETER(size);
	UNUSED_PARAMETER(type);

	/* TODO? */
	const HRESULT unsupported = STG_E_INVALIDFUNCTION;
	return unsupported;
}
/* Name reported through Stat(). Declared as an array rather than a pointer
 * so sizeof() covers the whole string including its terminator. */
static const wchar_t stat_name[] = L"Caption stream";

/* Fills *stg with the stream type and the number of currently buffered
 * bytes.
 *
 * When flag is STATFLAG_DEFAULT the stream name is also returned in
 * stg->pwcsName as a CoTaskMemAlloc'd, null-terminated copy.
 *
 * Returns E_POINTER when stg is null and E_OUTOFMEMORY when the name cannot
 * be allocated. */
STDMETHODIMP CaptionStream::Stat(STATSTG *stg, DWORD flag)
{
	debugfunc("stg, %lu", flag);

	if (!stg)
		return E_POINTER;

	/* allocate first so *stg is left untouched on failure */
	wchar_t *name = nullptr;
	if (flag == STATFLAG_DEFAULT) {
		name = (wchar_t*)CoTaskMemAlloc(sizeof(stat_name));
		if (!name)
			return E_OUTOFMEMORY;
		memcpy(name, stat_name, sizeof(stat_name));
	}

	lock_guard<mutex> lock(m);
	*stg = {};
	stg->type = STGTY_STREAM;
	stg->cbSize.QuadPart = (ULONGLONG)buf->size;
	stg->pwcsName = name;

	return S_OK;
}
/* Cloning is not implemented.
 *
 * Clears *stream and returns E_NOTIMPL; returns STG_E_INVALIDPOINTER when
 * stream itself is null. */
STDMETHODIMP CaptionStream::Clone(IStream **stream)
{
	debugfunc("stream");

	if (!stream)
		return STG_E_INVALIDPOINTER;

	*stream = nullptr;
	return E_NOTIMPL;
}
// ISpStreamFormat methods
/* Returns the stream's current audio format.
 *
 * guid:            receives SPDFID_WaveFormatEx, or an all-zero GUID when
 *                  no format is set (wFormatTag == 0).
 * co_mem_wfex_out: receives a CoTaskMemAlloc'd copy of the WAVEFORMATEX
 *                  that the caller must free, or null when no format is
 *                  set.
 *
 * Returns E_POINTER when either argument is null and E_OUTOFMEMORY when the
 * copy cannot be allocated. */
STDMETHODIMP CaptionStream::GetFormat(GUID *guid,
		WAVEFORMATEX **co_mem_wfex_out)
{
	debugfunc("guid, co_mem_wfex_out");

	if (!guid || !co_mem_wfex_out)
		return E_POINTER;

	if (format.wFormatTag == 0) {
		*guid = GUID{};
		*co_mem_wfex_out = nullptr;
		return S_OK;
	}

	void *wfex = CoTaskMemAlloc(sizeof(format));
	if (!wfex) {
		*co_mem_wfex_out = nullptr;
		return E_OUTOFMEMORY;
	}
	memcpy(wfex, &format, sizeof(format));

	/* the format id is an output too; callers must not be left with an
	 * unwritten GUID */
	*guid = SPDFID_WaveFormatEx;
	*co_mem_wfex_out = (WAVEFORMATEX*)wfex;
	return S_OK;
}
// ISpAudio methods
/* Records the requested audio state; it is reported back by GetStatus().
 * The second (reserved) argument is ignored. Always succeeds. */
STDMETHODIMP CaptionStream::SetState(SPAUDIOSTATE state_, ULONGLONG)
{
	debugfunc("%lu, reserved", state_);

	this->state = state_;

	return S_OK;
}
/* Switches the stream to a new output format.
 *
 * Only SPDFID_WaveFormatEx is acted upon: the format is copied, the
 * resampler is rebuilt for it and the ring buffer is pre-sized to 50 ms of
 * audio. Any other format id is accepted and ignored.
 *
 * Returns E_INVALIDARG when wfex is null, otherwise S_OK. */
STDMETHODIMP CaptionStream::SetFormat(REFGUID guid_ref,
		const WAVEFORMATEX *wfex)
{
	debugfunc("guid, wfex");

	if (!wfex)
		return E_INVALIDARG;

	/* unknown format ids are silently accepted */
	if (!(guid_ref == SPDFID_WaveFormatEx))
		return S_OK;

	lock_guard<mutex> lock(m);

	memcpy(&format, wfex, sizeof(format));
	resampler.Reset(wfex);

	/* 50 msec */
	const DWORD frames_50ms = format.nSamplesPerSec / 20;
	const DWORD bytes_50ms = frames_50ms * format.nBlockAlign;
	circlebuf_reserve(buf, (size_t)bytes_50ms);

	return S_OK;
}
/* Reports buffered byte count, current state and the read/write positions.
 * Returns E_POINTER when status is null. */
STDMETHODIMP CaptionStream::GetStatus(SPAUDIOSTATUS *status)
{
	debugfunc("status");

	if (!status)
		return E_POINTER;

	/* TODO? */
	SPAUDIOSTATUS snapshot = {};
	{
		/* gather everything under the lock for a consistent view */
		lock_guard<mutex> lock(m);
		snapshot.cbNonBlockingIO = (ULONG)buf->size;
		snapshot.State = state;
		snapshot.CurSeekPos = pos.QuadPart;
		snapshot.CurDevicePos = write_pos;
	}

	*status = snapshot;
	return S_OK;
}
/* Stores the buffer parameters supplied by the caller; they are only
 * echoed back by GetBufferInfo() and do not yet influence buffering.
 *
 * Returns E_POINTER when buf_info_ is null. */
STDMETHODIMP CaptionStream::SetBufferInfo(const SPAUDIOBUFFERINFO *buf_info_)
{
	debugfunc("buf_info");

	if (!buf_info_)
		return E_POINTER;

	/* TODO */
	buf_info = *buf_info_;
	return S_OK;
}
/* Copies the stored buffer parameters to the caller.
 * Returns E_POINTER when buf_info_ is null. */
STDMETHODIMP CaptionStream::GetBufferInfo(SPAUDIOBUFFERINFO *buf_info_)
{
	debugfunc("buf_info");

	if (buf_info_) {
		*buf_info_ = buf_info;
		return S_OK;
	}

	return E_POINTER;
}
/* Returns the stream's format as the default format.
 *
 * format:          receives SPDFID_WaveFormatEx.
 * co_mem_wfex_out: receives a CoTaskMemAlloc'd copy of the WAVEFORMATEX
 *                  member that the caller must free.
 *
 * Returns E_POINTER when either argument is null and E_OUTOFMEMORY when the
 * copy cannot be allocated. */
STDMETHODIMP CaptionStream::GetDefaultFormat(GUID *format,
		WAVEFORMATEX **co_mem_wfex_out)
{
	debugfunc("format, co_mem_wfex_out");

	if (!format || !co_mem_wfex_out)
		return E_POINTER;

	/* The `format` parameter hides the member of the same name, so the
	 * member must be named explicitly; a bare `format` here would size
	 * and copy the GUID pointer instead of the wave format. */
	const WAVEFORMATEX &wave_format = this->format;

	void *wfex = CoTaskMemAlloc(sizeof(wave_format));
	if (!wfex) {
		*co_mem_wfex_out = nullptr;
		return E_OUTOFMEMORY;
	}
	memcpy(wfex, &wave_format, sizeof(wave_format));

	*format = SPDFID_WaveFormatEx;
	*co_mem_wfex_out = (WAVEFORMATEX*)wfex;
	return S_OK;
}
/* Returns the event handle created in the constructor. */
STDMETHODIMP_(HANDLE) CaptionStream::EventHandle(void)
{
	debugfunc("");

	const HANDLE handle = event;
	return handle;
}
/* Returns the value last stored by SetVolumeLevel().
 * Returns E_POINTER when level is null. */
STDMETHODIMP CaptionStream::GetVolumeLevel(ULONG *level)
{
	debugfunc("level");

	if (level) {
		*level = vol;
		return S_OK;
	}

	return E_POINTER;
}
/* Stores the volume level; the value is only reported back by
 * GetVolumeLevel(). Always succeeds. */
STDMETHODIMP CaptionStream::SetVolumeLevel(ULONG level)
{
	debugfunc("%lu", level);

	this->vol = level;

	return S_OK;
}
/* Returns the value last stored by SetBufferNotifySize().
 * Returns E_POINTER when size is null. */
STDMETHODIMP CaptionStream::GetBufferNotifySize(ULONG *size)
{
	debugfunc("size");

	if (size) {
		*size = notify_size;
		return S_OK;
	}

	return E_POINTER;
}
/* Stores the notify size; the value is only reported back by
 * GetBufferNotifySize(). Always succeeds. */
STDMETHODIMP CaptionStream::SetBufferNotifySize(ULONG size)
{
	debugfunc("%lu", size);

	this->notify_size = size;

	return S_OK;
}

View File

@ -0,0 +1,119 @@
#include <windows.h>
#include <sapi.h>
#include <condition_variable>
#include <mutex>
#include <vector>
#include <obs.h>
#include <media-io/audio-resampler.h>
#include <util/circlebuf.h>
#include <util/windows/WinHandle.hpp>
#include <fstream>
/* Owning wrapper around a libobs circlebuf: the buffer is freed when the
 * wrapper is destroyed. Converts implicitly to circlebuf* so it can be
 * passed straight to the circlebuf_* functions. */
class CircleBuf {
	circlebuf buf = {};

public:
	CircleBuf() = default;

	/* non-copyable: a copy would free the same storage twice */
	CircleBuf(const CircleBuf&) = delete;
	CircleBuf &operator=(const CircleBuf&) = delete;

	inline ~CircleBuf() {circlebuf_free(&buf);}

	inline operator circlebuf*() {return &buf;}
	inline circlebuf *operator->() {return &buf;}
};
/* Owning wrapper around a libobs audio resampler that converts from the
 * OBS audio output format to 16-bit audio in a requested wave format. */
class Resampler {
	audio_resampler_t *resampler = nullptr;

public:
	Resampler() = default;

	/* non-copyable: a copy would destroy the same resampler twice */
	Resampler(const Resampler&) = delete;
	Resampler &operator=(const Resampler&) = delete;

	/* (Re)creates the resampler.
	 *
	 * Source side: sample rate, format and speaker layout of the current
	 * OBS audio output. Destination side: wfex's sample rate and channel
	 * count, always 16-bit samples. Any previous resampler is destroyed
	 * first. */
	inline void Reset(const WAVEFORMATEX *wfex)
	{
		const struct audio_output_info *aoi =
			audio_output_get_info(obs_get_audio());

		struct resample_info src;
		src.samples_per_sec = aoi->samples_per_sec;
		src.format = aoi->format;
		src.speakers = aoi->speakers;

		struct resample_info dst;
		dst.samples_per_sec = uint32_t(wfex->nSamplesPerSec);
		dst.format = AUDIO_FORMAT_16BIT;
		/* channel count is used directly as the speaker layout */
		dst.speakers = (enum speaker_layout)wfex->nChannels;

		if (resampler)
			audio_resampler_destroy(resampler);
		resampler = audio_resampler_create(&dst, &src);
	}

	inline ~Resampler() {audio_resampler_destroy(resampler);}

	inline operator audio_resampler_t*() {return resampler;}
};
class CaptionStream : public ISpAudio {
volatile long refs = 1;
SPAUDIOBUFFERINFO buf_info = {};
ULONG notify_size = 0;
SPAUDIOSTATE state;
WinHandle event;
ULONG vol = 0;
std::condition_variable cv;
std::mutex m;
std::vector<int16_t> temp_buf;
WAVEFORMATEX format = {};
Resampler resampler;
CircleBuf buf;
ULONG wait_size = 0;
DWORD samplerate = 0;
ULARGE_INTEGER pos = {};
ULONGLONG write_pos = 0;
public:
CaptionStream(DWORD samplerate);
void Stop();
void PushAudio(const struct audio_data *audio_data, bool muted);
// IUnknown methods
STDMETHODIMP QueryInterface(REFIID riid, void **ppv) override;
STDMETHODIMP_(ULONG) AddRef() override;
STDMETHODIMP_(ULONG) Release() override;
// ISequentialStream methods
STDMETHODIMP Read(void *data, ULONG bytes, ULONG *read_bytes) override;
STDMETHODIMP Write(const void *data, ULONG bytes, ULONG *written_bytes)
override;
// IStream methods
STDMETHODIMP Seek(LARGE_INTEGER move, DWORD origin,
ULARGE_INTEGER *new_pos) override;
STDMETHODIMP SetSize(ULARGE_INTEGER new_size) override;
STDMETHODIMP CopyTo(IStream *stream, ULARGE_INTEGER bytes,
ULARGE_INTEGER *read_bytes,
ULARGE_INTEGER *written_bytes) override;
STDMETHODIMP Commit(DWORD commit_flags) override;
STDMETHODIMP Revert(void) override;
STDMETHODIMP LockRegion(ULARGE_INTEGER offset, ULARGE_INTEGER size,
DWORD type) override;
STDMETHODIMP UnlockRegion(ULARGE_INTEGER offset, ULARGE_INTEGER size,
DWORD type) override;
STDMETHODIMP Stat(STATSTG *stg, DWORD flags) override;
STDMETHODIMP Clone(IStream **stream) override;
// ISpStreamFormat methods
STDMETHODIMP GetFormat(GUID *guid, WAVEFORMATEX **co_mem_wfex_out)
override;
// ISpAudio methods
STDMETHODIMP SetState(SPAUDIOSTATE state, ULONGLONG reserved) override;
STDMETHODIMP SetFormat(REFGUID guid_ref, const WAVEFORMATEX *wfex)
override;
STDMETHODIMP GetStatus(SPAUDIOSTATUS *status) override;
STDMETHODIMP SetBufferInfo(const SPAUDIOBUFFERINFO *buf_info) override;
STDMETHODIMP GetBufferInfo(SPAUDIOBUFFERINFO *buf_info) override;
STDMETHODIMP GetDefaultFormat(GUID *format,
WAVEFORMATEX **co_mem_wfex_out) override;
STDMETHODIMP_(HANDLE) EventHandle(void) override;
STDMETHODIMP GetVolumeLevel(ULONG *level) override;
STDMETHODIMP SetVolumeLevel(ULONG level) override;
STDMETHODIMP GetBufferNotifySize(ULONG *size) override;
STDMETHODIMP SetBufferNotifySize(ULONG size) override;
};

View File

@ -0,0 +1,354 @@
#include <obs-frontend-api.h>
#include "captions-stream.hpp"
#include "captions.hpp"
#include "tool-helpers.hpp"
#include <sphelper.h>
#include <util/platform.h>
#include <util/windows/HRError.hpp>
#include <util/windows/ComPtr.hpp>
#include <util/windows/CoTaskMemPtr.hpp>
#include <util/threading.h>
#include <obs-module.h>
#include <string>
#include <thread>
#include <mutex>
#define do_log(type, format, ...) blog(type, "[Captions] " format, \
##__VA_ARGS__)
#define error(format, ...) do_log(LOG_ERROR, format, ##__VA_ARGS__)
#define debug(format, ...) do_log(LOG_DEBUG, format, ##__VA_ARGS__)
using namespace std;
/* State of the caption generator: the worker thread, the event used to ask
 * it to stop, and the audio source whose audio is captioned. */
struct obs_captions {
	thread th;
	/* guards source_name/source against the UI and save callbacks */
	recursive_mutex m;
	/* auto-reset event signalled by stop() */
	WinHandle stop_event;

	string source_name;
	OBSWeakSource source;

	void main_thread();
	void start();
	void stop();

	obs_captions()
		: stop_event(CreateEvent(nullptr, false, false, nullptr))
	{}

	/* make sure the worker is gone before the members are destroyed */
	~obs_captions()
	{
		stop();
	}
};
static obs_captions *captions = nullptr;
/* ------------------------------------------------------------------------- */
/* Builds the dialog: fills the source combo box with an empty entry plus
 * every source that outputs audio, selects the source currently used for
 * captions, and reflects whether the caption thread is running in the
 * "enable" check box. Widget signals are blocked while populating so the
 * slots do not fire. */
CaptionsDialog::CaptionsDialog(QWidget *parent) :
	QDialog(parent),
	ui(new Ui_CaptionsDialog)
{
	ui->setupUi(this);

	lock_guard<recursive_mutex> lock(captions->m);

	/* called once per source by obs_enum_sources below */
	auto add_source = [this] (obs_source_t *source)
	{
		const uint32_t flags = obs_source_get_output_flags(source);
		const QString name = obs_source_get_name(source);
		const bool has_audio = (flags & OBS_SOURCE_AUDIO) != 0;

		if (has_audio)
			ui->source->addItem(name);

		OBSWeakSource weak = OBSGetWeakRef(source);
		if (weak == captions->source)
			ui->source->setCurrentText(name);

		/* keep enumerating */
		return true;
	};
	using add_source_t = decltype(add_source);

	/* C-style trampoline forwarding to the capturing lambda */
	auto trampoline = [] (void *data, obs_source_t *source)
	{
		return (*static_cast<add_source_t*>(data))(source);
	};

	ui->source->blockSignals(true);
	ui->source->addItem(QStringLiteral(""));
	ui->source->setCurrentIndex(0);
	obs_enum_sources(trampoline, &add_source);
	ui->source->blockSignals(false);

	const bool running = captions->th.joinable();
	ui->enable->blockSignals(true);
	ui->enable->setChecked(running);
	ui->enable->blockSignals(false);
}
void CaptionsDialog::on_source_currentIndexChanged(int)
{
bool started = captions->th.joinable();
if (started)
captions->stop();
captions->m.lock();
captions->source_name = ui->source->currentText().toUtf8().constData();
captions->source = GetWeakSourceByName(captions->source_name.c_str());
captions->m.unlock();
if (started)
captions->start();
}
/* Starts or stops caption generation to match the check box state. */
void CaptionsDialog::on_enable_clicked(bool checked)
{
	if (!checked) {
		captions->stop();
		return;
	}

	captions->start();
}
/* ------------------------------------------------------------------------- */
void obs_captions::main_thread()
try {
ComPtr<CaptionStream> audio;
ComPtr<ISpObjectToken> token;
ComPtr<ISpRecoGrammar> grammar;
ComPtr<ISpRecognizer> recognizer;
ComPtr<ISpRecoContext> context;
HRESULT hr;
auto cb = [&] (const struct audio_data *audio_data,
bool muted)
{
audio->PushAudio(audio_data, muted);
};
using cb_t = decltype(cb);
auto pre_cb = [] (void *param, obs_source_t*,
const struct audio_data *audio_data, bool muted)
{
return (*static_cast<cb_t*>(param))(audio_data, muted);
};
os_set_thread_name(__FUNCTION__);
CoInitialize(nullptr);
hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", nullptr,
&token);
if (FAILED(hr))
throw HRError("SpFindBestToken failed", hr);
hr = CoCreateInstance(CLSID_SpInprocRecognizer, nullptr, CLSCTX_ALL,
__uuidof(ISpRecognizer), (void**)&recognizer);
if (FAILED(hr))
throw HRError("CoCreateInstance for recognizer failed", hr);
hr = recognizer->SetRecognizer(token);
if (FAILED(hr))
throw HRError("SetRecognizer failed", hr);
hr = recognizer->SetRecoState(SPRST_INACTIVE);
if (FAILED(hr))
throw HRError("SetRecoState(SPRST_INACTIVE) failed", hr);
hr = recognizer->CreateRecoContext(&context);
if (FAILED(hr))
throw HRError("CreateRecoContext failed", hr);
ULONGLONG interest = SPFEI(SPEI_RECOGNITION) |
SPFEI(SPEI_END_SR_STREAM);
hr = context->SetInterest(interest, interest);
if (FAILED(hr))
throw HRError("SetInterest failed", hr);
HANDLE notify;
hr = context->SetNotifyWin32Event();
if (FAILED(hr))
throw HRError("SetNotifyWin32Event", hr);
notify = context->GetNotifyEventHandle();
if (notify == INVALID_HANDLE_VALUE)
throw HRError("GetNotifyEventHandle failed", E_NOINTERFACE);
size_t sample_rate = audio_output_get_sample_rate(obs_get_audio());
audio = new CaptionStream((DWORD)sample_rate);
audio->Release();
hr = recognizer->SetInput(audio, false);
if (FAILED(hr))
throw HRError("SetInput failed", hr);
hr = context->CreateGrammar(1, &grammar);
if (FAILED(hr))
throw HRError("CreateGrammar failed", hr);
hr = grammar->LoadDictation(nullptr, SPLO_STATIC);
if (FAILED(hr))
throw HRError("LoadDictation failed", hr);
hr = grammar->SetDictationState(SPRS_ACTIVE);
if (FAILED(hr))
throw HRError("SetDictationState failed", hr);
hr = recognizer->SetRecoState(SPRST_ACTIVE);
if (FAILED(hr))
throw HRError("SetRecoState(SPRST_ACTIVE) failed", hr);
HANDLE events[] = {notify, stop_event};
{
captions->source = GetWeakSourceByName(
captions->source_name.c_str());
OBSSource strong = OBSGetStrongRef(source);
if (strong)
obs_source_add_audio_capture_callback(strong,
pre_cb, &cb);
}
for (;;) {
DWORD ret = WaitForMultipleObjects(2, events, false, INFINITE);
if (ret != WAIT_OBJECT_0)
break;
CSpEvent event;
bool exit = false;
while (event.GetFrom(context) == S_OK) {
if (event.eEventId == SPEI_RECOGNITION) {
ISpRecoResult *result = event.RecoResult();
CoTaskMemPtr<wchar_t> text;
hr = result->GetText((ULONG)-1, (ULONG)-1,
true, &text, nullptr);
if (FAILED(hr))
continue;
char text_utf8[512];
os_wcs_to_utf8(text, 0, text_utf8, 512);
obs_output_t *output =
obs_frontend_get_streaming_output();
if (output)
obs_output_output_caption_text1(output,
text_utf8);
debug("\"%s\"", text_utf8);
obs_output_release(output);
} else if (event.eEventId == SPEI_END_SR_STREAM) {
exit = true;
break;
}
}
if (exit)
break;
}
{
OBSSource strong = OBSGetStrongRef(source);
if (strong)
obs_source_remove_audio_capture_callback(strong,
pre_cb, &cb);
}
audio->Stop();
CoUninitialize();
} catch (HRError err) {
error("%s failed: %s (%lX)", __FUNCTION__, err.str, err.hr);
CoUninitialize();
}
void obs_captions::start()
{
if (!captions->th.joinable())
captions->th = thread([] () {captions->main_thread();});
}
/* Signals the worker thread to stop and waits for it to finish. Does
 * nothing when no thread is attached. */
void obs_captions::stop()
{
	if (captions->th.joinable()) {
		SetEvent(captions->stop_event);
		captions->th.join();
	}
}
/* ------------------------------------------------------------------------- */
extern "C" void FreeCaptions()
{
delete captions;
captions = nullptr;
}
/* Frontend event hook: tears the caption state down when OBS exits. */
static void obs_event(enum obs_frontend_event event, void *)
{
	if (event != OBS_FRONTEND_EVENT_EXIT)
		return;

	FreeCaptions();
}
/* Frontend save/load hook.
 *
 * Saving: stores the selected source name and whether captions are running
 * under the "captions" key of save_data.
 * Loading: stops captions, restores the source from save_data (an empty
 * object is used when the key is missing) and restarts captions when they
 * were enabled. */
static void save_caption_data(obs_data_t *save_data, bool saving, void*)
{
	if (!saving) {
		captions->stop();

		bool enabled;
		{
			lock_guard<recursive_mutex> lock(captions->m);

			obs_data_t *obj = obs_data_get_obj(save_data,
					"captions");
			if (!obj)
				obj = obs_data_create();

			enabled = obs_data_get_bool(obj, "enabled");
			captions->source_name =
				obs_data_get_string(obj, "source");
			captions->source = GetWeakSourceByName(
					captions->source_name.c_str());
			obs_data_release(obj);
		}

		/* start outside the lock, as the original does */
		if (enabled)
			captions->start();
		return;
	}

	lock_guard<recursive_mutex> lock(captions->m);

	obs_data_t *obj = obs_data_create();
	const bool running = captions->th.joinable();

	obs_data_set_string(obj, "source", captions->source_name.c_str());
	obs_data_set_bool(obj, "enabled", running);

	obs_data_set_obj(save_data, "captions", obj);
	obs_data_release(obj);
}
extern "C" void InitCaptions()
{
QAction *action = (QAction*)obs_frontend_add_tools_menu_qaction(
obs_module_text("Captions"));
captions = new obs_captions;
auto cb = [] ()
{
obs_frontend_push_ui_translation(obs_module_get_string);
QWidget *window =
(QWidget*)obs_frontend_get_main_window();
CaptionsDialog dialog(window);
dialog.exec();
obs_frontend_pop_ui_translation();
};
obs_frontend_add_save_callback(save_caption_data, nullptr);
obs_frontend_add_event_callback(obs_event, nullptr);
action->connect(action, &QAction::triggered, cb);
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <QDialog>
#include <memory>
#include "ui_captions.h"
/* Dialog of the caption tool; its widgets come from the generated
 * Ui_CaptionsDialog class. */
class CaptionsDialog : public QDialog {
Q_OBJECT
/* generated form, owned by the dialog */
std::unique_ptr<Ui_CaptionsDialog> ui;
public:
CaptionsDialog(QWidget *parent);
public slots:
/* NOTE(review): slot names follow the on_<object>_<signal> pattern;
 * presumably they are auto-connected to the "source" and "enable" widgets
 * by setupUi -- confirm before renaming. */
void on_source_currentIndexChanged(int idx);
void on_enable_clicked(bool checked);
};

View File

@ -11,6 +11,9 @@ Inactive="Inactive"
Start="Start"
Stop="Stop"
Captions="Captions (Experimental)"
Captions.AudioSource="Audio source:"
OutputTimer="Output Timer"
OutputTimer.Stream="Stop streaming after:"
OutputTimer.Record="Stop recording after:"

View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>CaptionsDialog</class>
<widget class="QDialog" name="CaptionsDialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>519</width>
<height>140</height>
</rect>
</property>
<property name="windowTitle">
<string>Captions</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QFormLayout" name="formLayout">
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Captions.AudioSource</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="source">
<property name="insertPolicy">
<enum>QComboBox::InsertAlphabetically</enum>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="enable">
<property name="text">
<string>Enable</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
</spacer>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="accept">
<property name="text">
<string>OK</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
</layout>
</item>
</layout>
</widget>
<resources/>
<connections>
<connection>
<sender>accept</sender>
<signal>clicked()</signal>
<receiver>CaptionsDialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>268</x>
<y>331</y>
</hint>
<hint type="destinationlabel">
<x>229</x>
<y>-11</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -0,0 +1,3 @@
#pragma once
#define BUILD_CAPTIONS @BUILD_CAPTIONS@

View File

@ -1,4 +1,5 @@
#include <obs-module.h>
#include "frontend-tools-config.h"
OBS_DECLARE_MODULE()
OBS_MODULE_USE_DEFAULT_LOCALE("frontend-tools", "en-US")
@ -7,6 +8,12 @@ OBS_MODULE_USE_DEFAULT_LOCALE("frontend-tools", "en-US")
void InitSceneSwitcher();
void FreeSceneSwitcher();
#endif
#if defined(_WIN32) && BUILD_CAPTIONS
void InitCaptions();
void FreeCaptions();
#endif
void InitOutputTimer();
void FreeOutputTimer();
@ -14,6 +21,9 @@ bool obs_module_load(void)
{
#if defined(_WIN32) || defined(__APPLE__)
InitSceneSwitcher();
#endif
#if defined(_WIN32) && BUILD_CAPTIONS
InitCaptions();
#endif
InitOutputTimer();
return true;
@ -23,6 +33,9 @@ void obs_module_unload(void)
{
#if defined(_WIN32) || defined(__APPLE__)
FreeSceneSwitcher();
#endif
#if defined(_WIN32) && BUILD_CAPTIONS
FreeCaptions();
#endif
FreeOutputTimer();
}