obs-ffmpeg: Enable multiple audio tracks for FFmpeg output
This allows multiple audio tracks for the FFmpeg output. Closes obsproject/obs-studio#1351
commit 340fb9d669
parent b8a3ae1b10
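
Not part of the diff below: a minimal standalone sketch of the track-selection model this commit introduces. The output receives a bitmask of enabled mixer tracks (audio_tracks) and derives from it the number of audio streams to create (audio_mix_count), mirroring the get_audio_mix_count() helper added in the diff. The MAX_AUDIO_MIXES value of 6 is assumed from libobs.

	/* sketch.c -- illustrative only; MAX_AUDIO_MIXES assumed from libobs */
	#include <stdio.h>

	#define MAX_AUDIO_MIXES 6

	static int get_audio_mix_count(int audio_mix_mask)
	{
		int mix_count = 0;
		for (int i = 0; i < MAX_AUDIO_MIXES; i++) {
			if ((audio_mix_mask & (1 << i)) != 0)
				mix_count++;
		}
		return mix_count;
	}

	int main(void)
	{
		/* tracks 1 and 3 enabled -> bits 0 and 2 set */
		int audio_tracks = (1 << 0) | (1 << 2);

		/* prints "2": two audio streams are created for the muxer */
		printf("%d\n", get_audio_mix_count(audio_tracks));
		return 0;
	}
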
@@ -45,6 +45,8 @@ struct ffmpeg_cfg {
 	int audio_encoder_id;
 	const char *video_settings;
 	const char *audio_settings;
+	int audio_mix_count;
+	int audio_tracks;
 	enum AVPixelFormat format;
 	enum AVColorRange color_range;
 	enum AVColorSpace color_space;
@@ -56,7 +58,7 @@ struct ffmpeg_cfg {

 struct ffmpeg_data {
 	AVStream *video;
-	AVStream *audio;
+	AVStream **audio_streams;
 	AVCodec *acodec;
 	AVCodec *vcodec;
 	AVFormatContext *output;
@@ -68,14 +70,18 @@ struct ffmpeg_data {

 	uint64_t start_timestamp;

-	int64_t total_samples;
+	int64_t total_samples[MAX_AUDIO_MIXES];
 	uint32_t audio_samplerate;
 	enum audio_format audio_format;
 	size_t audio_planes;
 	size_t audio_size;
-	struct circlebuf excess_frames[MAX_AV_PLANES];
-	uint8_t *samples[MAX_AV_PLANES];
-	AVFrame *aframe;
+	int num_audio_streams;
+
+	/* audio_tracks is a bitmask storing the indices of the mixes */
+	int audio_tracks;
+	struct circlebuf excess_frames[MAX_AUDIO_MIXES][MAX_AV_PLANES];
+	uint8_t *samples[MAX_AUDIO_MIXES][MAX_AV_PLANES];
+	AVFrame *aframe[MAX_AUDIO_MIXES];

 	struct ffmpeg_cfg config;

@@ -273,9 +279,9 @@ static bool create_video_stream(struct ffmpeg_data *data)
 	return true;
 }

-static bool open_audio_codec(struct ffmpeg_data *data)
+static bool open_audio_codec(struct ffmpeg_data *data, int idx)
 {
-	AVCodecContext *context = data->audio->codec;
+	AVCodecContext *context = data->audio_streams[idx]->codec;
 	char **opts = strlist_split(data->config.audio_settings, ' ', false);
 	int ret;

@@ -284,16 +290,16 @@ static bool open_audio_codec(struct ffmpeg_data *data)
 		strlist_free(opts);
 	}

-	data->aframe = av_frame_alloc();
-	if (!data->aframe) {
+	data->aframe[idx] = av_frame_alloc();
+	if (!data->aframe[idx]) {
 		blog(LOG_WARNING, "Failed to allocate audio frame");
 		return false;
 	}

-	data->aframe->format = context->sample_fmt;
-	data->aframe->channels = context->channels;
-	data->aframe->channel_layout = context->channel_layout;
-	data->aframe->sample_rate = context->sample_rate;
+	data->aframe[idx]->format = context->sample_fmt;
+	data->aframe[idx]->channels = context->channels;
+	data->aframe[idx]->channel_layout = context->channel_layout;
+	data->aframe[idx]->sample_rate = context->sample_rate;

 	context->strict_std_compliance = -2;

@@ -306,7 +312,7 @@ static bool open_audio_codec(struct ffmpeg_data *data)

 	data->frame_size = context->frame_size ? context->frame_size : 1024;

-	ret = av_samples_alloc(data->samples, NULL, context->channels,
+	ret = av_samples_alloc(data->samples[idx], NULL, context->channels,
 			data->frame_size, context->sample_fmt, 0);
 	if (ret < 0) {
 		blog(LOG_WARNING, "Failed to create audio buffer: %s",
@@ -317,9 +323,10 @@ static bool open_audio_codec(struct ffmpeg_data *data)
 	return true;
 }

-static bool create_audio_stream(struct ffmpeg_data *data)
+static bool create_audio_stream(struct ffmpeg_data *data, int idx)
 {
 	AVCodecContext *context;
+	AVStream *stream;
 	struct obs_audio_info aoi;

 	if (!obs_get_audio_info(&aoi)) {
@@ -327,17 +334,18 @@ static bool create_audio_stream(struct ffmpeg_data *data)
 		return false;
 	}

-	if (!new_stream(data, &data->audio, &data->acodec,
+	if (!new_stream(data, &stream, &data->acodec,
 			data->output->oformat->audio_codec,
 			data->config.audio_encoder))
 		return false;

-	context = data->audio->codec;
-	context->bit_rate = data->config.audio_bitrate * 1000;
-	context->time_base = (AVRational){ 1, aoi.samples_per_sec };
-	context->channels = get_audio_channels(aoi.speakers);
-	context->sample_rate = aoi.samples_per_sec;
-	context->channel_layout =
+	data->audio_streams[idx] = stream;
+	context = data->audio_streams[idx]->codec;
+	context->bit_rate = data->config.audio_bitrate * 1000;
+	context->time_base = (AVRational){ 1, aoi.samples_per_sec };
+	context->channels = get_audio_channels(aoi.speakers);
+	context->sample_rate = aoi.samples_per_sec;
+	context->channel_layout =
 			av_get_default_channel_layout(context->channels);

 	//AVlib default channel layout for 5 channels is 5.0 ; fix for 4.1
@@ -347,7 +355,7 @@ static bool create_audio_stream(struct ffmpeg_data *data)
 	context->sample_fmt = data->acodec->sample_fmts ?
 			data->acodec->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;

-	data->audio->time_base = context->time_base;
+	data->audio_streams[idx]->time_base = context->time_base;

 	data->audio_samplerate = aoi.samples_per_sec;
 	data->audio_format = convert_ffmpeg_sample_format(context->sample_fmt);
@@ -357,7 +365,7 @@ static bool create_audio_stream(struct ffmpeg_data *data)
 	if (data->output->oformat->flags & AVFMT_GLOBALHEADER)
 		context->flags |= CODEC_FLAG_GLOBAL_H;

-	return open_audio_codec(data);
+	return open_audio_codec(data, idx);
 }

 static inline bool init_streams(struct ffmpeg_data *data)
@@ -368,9 +376,14 @@ static inline bool init_streams(struct ffmpeg_data *data)
 	if (!create_video_stream(data))
 		return false;

-	if (format->audio_codec != AV_CODEC_ID_NONE)
-		if (!create_audio_stream(data))
-			return false;
+	if (format->audio_codec != AV_CODEC_ID_NONE && data->num_audio_streams) {
+		data->audio_streams = calloc(1,
+				data->num_audio_streams * sizeof(void*));
+		for (int i = 0; i < data->num_audio_streams; i++) {
+			if (!create_audio_stream(data, i))
+				return false;
+		}
+	}

 	return true;
 }
@@ -457,12 +470,14 @@ static void close_video(struct ffmpeg_data *data)

 static void close_audio(struct ffmpeg_data *data)
 {
-	for (size_t i = 0; i < MAX_AV_PLANES; i++)
-		circlebuf_free(&data->excess_frames[i]);
+	for (int idx = 0; idx < data->num_audio_streams; idx++) {
+		for (size_t i = 0; i < MAX_AV_PLANES; i++)
+			circlebuf_free(&data->excess_frames[idx][i]);

-	av_freep(&data->samples[0]);
-	avcodec_close(data->audio->codec);
-	av_frame_free(&data->aframe);
+		av_freep(&data->samples[idx][0]);
+		avcodec_close(data->audio_streams[idx]->codec);
+		av_frame_free(&data->aframe[idx]);
+	}
 }

 static void ffmpeg_data_free(struct ffmpeg_data *data)
@@ -472,8 +487,11 @@ static void ffmpeg_data_free(struct ffmpeg_data *data)

 	if (data->video)
 		close_video(data);
-	if (data->audio)
+	if (data->audio_streams) {
 		close_audio(data);
+		free(data->audio_streams);
+		data->audio_streams = NULL;
+	}

 	if (data->output) {
 		if ((data->output->oformat->flags & AVFMT_NOFILE) == 0)
@@ -528,7 +546,8 @@ static bool ffmpeg_data_init(struct ffmpeg_data *data,

 	memset(data, 0, sizeof(struct ffmpeg_data));
 	data->config = *config;
-
+	data->num_audio_streams = config->audio_mix_count;
+	data->audio_tracks = config->audio_tracks;
 	if (!config->url || !*config->url)
 		return false;

@@ -766,7 +785,7 @@ static void receive_video(void *param, struct video_data *frame)
 	data->total_frames++;
 }

-static void encode_audio(struct ffmpeg_output *output,
+static void encode_audio(struct ffmpeg_output *output, int idx,
 		struct AVCodecContext *context, size_t block_size)
 {
 	struct ffmpeg_data *data = &output->ff_data;
@@ -775,13 +794,13 @@ static void encode_audio(struct ffmpeg_output *output,
 	int ret, got_packet;
 	size_t total_size = data->frame_size * block_size * context->channels;

-	data->aframe->nb_samples = data->frame_size;
-	data->aframe->pts = av_rescale_q(data->total_samples,
+	data->aframe[idx]->nb_samples = data->frame_size;
+	data->aframe[idx]->pts = av_rescale_q(data->total_samples[idx],
 			(AVRational){1, context->sample_rate},
 			context->time_base);

-	ret = avcodec_fill_audio_frame(data->aframe, context->channels,
-			context->sample_fmt, data->samples[0],
+	ret = avcodec_fill_audio_frame(data->aframe[idx], context->channels,
+			context->sample_fmt, data->samples[idx][0],
 			(int)total_size, 1);
 	if (ret < 0) {
 		blog(LOG_WARNING, "encode_audio: avcodec_fill_audio_frame "
@@ -789,10 +808,10 @@ static void encode_audio(struct ffmpeg_output *output,
 		return;
 	}

-	data->total_samples += data->frame_size;
+	data->total_samples[idx] += data->frame_size;

 #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(57, 40, 101)
-	ret = avcodec_send_frame(context, data->aframe);
+	ret = avcodec_send_frame(context, data->aframe[idx]);
 	if (ret == 0)
 		ret = avcodec_receive_packet(context, &packet);

@@ -801,7 +820,7 @@ static void encode_audio(struct ffmpeg_output *output,
 	if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
 		ret = 0;
 #else
-	ret = avcodec_encode_audio2(context, &packet, data->aframe,
+	ret = avcodec_encode_audio2(context, &packet, data->aframe[idx],
 			&got_packet);
 #endif
 	if (ret < 0) {
@@ -813,11 +832,13 @@ static void encode_audio(struct ffmpeg_output *output,
 	if (!got_packet)
 		return;

-	packet.pts = rescale_ts(packet.pts, context, data->audio->time_base);
-	packet.dts = rescale_ts(packet.dts, context, data->audio->time_base);
+	packet.pts = rescale_ts(packet.pts, context,
+			data->audio_streams[idx]->time_base);
+	packet.dts = rescale_ts(packet.dts, context,
+			data->audio_streams[idx]->time_base);
 	packet.duration = (int)av_rescale_q(packet.duration, context->time_base,
-			data->audio->time_base);
-	packet.stream_index = data->audio->index;
+			data->audio_streams[idx]->time_base);
+	packet.stream_index = data->audio_streams[idx]->index;

 	pthread_mutex_lock(&output->write_mutex);
 	da_push_back(output->packets, &packet);
@@ -853,18 +874,34 @@ static bool prepare_audio(struct ffmpeg_data *data,
 	return true;
 }

-static void receive_audio(void *param, struct audio_data *frame)
+/* Given a bitmask for the selected tracks and the mix index,
+ * this returns the stream index which will be passed to the muxer. */
+static int get_track_order(int track_config, size_t mix_index)
+{
+	int position = 0;
+	for (size_t i = 0; i < mix_index; i++) {
+		if (track_config & 1 << i)
+			position++;
+	}
+	return position;
+}
+
+static void receive_audio(void *param, size_t mix_idx, struct audio_data *frame)
 {
 	struct ffmpeg_output *output = param;
 	struct ffmpeg_data *data = &output->ff_data;
 	size_t frame_size_bytes;
 	struct audio_data in;
+	int track_order;

-	// codec doesn't support audio or none configured
-	if (!data->audio)
+	/* check that the track was selected */
+	if ((data->audio_tracks & (1 << mix_idx)) == 0)
 		return;

-	AVCodecContext *context = data->audio->codec;
+	/* get track order (first selected, etc ...) */
+	track_order = get_track_order(data->audio_tracks, mix_idx);
+
+	AVCodecContext *context = data->audio_streams[track_order]->codec;

 	if (!data->start_timestamp)
 		return;
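
Not part of the diff: a worked example of the track-ordering logic above, under the assumption that tracks 1 and 3 are enabled (track_config = 0x5). Mix indices that fail the audio_tracks bitmask check in receive_audio() never reach get_track_order(); the selected mixes are packed into consecutive stream slots.

	/* track_order_demo.c -- illustrative only */
	#include <stdio.h>

	static int get_track_order(int track_config, size_t mix_index)
	{
		int position = 0;
		for (size_t i = 0; i < mix_index; i++) {
			if (track_config & 1 << i)
				position++;
		}
		return position;
	}

	int main(void)
	{
		int track_config = 0x5; /* bits 0 and 2: mixes 0 and 2 selected */

		printf("mix 0 -> stream %d\n", get_track_order(track_config, 0)); /* 0 */
		printf("mix 2 -> stream %d\n", get_track_order(track_config, 2)); /* 1 */
		/* mix 1 is rejected earlier: (audio_tracks & (1 << 1)) == 0 */
		return 0;
	}
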
@@ -877,15 +914,16 @@ static void receive_audio(void *param, struct audio_data *frame)
 	frame_size_bytes = (size_t)data->frame_size * data->audio_size;

 	for (size_t i = 0; i < data->audio_planes; i++)
-		circlebuf_push_back(&data->excess_frames[i], in.data[i],
-				in.frames * data->audio_size);
+		circlebuf_push_back(&data->excess_frames[track_order][i],
+				in.data[i], in.frames * data->audio_size);

-	while (data->excess_frames[0].size >= frame_size_bytes) {
+	while (data->excess_frames[track_order][0].size >= frame_size_bytes) {
 		for (size_t i = 0; i < data->audio_planes; i++)
-			circlebuf_pop_front(&data->excess_frames[i],
-					data->samples[i], frame_size_bytes);
+			circlebuf_pop_front(&data->excess_frames[track_order][i],
+					data->samples[track_order][i],
+					frame_size_bytes);

-		encode_audio(output, context, data->audio_size);
+		encode_audio(output, track_order, context, data->audio_size);
 	}
 }

@@ -901,7 +939,7 @@ static uint64_t get_packet_sys_dts(struct ffmpeg_output *output,
 		time_base = data->video->time_base;
 		start_ts = output->video_start_ts;
 	} else {
-		time_base = data->audio->time_base;
+		time_base = data->audio_streams[0]->time_base;
 		start_ts = output->audio_start_ts;
 	}

@@ -990,6 +1028,18 @@ static inline const char *get_string_or_null(obs_data_t *settings,
 	return value;
 }

+static int get_audio_mix_count(int audio_mix_mask)
+{
+	int mix_count = 0;
+	for (int i = 0; i < MAX_AUDIO_MIXES; i++) {
+		if ((audio_mix_mask & (1 << i)) != 0) {
+			mix_count++;
+		}
+	}
+
+	return mix_count;
+}
+
 static bool try_connect(struct ffmpeg_output *output)
 {
 	video_t *video = obs_output_video(output->output);
@@ -1025,6 +1075,8 @@ static bool try_connect(struct ffmpeg_output *output)
 	config.height = (int)obs_output_get_height(output->output);
 	config.format = obs_to_ffmpeg_video_format(
 			video_output_get_format(video));
+	config.audio_tracks = (int)obs_output_get_mixers(output->output);
+	config.audio_mix_count = get_audio_mix_count(config.audio_tracks);

 	if (format_is_yuv(voi->format)) {
 		config.color_range = voi->range == VIDEO_RANGE_FULL ?
@@ -1157,13 +1209,15 @@ static uint64_t ffmpeg_output_total_bytes(void *data)

 struct obs_output_info ffmpeg_output = {
 	.id = "ffmpeg_output",
-	.flags = OBS_OUTPUT_AUDIO | OBS_OUTPUT_VIDEO,
+	.flags = OBS_OUTPUT_AUDIO |
+	         OBS_OUTPUT_VIDEO |
+	         OBS_OUTPUT_MULTI_TRACK,
 	.get_name = ffmpeg_output_getname,
 	.create = ffmpeg_output_create,
 	.destroy = ffmpeg_output_destroy,
 	.start = ffmpeg_output_start,
 	.stop = ffmpeg_output_stop,
 	.raw_video = receive_video,
-	.raw_audio = receive_audio,
+	.raw_audio2 = receive_audio,
 	.get_total_bytes = ffmpeg_output_total_bytes,
 };
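
Not part of the diff: with OBS_OUTPUT_MULTI_TRACK set and receive_audio registered as raw_audio2, the tracks to record are chosen through the output's generic mixer bitmask, which try_connect() reads back via obs_output_get_mixers(). Below is a hypothetical caller-side sketch, assuming the usual libobs output API (obs_output_create, obs_output_set_mixers, obs_output_start); the helper name start_multi_track_ffmpeg is illustrative.

	#include <obs.h>

	/* Hypothetical helper, not from the commit: create the FFmpeg output
	 * and enable mixer tracks 1 and 3 before starting it. */
	static obs_output_t *start_multi_track_ffmpeg(obs_data_t *settings)
	{
		obs_output_t *out = obs_output_create("ffmpeg_output",
				"multi-track ffmpeg output", settings, NULL);
		if (!out)
			return NULL;

		/* bitmask of enabled tracks; the output reads it back
		 * through obs_output_get_mixers() */
		obs_output_set_mixers(out, (1 << 0) | (1 << 2));

		if (!obs_output_start(out)) {
			obs_output_release(out);
			return NULL;
		}
		return out;
	}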