diff --git a/libobs/CMakeLists.txt b/libobs/CMakeLists.txt
index f37f57237..71ae5c6c6 100644
--- a/libobs/CMakeLists.txt
+++ b/libobs/CMakeLists.txt
@@ -284,6 +284,7 @@ set(libobs_libobs_SOURCES
 	obs-display.c
 	obs-view.c
 	obs-scene.c
+	obs-audio.c
 	obs-video.c)
 set(libobs_libobs_HEADERS
 	${libobs_PLATFORM_HEADERS}
diff --git a/libobs/obs-audio.c b/libobs/obs-audio.c
new file mode 100644
index 000000000..619a74f63
--- /dev/null
+++ b/libobs/obs-audio.c
@@ -0,0 +1,418 @@
+/******************************************************************************
+    Copyright (C) 2015 by Hugh Bailey <obs.jim@gmail.com>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+******************************************************************************/
+
+#include <inttypes.h>
+#include "obs-internal.h"
+
+struct ts_info {
+	uint64_t start; /* first timestamp inside the tick (nanoseconds) */
+	uint64_t end;   /* first timestamp past the tick: start <= t < end */
+};
+
+#define DEBUG_AUDIO 0 /* set to 1 to compile in the LOG_DEBUG traces */
+#define MAX_BUFFERING_TICKS 80 /* cap for audio->total_buffering_ticks */
+
+static void push_audio_tree(obs_source_t *parent, obs_source_t *source, void *p)
+{
+	/* enum callback: queue each source once, holding a ref while queued */
+	struct obs_core_audio *core_audio = p;
+
+	UNUSED_PARAMETER(parent);
+	if (da_find(core_audio->render_order, &source, 0) != DARRAY_INVALID)
+		return;
+	obs_source_addref(source);
+	da_push_back(core_audio->render_order, &source);
+}
+
+static inline size_t convert_time_to_frames(size_t sample_rate, uint64_t t)
+{	/* nanoseconds -> whole audio frames at sample_rate, rounding down */
+	return (size_t)((uint64_t)sample_rate * t / 1000000000ULL);
+}
+
+static inline void mix_audio(struct audio_output_data *mixes,
+		obs_source_t *source, size_t channels, size_t sample_rate,
+		struct ts_info *ts)
+{
+	/* Adds the source's rendered output into every mix of `mixes`.  Does
+	 * nothing unless ts->start <= source->audio_ts < ts->end. */
+	size_t total_floats = AUDIO_OUTPUT_FRAMES;
+	size_t start_point = 0;
+
+	if (source->audio_ts < ts->start || ts->end <= source->audio_ts)
+		return;
+
+	if (source->audio_ts != ts->start) {
+		/* source starts mid-tick: offset into the mix, add fewer samples */
+		start_point = convert_time_to_frames(sample_rate,
+				source->audio_ts - ts->start);
+		/* >= rather than ==: a larger offset must never be allowed
+		 * to wrap total_floats below and overrun both buffers */
+		if (start_point >= AUDIO_OUTPUT_FRAMES)
+			return;
+		total_floats -= start_point;
+	}
+
+	for (size_t mix_idx = 0; mix_idx < MAX_AUDIO_MIXES; mix_idx++) {
+		for (size_t ch = 0; ch < channels; ch++) {
+			float *mix = mixes[mix_idx].data[ch] + start_point;
+			float *aud = source->audio_output_buf[mix_idx][ch];
+			float *end = aud + total_floats;
+
+			while (aud < end)
+				*(mix++) += *(aud++);
+		}
+	}
+}
+
+static void ignore_audio(obs_source_t *source, size_t channels,
+		size_t sample_rate)
+{
+	/* drop all queued input and move audio_ts past what was dropped */
+	size_t queued_frames = source->audio_input_buf[0].size / sizeof(float);
+	if (!queued_frames)
+		return;
+	for (size_t i = 0; i < channels; i++) {
+		struct circlebuf *buf = &source->audio_input_buf[i];
+		circlebuf_pop_front(buf, NULL, buf->size);
+	}
+	source->audio_ts += (uint64_t)queued_frames * 1000000000ULL /
+		(uint64_t)sample_rate;
+}
+
+static inline void discard_audio(struct obs_core_audio *audio,
+		obs_source_t *source, size_t channels, size_t sample_rate,
+		struct ts_info *ts)
+{
+	/* Pops the samples for tick `ts` and moves audio_ts to ts->end. */
+	size_t total_floats = AUDIO_OUTPUT_FRAMES;
+
+#if DEBUG_AUDIO == 1
+	bool is_audio_source = source->info.output_flags & OBS_SOURCE_AUDIO;
+#endif
+	/* sources with their own audio_render only get their timestamp reset */
+	if (source->info.audio_render) {
+		source->audio_ts = 0;
+		return;
+	}
+	/* source data begins at/after the end of this tick: keep it queued */
+	if (ts->end <= source->audio_ts) {
+#if DEBUG_AUDIO == 1
+		blog(LOG_DEBUG, "can't discard, source "
+				"timestamp (%"PRIu64") >= "
+				"end timestamp (%"PRIu64")",
+				source->audio_ts, ts->end);
+#endif
+		return;
+	}
+	/* source is behind this tick: flush it only once buffering is maxed */
+	if (source->audio_ts < (ts->start - 1)) {
+#if DEBUG_AUDIO == 1
+		if (is_audio_source) {
+			blog(LOG_DEBUG, "can't discard, source "
+					"timestamp (%"PRIu64") < "
+					"start timestamp (%"PRIu64")",
+					source->audio_ts, ts->start);
+		}
+#endif
+		if (audio->total_buffering_ticks == MAX_BUFFERING_TICKS)
+			ignore_audio(source, channels, sample_rate);
+		return;
+	}
+	/* source starts part-way into the tick: fewer samples belong to it */
+	if (source->audio_ts != ts->start &&
+	    source->audio_ts != (ts->start - 1)) {
+		size_t start_point = convert_time_to_frames(sample_rate,
+				source->audio_ts - ts->start);
+		if (start_point >= AUDIO_OUTPUT_FRAMES) {
+#if DEBUG_AUDIO == 1
+			if (is_audio_source)
+				blog(LOG_DEBUG, "can't discard, start point is "
+						"at audio frame count");
+#endif
+			return;
+		}
+
+		total_floats -= start_point;
+	}
+
+	size_t size = total_floats * sizeof(float);
+	/* wait until the rest of the tick has actually been queued */
+	if (source->audio_input_buf[0].size < size) {
+#if DEBUG_AUDIO == 1
+		if (is_audio_source)
+			blog(LOG_DEBUG, "can't discard, data still pending");
+#endif
+		return;
+	}
+
+	for (size_t ch = 0; ch < channels; ch++)
+		circlebuf_pop_front(&source->audio_input_buf[ch], NULL, size);
+
+#if DEBUG_AUDIO == 1
+	if (is_audio_source)
+		blog(LOG_DEBUG, "audio discarded, new ts: %"PRIu64,
+				ts->end);
+#endif
+
+	source->audio_ts = ts->end;
+}
+
+static void add_audio_buffering(struct obs_core_audio *audio,
+		size_t sample_rate, struct ts_info *ts, uint64_t min_ts)
+{
+	/* Queues earlier tick windows ahead of `ts` to reach back to min_ts. */
+	struct ts_info new_ts;
+	uint64_t frames;
+	int ticks;
+
+	if (audio->total_buffering_ticks == MAX_BUFFERING_TICKS)
+		return;
+
+	if (!audio->buffering_wait_ticks)
+		audio->buffered_ts = ts->start;
+
+	frames = ns_to_audio_frames(sample_rate, ts->start - min_ts);
+	ticks = (int)((frames + AUDIO_OUTPUT_FRAMES - 1) / AUDIO_OUTPUT_FRAMES);
+	/* sub-frame offset: add nothing, and keep *ts (new_ts.end is unset) */
+	if (ticks <= 0)
+		return;
+	audio->total_buffering_ticks += ticks;
+	if (audio->total_buffering_ticks >= MAX_BUFFERING_TICKS) {
+		ticks -= audio->total_buffering_ticks - MAX_BUFFERING_TICKS;
+		audio->total_buffering_ticks = MAX_BUFFERING_TICKS;
+		blog(LOG_WARNING, "Max audio buffering reached!");
+	}
+
+	blog(LOG_INFO, "adding %d ticks of buffering, total buffering is "
+			"now %d", ticks, audio->total_buffering_ticks);
+#if DEBUG_AUDIO == 1
+	blog(LOG_DEBUG, "min_ts (%"PRIu64") < start timestamp "
+			"(%"PRIu64")", min_ts, ts->start);
+	blog(LOG_DEBUG, "old buffered ts: %"PRIu64"-%"PRIu64,
+			ts->start, ts->end);
+#endif
+
+	new_ts.start = audio->buffered_ts - audio_frames_to_ns(sample_rate,
+			audio->buffering_wait_ticks * AUDIO_OUTPUT_FRAMES);
+	/* each pass queues a window one tick earlier than the previous one */
+	while (ticks--) {
+		int cur_ticks = ++audio->buffering_wait_ticks;
+
+		new_ts.end = new_ts.start;
+		new_ts.start = audio->buffered_ts - audio_frames_to_ns(
+				sample_rate,
+				cur_ticks * AUDIO_OUTPUT_FRAMES);
+
+#if DEBUG_AUDIO == 1
+		blog(LOG_DEBUG, "add buffered ts: %"PRIu64"-%"PRIu64,
+				new_ts.start, new_ts.end);
+#endif
+
+		circlebuf_push_front(&audio->buffered_timestamps, &new_ts,
+				sizeof(new_ts));
+	}
+
+	*ts = new_ts;
+}
+
+static bool audio_buffer_insuffient(struct obs_source *source,
+		size_t sample_rate, uint64_t min_ts)
+{
+	/* true (after flagging the source pending and clearing its timing
+	 * state) when too little input is queued for the tick at min_ts */
+	size_t frames_needed = AUDIO_OUTPUT_FRAMES;
+	bool aligned;
+
+	if (source->info.audio_render)
+		return false;
+	if (source->audio_pending || !source->audio_ts)
+		return false;
+
+	/* at min_ts (or 1 ns before it) a whole tick of input is required */
+	aligned = source->audio_ts == min_ts ||
+		source->audio_ts == (min_ts - 1);
+	if (!aligned) {
+		size_t offset_frames = convert_time_to_frames(sample_rate,
+				source->audio_ts - min_ts);
+		if (offset_frames >= AUDIO_OUTPUT_FRAMES)
+			return false;
+		frames_needed -= offset_frames;
+	}
+
+	if (source->audio_input_buf[0].size >= frames_needed * sizeof(float))
+		return false;
+
+	source->audio_pending = true;
+	source->audio_ts = 0;
+	source->timing_adjust = 0;
+	source->timing_set = false;
+	source->next_audio_ts_min = 0;
+	source->next_audio_sys_ts_min = 0;
+	return true;
+}
+
+static inline void find_min_ts(struct obs_core_data *data,
+		uint64_t *min_ts)
+{
+	/* lower *min_ts to the earliest non-zero audio_ts of a ready source */
+	struct obs_source *cur = data->first_audio_source;
+	for (; cur; cur = (struct obs_source*)cur->next_audio_source) {
+		if (cur->audio_pending || !cur->audio_ts)
+			continue;
+		if (cur->audio_ts < *min_ts)
+			*min_ts = cur->audio_ts;
+	}
+}
+
+static inline bool mark_invalid_sources(struct obs_core_data *data,
+		size_t sample_rate, uint64_t min_ts)
+{
+	/* checks every listed source (no early exit, each call may flag its
+	 * source); true when at least one source was flagged */
+	struct obs_source *cur = data->first_audio_source;
+	bool any_flagged = false;
+
+	for (; cur; cur = (struct obs_source*)cur->next_audio_source) {
+		if (audio_buffer_insuffient(cur, sample_rate, min_ts))
+			any_flagged = true;
+	}
+	return any_flagged;
+}
+
+static inline void calc_min_ts(struct obs_core_data *data,
+		size_t sample_rate, uint64_t *min_ts)
+{	/* find the minimum, flag sources short on input, then scan again */
+	find_min_ts(data, min_ts);
+	if (mark_invalid_sources(data, sample_rate, *min_ts))
+		find_min_ts(data, min_ts); /* NOTE(review): can only lower it */
+}
+
+static inline void release_audio_sources(struct obs_core_audio *audio)
+{	/* drop the reference push_audio_tree took for each queued source */
+	for (size_t idx = 0; idx < audio->render_order.num; ++idx)
+		obs_source_release(audio->render_order.array[idx]);
+}
+
+bool audio_callback(void *param,
+		uint64_t start_ts_in, uint64_t end_ts_in, uint64_t *out_ts,
+		uint32_t mixers, struct audio_output_data *mixes)
+{
+	struct obs_core_data *data = &obs->data;
+	struct obs_core_audio *audio = &obs->audio;
+	struct obs_source *source;
+	size_t sample_rate = audio_output_get_sample_rate(audio->audio);
+	size_t channels = audio_output_get_channels(audio->audio);
+	struct ts_info ts = {start_ts_in, end_ts_in};
+	size_t audio_size;
+	uint64_t min_ts;
+	/* one tick: render sources, mix the root sources, drop used input */
+	da_resize(audio->render_order, 0);
+	da_resize(audio->root_nodes, 0);
+	/* queue the incoming tick, then work on the oldest queued tick */
+	circlebuf_push_back(&audio->buffered_timestamps, &ts, sizeof(ts));
+	circlebuf_peek_front(&audio->buffered_timestamps, &ts, sizeof(ts));
+	min_ts = ts.start;
+
+	audio_size = AUDIO_OUTPUT_FRAMES * sizeof(float);
+
+#if DEBUG_AUDIO == 1
+	blog(LOG_DEBUG, "ts %"PRIu64"-%"PRIu64, ts.start, ts.end);
+#endif
+
+	/* ------------------------------------------------ */
+	/* build audio render order
+	 * NOTE: these are source channels, not audio channels */
+	for (uint32_t i = 0; i < MAX_CHANNELS; i++) {
+		obs_source_t *root = obs_get_output_source(i);
+		if (root) {
+			obs_source_enum_active_tree(root, push_audio_tree,
+					audio);
+			push_audio_tree(NULL, root, audio);
+			da_push_back(audio->root_nodes, &root);
+			obs_source_release(root);
+		}
+	}
+
+	/* ------------------------------------------------ */
+	/* render audio data */
+	for (size_t i = 0; i < audio->render_order.num; i++) {
+		obs_source_t *node = audio->render_order.array[i];
+		obs_source_audio_render(node, mixers, channels, sample_rate,
+				audio_size);
+	}
+
+	/* ------------------------------------------------ */
+	/* get minimum audio timestamp */
+	pthread_mutex_lock(&data->audio_sources_mutex);
+	calc_min_ts(data, sample_rate, &min_ts);
+	pthread_mutex_unlock(&data->audio_sources_mutex);
+
+	/* ------------------------------------------------ */
+	/* if a source has gone backward in time, buffer */
+	if (min_ts < ts.start)
+		add_audio_buffering(audio, sample_rate, &ts, min_ts);
+
+	/* ------------------------------------------------ */
+	/* mix audio */
+	if (!audio->buffering_wait_ticks) {
+		for (size_t i = 0; i < audio->root_nodes.num; i++) {
+			obs_source_t *root = audio->root_nodes.array[i];
+
+			if (root->audio_pending)
+				continue;
+
+			pthread_mutex_lock(&root->audio_buf_mutex);
+
+			if (root->audio_output_buf[0][0] && root->audio_ts)
+				mix_audio(mixes, root, channels, sample_rate,
+						&ts);
+
+			pthread_mutex_unlock(&root->audio_buf_mutex);
+		}
+	}
+
+	/* ------------------------------------------------ */
+	/* discard audio */
+	pthread_mutex_lock(&data->audio_sources_mutex);
+
+	source = data->first_audio_source;
+	while (source) {
+		pthread_mutex_lock(&source->audio_buf_mutex);
+		discard_audio(audio, source, channels, sample_rate, &ts);
+		pthread_mutex_unlock(&source->audio_buf_mutex);
+
+		source = (struct obs_source*)source->next_audio_source;
+	}
+
+	pthread_mutex_unlock(&data->audio_sources_mutex);
+
+	/* ------------------------------------------------ */
+	/* release audio sources */
+	release_audio_sources(audio);
+
+	circlebuf_pop_front(&audio->buffered_timestamps, NULL, sizeof(ts));
+
+	*out_ts = ts.start;
+	/* return false for each outstanding buffering tick, true otherwise */
+	if (audio->buffering_wait_ticks) {
+		audio->buffering_wait_ticks--;
+		return false;
+	}
+
+	UNUSED_PARAMETER(param);
+	return true;
+}
diff --git a/libobs/obs-internal.h b/libobs/obs-internal.h
index f75ca5bd2..45b2d0e25 100644
--- a/libobs/obs-internal.h
+++ b/libobs/obs-internal.h
@@ -267,6 +267,14 @@ struct obs_core_audio {
 	/* TODO: sound output subsystem */
 	audio_t                         *audio;
 
+	DARRAY(struct obs_source*)      render_order;
+	DARRAY(struct obs_source*)      root_nodes;
+
+	uint64_t                        buffered_ts;
+	struct circlebuf                buffered_timestamps;
+	int                             buffering_wait_ticks;
+	int                             total_buffering_ticks;
+
 	float                           user_volume;
 };
 
@@ -361,6 +369,10 @@ extern struct obs_core *obs;
 
 extern void *obs_video_thread(void *param);
 
+extern bool audio_callback(void *param,
+		uint64_t start_ts_in, uint64_t end_ts_in, uint64_t *out_ts,
+		uint32_t mixers, struct audio_output_data *mixes);
+
 
 /* ------------------------------------------------------------------------- */
 /* obs shared context data */
@@ -450,6 +462,22 @@ struct async_frame {
 	bool used;
 };
 
+enum audio_action_type {
+	AUDIO_ACTION_VOL,  /* set source->volume from audio_action.vol */
+	AUDIO_ACTION_MUTE, /* set source->muted from audio_action.set */
+	AUDIO_ACTION_PTT,  /* set push_to_talk_pressed from audio_action.set */
+	AUDIO_ACTION_PTM,  /* set push_to_mute_pressed from audio_action.set */
+};
+
+struct audio_action {
+	uint64_t timestamp; /* os_gettime_ns() value taken when requested */
+	enum audio_action_type type; /* selects which union member is used */
+	union {
+		float vol; /* AUDIO_ACTION_VOL */
+		bool  set; /* AUDIO_ACTION_MUTE, _PTT and _PTM */
+	};
+};
+
 struct obs_weak_source {
 	struct obs_weak_ref ref;
 	struct obs_source *source;
@@ -499,20 +527,24 @@ struct obs_source {
 	/* audio */
 	bool                            audio_failed;
 	bool                            audio_pending;
+	bool                            user_muted;
 	bool                            muted;
 	struct obs_source               *next_audio_source;
 	struct obs_source               **prev_next_audio_source;
 	uint64_t                        audio_ts;
 	struct circlebuf                audio_input_buf[MAX_AUDIO_CHANNELS];
+	DARRAY(struct audio_action)     audio_actions;
 	float                           *audio_output_buf[MAX_AUDIO_MIXES][MAX_AUDIO_CHANNELS];
 	struct resample_info            sample_info;
 	audio_resampler_t               *resampler;
+	pthread_mutex_t                 audio_actions_mutex;
 	pthread_mutex_t                 audio_buf_mutex;
 	pthread_mutex_t                 audio_mutex;
 	struct obs_audio_data           audio_data;
 	size_t                          audio_storage_size;
 	uint32_t                        audio_mixers;
 	float                           user_volume;
+	float                           volume;
 	int64_t                         sync_offset;
 
 	/* async video data */
@@ -555,8 +587,10 @@ struct obs_source {
 	obs_hotkey_id                   push_to_talk_key;
 	bool                            push_to_mute_enabled : 1;
 	bool                            push_to_mute_pressed : 1;
+	bool                            user_push_to_mute_pressed : 1;
 	bool                            push_to_talk_enabled : 1;
 	bool                            push_to_talk_pressed : 1;
+	bool                            user_push_to_talk_pressed : 1;
 	uint64_t                        push_to_mute_delay;
 	uint64_t                        push_to_mute_stop_time;
 	uint64_t                        push_to_talk_delay;
diff --git a/libobs/obs-source.c b/libobs/obs-source.c
index eba761e08..ef97836ac 100644
--- a/libobs/obs-source.c
+++ b/libobs/obs-source.c
@@ -148,6 +148,7 @@ bool obs_source_init(struct obs_source *source)
 	pthread_mutexattr_t attr;
 
 	source->user_volume = 1.0f;
+	source->volume = 1.0f;
 	source->sync_offset = 0;
 	pthread_mutex_init_value(&source->filter_mutex);
 	pthread_mutex_init_value(&source->async_mutex);
@@ -162,6 +163,8 @@ bool obs_source_init(struct obs_source *source)
 		return false;
 	if (pthread_mutex_init(&source->audio_buf_mutex, NULL) != 0)
 		return false;
+	if (pthread_mutex_init(&source->audio_actions_mutex, NULL) != 0)
+		return false;
 	if (pthread_mutex_init(&source->audio_mutex, NULL) != 0)
 		return false;
 	if (pthread_mutex_init(&source->async_mutex, NULL) != 0)
@@ -225,27 +228,43 @@ static bool obs_source_hotkey_unmute(void *data,
 static void obs_source_hotkey_push_to_mute(void *data,
 		obs_hotkey_id id, obs_hotkey_t *key, bool pressed)
 {
+	struct audio_action action = {
+		.timestamp = os_gettime_ns(),
+		.type      = AUDIO_ACTION_PTM,
+		.set       = pressed
+	};
+
 	UNUSED_PARAMETER(id);
 	UNUSED_PARAMETER(key);
 
 	struct obs_source *source = data;
 
-	pthread_mutex_lock(&source->audio_mutex);
-	source->push_to_mute_pressed = pressed;
-	pthread_mutex_unlock(&source->audio_mutex);
+	pthread_mutex_lock(&source->audio_actions_mutex);
+	da_push_back(source->audio_actions, &action);
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	source->user_push_to_mute_pressed = pressed;
 }
 
 static void obs_source_hotkey_push_to_talk(void *data,
 		obs_hotkey_id id, obs_hotkey_t *key, bool pressed)
 {
+	struct audio_action action = {
+		.timestamp = os_gettime_ns(),
+		.type      = AUDIO_ACTION_PTT,
+		.set       = pressed
+	};
+
 	UNUSED_PARAMETER(id);
 	UNUSED_PARAMETER(key);
 
 	struct obs_source *source = data;
 
-	pthread_mutex_lock(&source->audio_mutex);
-	source->push_to_talk_pressed = pressed;
-	pthread_mutex_unlock(&source->audio_mutex);
+	pthread_mutex_lock(&source->audio_actions_mutex);
+	da_push_back(source->audio_actions, &action);
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	source->user_push_to_talk_pressed = pressed;
 }
 
 static void obs_source_init_audio_hotkeys(struct obs_source *source)
@@ -423,10 +442,12 @@ void obs_source_destroy(struct obs_source *source)
 	audio_resampler_destroy(source->resampler);
 	bfree(source->audio_output_buf[0][0]);
 
+	da_free(source->audio_actions);
 	da_free(source->async_cache);
 	da_free(source->async_frames);
 	da_free(source->filters);
 	pthread_mutex_destroy(&source->filter_mutex);
+	pthread_mutex_destroy(&source->audio_actions_mutex);
 	pthread_mutex_destroy(&source->audio_buf_mutex);
 	pthread_mutex_destroy(&source->audio_mutex);
 	pthread_mutex_destroy(&source->async_mutex);
@@ -874,8 +895,16 @@ static inline uint64_t conv_frames_to_time(const size_t sample_rate,
 	return (uint64_t)frames * 1000000000ULL / (uint64_t)sample_rate;
 }
 
+static inline size_t conv_time_to_frames(const size_t sample_rate,
+		const uint64_t duration)
+{	/* nanoseconds -> whole frames at sample_rate, rounding down */
+	return (size_t)((uint64_t)sample_rate * duration / 1000000000ULL);
+}
+
 /* maximum timestamp variance in nanoseconds */
 #define MAX_TS_VAR          2000000000ULL
+/* maximum buffer size */
+#define MAX_BUF_SIZE        (1000 * AUDIO_OUTPUT_FRAMES * sizeof(float))
 
 static inline void reset_audio_timing(obs_source_t *source, uint64_t timestamp,
 		uint64_t os_time)
@@ -957,9 +986,17 @@ static void source_output_audio_place(obs_source_t *source,
 			in->timestamp);
 #endif
 
-	for (size_t i = 0; i < channels; i++)
+	/* do not allow the circular buffers to become too big */
+	if ((buf_placement + size) > MAX_BUF_SIZE)
+		return;
+
+	for (size_t i = 0; i < channels; i++) {
 		circlebuf_place(&source->audio_input_buf[i], buf_placement,
 				in->data[i], size);
+		circlebuf_pop_back(&source->audio_input_buf[i], NULL,
+				source->audio_input_buf[i].size -
+				(buf_placement + size));
+	}
 }
 
 static inline void source_output_audio_push_back(obs_source_t *source,
@@ -967,28 +1004,33 @@ static inline void source_output_audio_push_back(obs_source_t *source,
 {
 	audio_t *audio = obs->audio.audio;
 	size_t channels = audio_output_get_channels(audio);
+	size_t size = in->frames * sizeof(float);
+
+	/* do not allow the circular buffers to become too big */
+	if ((source->audio_input_buf[0].size + size) > MAX_BUF_SIZE)
+		return;
 
 	for (size_t i = 0; i < channels; i++)
 		circlebuf_push_back(&source->audio_input_buf[i],
-				in->data[i], in->frames * sizeof(float));
+				in->data[i], size);
 }
 
 static inline bool source_muted(obs_source_t *source, uint64_t os_time)
 {
-	if (source->push_to_mute_enabled && source->push_to_mute_pressed)
+	if (source->push_to_mute_enabled && source->user_push_to_mute_pressed)
 		source->push_to_mute_stop_time = os_time +
 			source->push_to_mute_delay * 1000000;
 
-	if (source->push_to_talk_enabled && source->push_to_talk_pressed)
+	if (source->push_to_talk_enabled && source->user_push_to_talk_pressed)
 		source->push_to_talk_stop_time = os_time +
 			source->push_to_talk_delay * 1000000;
 
-	bool push_to_mute_active = source->push_to_mute_pressed ||
+	bool push_to_mute_active = source->user_push_to_mute_pressed ||
 		os_time < source->push_to_mute_stop_time;
-	bool push_to_talk_active = source->push_to_talk_pressed ||
+	bool push_to_talk_active = source->user_push_to_talk_pressed ||
 		os_time < source->push_to_talk_stop_time;
 
-	return !source->enabled || source->muted ||
+	return !source->enabled || source->user_muted ||
 			(source->push_to_mute_enabled && push_to_mute_active) ||
 			(source->push_to_talk_enabled && !push_to_talk_active);
 }
@@ -1030,8 +1072,14 @@ static void source_output_audio_data(obs_source_t *source,
 
 	in.timestamp += source->timing_adjust + source->sync_offset;
 
-	if (source->next_audio_sys_ts_min == in.timestamp)
+	if (source->next_audio_sys_ts_min == in.timestamp) {
 		push_back = true;
+	} else {
+		diff = uint64_diff(source->next_audio_sys_ts_min, in.timestamp);
+		if (diff < TS_SMOOTHING_THRESHOLD)
+			push_back = true;
+	}
+
 	source->next_audio_sys_ts_min = source->next_audio_ts_min +
 		source->timing_adjust + source->sync_offset;
 
@@ -2623,6 +2671,12 @@ proc_handler_t *obs_source_get_proc_handler(const obs_source_t *source)
 void obs_source_set_volume(obs_source_t *source, float volume)
 {
 	if (obs_source_valid(source, "obs_source_set_volume")) {
+		struct audio_action action = {
+			.timestamp = os_gettime_ns(),
+			.type      = AUDIO_ACTION_VOL,
+			.vol       = volume
+		};
+
 		struct calldata data = {0};
 		calldata_set_ptr(&data, "source", source);
 		calldata_set_float(&data, "volume", volume);
@@ -2633,6 +2687,10 @@ void obs_source_set_volume(obs_source_t *source, float volume)
 		volume = (float)calldata_float(&data, "volume");
 		calldata_free(&data);
 
+		pthread_mutex_lock(&source->audio_actions_mutex);
+		da_push_back(source->audio_actions, &action);
+		pthread_mutex_unlock(&source->audio_actions_mutex);
+
 		source->user_volume = volume;
 	}
 }
@@ -3030,17 +3088,22 @@ void obs_source_set_enabled(obs_source_t *source, bool enabled)
 bool obs_source_muted(const obs_source_t *source)
 {
 	return obs_source_valid(source, "obs_source_muted") ?
-		source->muted : false;
+		source->user_muted : false;
 }
 
 void obs_source_set_muted(obs_source_t *source, bool muted)
 {
 	struct calldata data = {0};
+	struct audio_action action = {
+		.timestamp = os_gettime_ns(),
+		.type      = AUDIO_ACTION_MUTE,
+		.set       = muted
+	};
 
 	if (!obs_source_valid(source, "obs_source_set_muted"))
 		return;
 
-	source->muted = muted;
+	source->user_muted = muted;
 
 	calldata_set_ptr(&data, "source", source);
 	calldata_set_bool(&data, "muted", muted);
@@ -3048,6 +3111,10 @@ void obs_source_set_muted(obs_source_t *source, bool muted)
 	signal_handler_signal(source->context.signals, "mute", &data);
 
 	calldata_free(&data);
+
+	pthread_mutex_lock(&source->audio_actions_mutex);
+	da_push_back(source->audio_actions, &action);
+	pthread_mutex_unlock(&source->audio_actions_mutex);
 }
 
 static void source_signal_push_to_changed(obs_source_t *source,
@@ -3196,6 +3263,262 @@ void *obs_source_get_type_data(obs_source_t *source)
 		? source->info.type_data : NULL;
 }
 
+static float get_source_volume(obs_source_t *source, uint64_t os_time)
+{
+	/* effective gain at os_time: 0 while muted/gated, else the volume,
+	 * snapped to exactly 0 or 1 when within 0.0001 of either */
+	bool ptm_held, ptt_held, silent;
+
+	if (source->push_to_mute_enabled && source->push_to_mute_pressed)
+		source->push_to_mute_stop_time =
+			os_time + source->push_to_mute_delay * 1000000;
+	if (source->push_to_talk_enabled && source->push_to_talk_pressed)
+		source->push_to_talk_stop_time =
+			os_time + source->push_to_talk_delay * 1000000;
+
+	ptm_held = source->push_to_mute_pressed ||
+		os_time < source->push_to_mute_stop_time;
+	ptt_held = source->push_to_talk_pressed ||
+		os_time < source->push_to_talk_stop_time;
+	silent = !source->enabled || source->muted ||
+		(source->push_to_mute_enabled && ptm_held) ||
+		(source->push_to_talk_enabled && !ptt_held);
+
+	if (silent || close_float(source->volume, 0.0f, 0.0001f))
+		return 0.0f;
+	return close_float(source->volume, 1.0f, 0.0001f) ?
+		1.0f : source->volume;
+}
+
+static inline void multiply_output_audio(obs_source_t *source, size_t mix,
+		size_t channels, float vol)
+{
+	/* scale AUDIO_OUTPUT_FRAMES * channels floats from the mix's channel 0 */
+	float *samples = source->audio_output_buf[mix][0];
+	size_t total = AUDIO_OUTPUT_FRAMES * channels;
+	for (size_t i = 0; i < total; i++)
+		samples[i] *= vol;
+}
+
+static inline void multiply_vol_data(obs_source_t *source, size_t mix,
+		size_t channels, float *vol_data)
+{
+	/* multiplies every channel of the given mix, frame by frame, with
+	 * the per-frame gains in vol_data[0 .. AUDIO_OUTPUT_FRAMES-1] */
+	for (size_t ch = 0; ch < channels; ch++) {
+		float *samples = source->audio_output_buf[mix][ch];
+
+		for (size_t frame = 0; frame < AUDIO_OUTPUT_FRAMES; frame++)
+			samples[frame] *= vol_data[frame];
+	}
+}
+
+static inline void apply_audio_action(obs_source_t *source,
+		const struct audio_action *action)
+{
+	/* copy one queued action's value into the matching source field */
+	const enum audio_action_type kind = action->type;
+	if (kind == AUDIO_ACTION_VOL)
+		source->volume = action->vol;
+	else if (kind == AUDIO_ACTION_MUTE)
+		source->muted = action->set;
+	else if (kind == AUDIO_ACTION_PTT)
+		source->push_to_talk_pressed = action->set;
+	else if (kind == AUDIO_ACTION_PTM)
+		source->push_to_mute_pressed = action->set;
+}
+
+static void apply_audio_actions(obs_source_t *source, size_t channels,
+		size_t sample_rate)
+{
+	/* Applies the queued actions that fall inside the current tick and
+	 * scales the tick's output by the resulting per-frame volume. */
+	float vol_data[AUDIO_OUTPUT_FRAMES]; /* stack: no per-tick malloc */
+	float cur_vol = get_source_volume(source, source->audio_ts);
+	size_t frame_num = 0;
+
+	pthread_mutex_lock(&source->audio_actions_mutex);
+
+	for (size_t i = 0; i < source->audio_actions.num; i++) {
+		struct audio_action action = source->audio_actions.array[i];
+		uint64_t timestamp = action.timestamp;
+		size_t new_frame_num;
+		/* actions older than the tick take effect at its first frame */
+		if (timestamp < source->audio_ts)
+			timestamp = source->audio_ts;
+
+		new_frame_num = conv_time_to_frames(sample_rate,
+				timestamp - source->audio_ts);
+		/* action lies past this tick: leave it and the rest queued */
+		if (new_frame_num >= AUDIO_OUTPUT_FRAMES)
+			break;
+
+		da_erase(source->audio_actions, i--);
+
+		apply_audio_action(source, &action);
+		/* frames before the action keep the previous volume */
+		if (new_frame_num > frame_num) {
+			for (; frame_num < new_frame_num; frame_num++)
+				vol_data[frame_num] = cur_vol;
+		}
+
+		cur_vol = get_source_volume(source, timestamp);
+	}
+	/* the rest of the tick uses the last volume that was reached */
+	for (; frame_num < AUDIO_OUTPUT_FRAMES; frame_num++)
+		vol_data[frame_num] = cur_vol;
+
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		if ((source->audio_mixers & (1 << mix)) != 0)
+			multiply_vol_data(source, mix, channels, vol_data);
+	}
+}
+
+static void apply_audio_volume(obs_source_t *source, uint32_t mixers,
+		size_t channels, size_t sample_rate)
+{
+	/* Scales this tick's output by the source volume; hands over to
+	 * apply_audio_actions when the first queued action starts in-tick. */
+	uint64_t first_action_ts = 0;
+	bool have_action;
+	float gain;
+
+	pthread_mutex_lock(&source->audio_actions_mutex);
+	have_action = source->audio_actions.num > 0;
+	if (have_action)
+		first_action_ts = source->audio_actions.array[0].timestamp;
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	if (have_action) {
+		uint64_t tick_end = source->audio_ts +
+			conv_frames_to_time(sample_rate, AUDIO_OUTPUT_FRAMES);
+
+		if (first_action_ts < tick_end) {
+			apply_audio_actions(source, channels, sample_rate);
+			return;
+		}
+	}
+
+	gain = get_source_volume(source, source->audio_ts);
+	if (gain == 1.0f)
+		return;
+
+	if (gain == 0.0f || mixers == 0) {
+		memset(source->audio_output_buf[0][0], 0,
+				AUDIO_OUTPUT_FRAMES * sizeof(float) *
+				MAX_AUDIO_CHANNELS * MAX_AUDIO_MIXES);
+		return;
+	}
+
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		uint32_t bit = (1 << mix);
+		if ((source->audio_mixers & bit) == 0 || (mixers & bit) == 0)
+			continue;
+		multiply_output_audio(source, mix, channels, gain);
+	}
+}
+
+static void custom_audio_render(obs_source_t *source, uint32_t mixers,
+		size_t channels, size_t sample_rate)
+{ /* render path for sources that implement info.audio_render */
+	struct obs_source_audio_mix audio_data;
+	bool success;
+	uint64_t ts;
+	/* point the callback's output slots at this source's mix buffers */
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		for (size_t ch = 0; ch < channels; ch++)
+			audio_data.output[mix].data[ch] =
+				source->audio_output_buf[mix][ch];
+	}
+	/* NOTE(review): size assumes mixes packed `channels` apart -- confirm */
+	memset(audio_data.output[0].data[0], 0, AUDIO_OUTPUT_FRAMES *
+			MAX_AUDIO_MIXES * channels * sizeof(float));
+	/* the source renders its own audio and reports its timestamp in ts */
+	success = source->info.audio_render(source->context.data, &ts,
+			&audio_data, mixers, channels, sample_rate);
+	source->audio_ts = success ? ts : 0;
+	source->audio_pending = !success;
+
+	if (!success || !source->audio_ts || !mixers)
+		return;
+	/* zero the mixes this source is not assigned to (audio_mixers bit) */
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		if ((source->audio_mixers & (1 << mix)) == 0) {
+			memset(source->audio_output_buf[mix][0], 0,
+					sizeof(float) * AUDIO_OUTPUT_FRAMES *
+					channels);
+			continue;
+		}
+	}
+
+	apply_audio_volume(source, mixers, channels, sample_rate);
+}
+
+static inline void process_audio_source_tick(obs_source_t *source,
+		uint32_t mixers, size_t channels, size_t sample_rate,
+		size_t size)
+{
+	/* Peeks `size` bytes per channel of queued input into mix 0, copies
+	 * that into every other enabled mix, zeroes the disabled mixes,
+	 * applies the volume and clears the pending flag. */
+	size_t ch;
+
+	pthread_mutex_lock(&source->audio_buf_mutex);
+	for (ch = 0; ch < channels; ch++) {
+		struct circlebuf *in = &source->audio_input_buf[ch];
+		circlebuf_peek_front(in, source->audio_output_buf[0][ch], size);
+	}
+	pthread_mutex_unlock(&source->audio_buf_mutex);
+
+	for (size_t mix = 1; mix < MAX_AUDIO_MIXES; mix++) {
+		uint32_t bit = (1 << mix);
+
+		if ((source->audio_mixers & mixers & bit) != 0) {
+			for (ch = 0; ch < channels; ch++)
+				memcpy(source->audio_output_buf[mix][ch],
+					source->audio_output_buf[0][ch], size);
+		} else {
+			memset(source->audio_output_buf[mix][0],
+					0, size * channels);
+		}
+	}
+
+	if ((source->audio_mixers & mixers & 1) == 0)
+		memset(source->audio_output_buf[0][0], 0, size * channels);
+
+	apply_audio_volume(source, mixers, channels, sample_rate);
+	source->audio_pending = false;
+}
+
+void obs_source_audio_render(obs_source_t *source, uint32_t mixers,
+		size_t channels, size_t sample_rate, size_t size)
+{
+	/* render one tick for `source`, or flag it pending if it cannot yet */
+	if (!source) /* nothing to flag: a NULL source must not be touched */
+		return;
+	if (!source->audio_output_buf[0][0]) {
+		source->audio_pending = true;
+		return;
+	}
+	if (source->info.audio_render) {
+		custom_audio_render(source, mixers, channels, sample_rate);
+		return;
+	}
+	if (!source->audio_ts || source->audio_input_buf[0].size < size) {
+		source->audio_pending = true;
+		return;
+	}
+	process_audio_source_tick(source, mixers, channels, sample_rate, size);
+}
+
+bool obs_source_audio_pending(const obs_source_t *source)
+{	/* false for an invalid source, otherwise its audio_pending flag */
+	bool valid = obs_source_valid(source, "obs_source_audio_pending");
+	return valid && source->audio_pending;
+}
+
 uint64_t obs_source_get_audio_timestamp(const obs_source_t *source)
 {
 	return obs_source_valid(source, "obs_source_get_audio_timestamp") ?
diff --git a/libobs/obs.c b/libobs/obs.c
index b38006baf..7d5d1024c 100644
--- a/libobs/obs.c
+++ b/libobs/obs.c
@@ -484,6 +484,10 @@ static void obs_free_audio(void)
 	if (audio->audio)
 		audio_output_close(audio->audio);
 
+	circlebuf_free(&audio->buffered_timestamps);
+	da_free(audio->render_order);
+	da_free(audio->root_nodes);
+
 	memset(audio, 0, sizeof(struct obs_core_audio));
 }
 
@@ -780,11 +784,11 @@ void obs_shutdown(void)
 	stop_video();
 	stop_hotkeys();
 
+	obs_free_audio();
 	obs_free_data();
 	obs_free_video();
 	obs_free_hotkeys();
 	obs_free_graphics();
-	obs_free_audio();
 	proc_handler_destroy(obs->procs);
 	signal_handler_destroy(obs->signals);
 
@@ -922,16 +926,14 @@ bool obs_reset_audio(const struct obs_audio_info *oai)
 	ai.samples_per_sec = oai->samples_per_sec;
 	ai.format = AUDIO_FORMAT_FLOAT_PLANAR;
 	ai.speakers = oai->speakers;
-	ai.buffer_ms = oai->buffer_ms;
+	ai.input_callback = audio_callback;
 
 	blog(LOG_INFO, "---------------------------------");
 	blog(LOG_INFO, "audio settings reset:\n"
 	               "\tsamples per sec: %d\n"
-	               "\tspeakers:        %d\n"
-	               "\tbuffering (ms):  %d",
+	               "\tspeakers:        %d",
 	               (int)ai.samples_per_sec,
-	               (int)ai.speakers,
-	               (int)ai.buffer_ms);
+	               (int)ai.speakers);
 
 	return obs_init_audio(&ai);
 }
@@ -976,7 +978,6 @@ bool obs_get_audio_info(struct obs_audio_info *oai)
 
 	oai->samples_per_sec = info->samples_per_sec;
 	oai->speakers = info->speakers;
-	oai->buffer_ms = info->buffer_ms;
 	return true;
 }
 
diff --git a/libobs/obs.h b/libobs/obs.h
index e76c128c9..bfc206a37 100644
--- a/libobs/obs.h
+++ b/libobs/obs.h
@@ -180,7 +180,6 @@ struct obs_video_info {
 struct obs_audio_info {
 	uint32_t            samples_per_sec;
 	enum speaker_layout speakers;
-	uint64_t            buffer_ms;
 };
 
 /**
@@ -974,6 +973,7 @@ EXPORT uint32_t obs_source_get_base_width(obs_source_t *source);
 /** Gets the base height for a source (not taking in to account filtering) */
 EXPORT uint32_t obs_source_get_base_height(obs_source_t *source);
 
+EXPORT bool obs_source_audio_pending(const obs_source_t *source);
 EXPORT uint64_t obs_source_get_audio_timestamp(const obs_source_t *source);
 EXPORT void obs_source_get_audio_mix(const obs_source_t *source,
 		struct obs_source_audio_mix *audio);