summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2025-02-26 19:00:15 -0500
committerpommicket <pommicket@gmail.com>2025-02-26 19:07:23 -0500
commitb821a8178a64132f058aedf7620f2a76524c6e01 (patch)
tree9824df448aa9fc30aa9e99d3926a0fcbf2008537
parent849700a4499ae0c77b7c87c2d022fb18b0daf4f9 (diff)
audio
-rw-r--r--video.c101
1 files changed, 84 insertions, 17 deletions
diff --git a/video.c b/video.c
index aa033c0..8fecead 100644
--- a/video.c
+++ b/video.c
@@ -16,9 +16,10 @@
struct VideoContext {
double start_time;
- int audio_frame_samples;
ogg_stream_state video_stream, audio_stream;
vorbis_dsp_state vorbis;
+ vorbis_info vorbis_info;
+ vorbis_block vorbis_block;
vpx_codec_ctx_t vpx;
vpx_image_t vpx_image;
int64_t next_video_pts;
@@ -101,6 +102,12 @@ static bool write_packet_to_stream(VideoContext *ctx, ogg_stream_state *stream,
return true;
}
+// inverse of vp8_gptopts in https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/oggparsevp8.c
+// see also: https://github.com/FFmpeg/FFmpeg/blob/99e2af4e7837ca09b97d93a562dc12947179fc48/libavformat/oggenc.c#L671
+static uint64_t vp8_pts_to_gp(int64_t pts, bool is_key_frame) {
+ return (uint64_t)pts << 32 | (uint64_t)!is_key_frame << 3;
+}
+
bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t height, int fps, int quality) {
if (!ctx) return false;
if (ctx->recording) {
@@ -119,8 +126,12 @@ bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t
log_error("ogg_stream_init(video_stream) failed");
return false;
}
+ if (ogg_stream_init(&ctx->audio_stream, serial_number + 1) < 0) {
+ log_error("ogg_stream_init(audio_stream) failed");
+ return false;
+ }
vpx_codec_enc_cfg_t cfg = {0};
- // NOTE: vp9 encoder seems to be much slower
+ // NOTE: vp9 encoder seems to be much slower and OggVP9 isn't a thing (yet)
vpx_codec_iface_t *vp8 = vpx_codec_vp8_cx();
int err = vpx_codec_enc_config_default(vp8, &cfg, 0);
if (err != 0) {
@@ -178,11 +189,45 @@ bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t
ogg_packet packet = {
.packet = (uint8_t *)&header,
.bytes = sizeof header,
+ .granulepos = vp8_pts_to_gp(0, false),
.b_o_s = true,
.e_o_s = false,
};
write_packet_to_stream(ctx, &ctx->video_stream, &packet);
bool have_audio = false;
+ vorbis_info_init(&ctx->vorbis_info);
+ if ((err = vorbis_encode_init_vbr(&ctx->vorbis_info, 2, 44100, 0.9f)) != 0) {
+ log_error("vorbis_encode_init_vbr failed (error %d)", err);
+ goto no_audio;
+ }
+ if ((err = vorbis_encode_setup_init(&ctx->vorbis_info)) != 0) {
+ log_error("vorbis_encode_setup_init failed (error %d)", err);
+ goto no_audio;
+ }
+ if (vorbis_analysis_init(&ctx->vorbis, &ctx->vorbis_info) != 0) {
+ log_error("vorbis_analysis_init failed");
+ goto no_audio;
+ }
+ if (vorbis_block_init(&ctx->vorbis, &ctx->vorbis_block) != 0) {
+ log_error("vorbis_block_init failed");
+ goto no_audio;
+ }
+ vorbis_comment comments = {0};
+ vorbis_comment_init(&comments);
+ ogg_packet header_packets[3] = {0};
+ if (vorbis_analysis_headerout(&ctx->vorbis, &comments,
+ &header_packets[0], &header_packets[1], &header_packets[2]) != 0) {
+ log_error("vorbis_analysis_headerout failed");
+ goto no_audio;
+ }
+ vorbis_comment_clear(&comments);
+ for (int i = 0; i < 3; i++) {
+ if (!write_packet_to_stream(ctx, &ctx->audio_stream, &header_packets[i])) {
+ goto no_audio;
+ }
+ }
+ have_audio = true;
+no_audio:
atomic_store(&ctx->audio_head, 0);
ctx->recording = true;
ctx->start_time = get_time_double();
@@ -195,11 +240,6 @@ bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t
return true;
}
-// inverse of vp8_gptopts in https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/oggparsevp8.c
-// see also: https://github.com/FFmpeg/FFmpeg/blob/99e2af4e7837ca09b97d93a562dc12947179fc48/libavformat/oggenc.c#L671
-static uint64_t vp8_pts_to_gp(int64_t pts, bool is_key_frame) {
- return (uint64_t)pts << 32 | (uint64_t)!is_key_frame << 3;
-}
static void write_video_frame(VideoContext *ctx, vpx_image_t *image, int64_t pts) {
int err = vpx_codec_encode(&ctx->vpx, image, pts, 1, 0, 1000000 / (ctx->framerate * 2));
@@ -223,6 +263,31 @@ static void write_video_frame(VideoContext *ctx, vpx_image_t *image, int64_t pts
}
+static void write_audio_frame(VideoContext *ctx, int nsamples) {
+ int err = vorbis_analysis_wrote(&ctx->vorbis, nsamples);
+ if (err != 0) {
+ log_error("vorbis_analysis_wrote failed (error %d)", err);
+ }
+ while ((err = vorbis_analysis_blockout(&ctx->vorbis, &ctx->vorbis_block)) > 0) {
+ if ((err = vorbis_analysis(&ctx->vorbis_block, NULL)) != 0) {
+ log_error("vorbis_analysis failed (error %d)", err);
+ }
+ if ((err = vorbis_bitrate_addblock(&ctx->vorbis_block)) != 0) {
+ log_error("vorbis_bitrate_addblock failed (error %d)", err);
+ }
+ ogg_packet oggp;
+ while ((err = vorbis_bitrate_flushpacket(&ctx->vorbis, &oggp)) > 0) {
+ write_packet_to_stream(ctx, &ctx->audio_stream, &oggp);
+ }
+ if (err < 0) {
+ log_error("vorbis_bitrate_flushpacket failed (error %d)", err);
+ }
+ }
+ if (err < 0) {
+ log_error("vorbis_analysis_blockout failed (error %d)", err);
+ }
+}
+
bool video_submit_frame(VideoContext *ctx, Camera *camera) {
if (!ctx || !camera || !ctx->recording) return false;
double curr_time = get_time_double();
@@ -233,17 +298,16 @@ bool video_submit_frame(VideoContext *ctx, Camera *camera) {
uint32_t head = atomic_load_explicit(&ctx->audio_head, memory_order_relaxed);
uint32_t tail = atomic_load(&ctx->audio_tail);
while (true) {
- break;(void)tail;
- // TODO
- #if 0
- uint32_t nfloats = (uint32_t)ctx->audio_frame_samples * 2;
+ uint32_t audio_frame_samples = 1024; // value recommended by vorbis
+ uint32_t nfloats = (uint32_t)audio_frame_samples * 2;
bool frame_ready = false;
if (head + nfloats < AUDIO_QUEUE_SIZE) {
// easy case
frame_ready = head + nfloats <= tail || head > tail /* tail wrapped around */;
if (frame_ready) {
+ float **buffer = vorbis_analysis_buffer(&ctx->vorbis, audio_frame_samples);
for (uint32_t s = 0; s < nfloats; s++) {
- ((float *)ctx->audio_frame->data[s % 2])[s / 2] = ctx->audio_queue[head + s];
+ buffer[s % 2][s / 2] = ctx->audio_queue[head + s];
}
head += nfloats;
}
@@ -251,22 +315,22 @@ bool video_submit_frame(VideoContext *ctx, Camera *camera) {
// "wrap around" case
frame_ready = head + nfloats - AUDIO_QUEUE_SIZE <= tail && tail < head;
if (frame_ready) {
+ float **buffer = vorbis_analysis_buffer(&ctx->vorbis, audio_frame_samples);
for (uint32_t s = 0; s < AUDIO_QUEUE_SIZE - head; s++) {
- ((float *)ctx->audio_frame->data[s % 2])[s / 2] = ctx->audio_queue[head + s];
+ buffer[s % 2][s / 2] = ctx->audio_queue[head + s];
}
for (uint32_t s = 0; s < head + nfloats - AUDIO_QUEUE_SIZE; s++) {
uint32_t i = AUDIO_QUEUE_SIZE - head + s;
- ((float *)ctx->audio_frame->data[i % 2])[i / 2] = ctx->audio_queue[s];
+ buffer[i % 2][i / 2] = ctx->audio_queue[s];
}
head = head + nfloats - AUDIO_QUEUE_SIZE;
}
}
if (frame_ready) {
- write_frame(ctx, ctx->audio_encoder, ctx->audio_stream, ctx->audio_frame);
+ write_audio_frame(ctx, audio_frame_samples);
} else {
break;
}
- #endif
}
atomic_store(&ctx->audio_head, head);
}
@@ -308,7 +372,8 @@ void video_stop(VideoContext *ctx) {
.e_o_s = true,
};
write_packet_to_stream(ctx, &ctx->video_stream, &oggp);
- // TODO: flush audio encoder
+ // flush audio encoder
+ write_audio_frame(ctx, 0);
}
if (ctx->outfile) {
fclose(ctx->outfile);
@@ -323,6 +388,8 @@ void video_stop(VideoContext *ctx) {
ctx->vpx_image.planes[0] = NULL;
}
vorbis_dsp_clear(&ctx->vorbis);
+ vorbis_info_clear(&ctx->vorbis_info);
+ vorbis_block_clear(&ctx->vorbis_block);
ogg_stream_clear(&ctx->video_stream);
ogg_stream_clear(&ctx->audio_stream);
}