Diffstat (limited to 'video.c')
-rw-r--r--  video.c  346
1 file changed, 152 insertions, 194 deletions
diff --git a/video.c b/video.c
index 166ccee..aa033c0 100644
--- a/video.c
+++ b/video.c
@@ -2,10 +2,11 @@
#include <stdatomic.h>
#include <threads.h>
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
#include <SDL.h>
#include <unistd.h>
+#include <ogg/ogg.h>
+#include <vorbis/vorbisenc.h>
+#include <vpx/vp8cx.h>
#include "log.h"
#include "camera.h"
@@ -15,18 +16,16 @@
struct VideoContext {
double start_time;
- AVFormatContext *avf_context;
- AVCodecContext *video_encoder;
- AVCodecContext *audio_encoder;
- AVFrame *video_frame;
- AVFrame *audio_frame;
int audio_frame_samples;
- AVPacket *av_packet;
- AVStream *video_stream;
- AVStream *audio_stream;
+ ogg_stream_state video_stream, audio_stream;
+ vorbis_dsp_state vorbis;
+ vpx_codec_ctx_t vpx;
+ vpx_image_t vpx_image;
int64_t next_video_pts;
- int64_t next_audio_pts;
+ int64_t video_packetno;
+ int framerate;
SDL_AudioDeviceID audio_device;
+ FILE *outfile;
bool recording;
// ring buffer of audio data.
float audio_queue[AUDIO_QUEUE_SIZE];
@@ -70,7 +69,6 @@ VideoContext *video_init(void) {
if (!ctx) return NULL;
atomic_init(&ctx->audio_head, 0);
atomic_init(&ctx->audio_tail, 0);
-
SDL_AudioSpec desired = {
.channels = 2,
.freq = 44100,
@@ -86,143 +84,110 @@ VideoContext *video_init(void) {
return ctx;
}
-bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t height, int fps, int quality) {
- if (!ctx) return false;
- if (ctx->recording) {
- return true;
- }
- video_stop(ctx);
- bool have_audio = false;
- int err = avformat_alloc_output_context2(&ctx->avf_context, NULL, NULL, filename);
- if (!ctx->avf_context) {
- log_error("avformat_alloc_output_context2 \"%s\": %s", filename, av_err2str(err));
+static bool write_packet_to_stream(VideoContext *ctx, ogg_stream_state *stream, ogg_packet *packet) {
+ if (ogg_stream_packetin(stream, packet) != 0) {
+ log_error("ogg_stream_packetin failed");
return false;
}
- const AVOutputFormat *fmt = ctx->avf_context->oformat;
- const AVCodec *video_codec = avcodec_find_encoder(fmt->video_codec);
- if (!video_codec) {
- log_error("couldn't find encoder for video codec %s", avcodec_get_name(fmt->video_codec));
- return false;
+ ogg_page page;
+ while (ogg_stream_pageout(stream, &page) != 0) {
+ fwrite(page.header, 1, page.header_len, ctx->outfile);
+ fwrite(page.body, 1, page.body_len, ctx->outfile);
}
- ctx->video_stream = avformat_new_stream(ctx->avf_context, NULL);
- if (!ctx->video_stream) {
- log_error("avformat_new_stream (audio): %s", av_err2str(err));
+ if (ferror(ctx->outfile)) {
+ log_error("error writing video output");
return false;
}
- ctx->video_stream->id = 0;
- ctx->video_encoder = avcodec_alloc_context3(video_codec);
- if (!ctx->video_encoder) {
- log_error("couldn't create video encoding context for codec %s", avcodec_get_name(fmt->video_codec));
- return false;
- }
- ctx->av_packet = av_packet_alloc();
- if (!ctx->av_packet) {
- log_error("couldn't allocate video packet");
- return false;
+ return true;
+}
+
+bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t height, int fps, int quality) {
+ if (!ctx) return false;
+ if (ctx->recording) {
+ return true;
}
- ctx->video_encoder->codec_id = fmt->video_codec;
- ctx->video_encoder->bit_rate = (int64_t)quality * width * height;
- ctx->video_encoder->width = width;
- ctx->video_encoder->height = height;
- ctx->video_encoder->time_base = ctx->video_stream->time_base = (AVRational){1, fps};
- ctx->video_encoder->gop_size = 12;
- ctx->video_encoder->pix_fmt = AV_PIX_FMT_YUV420P;
- if (ctx->avf_context->oformat->flags & AVFMT_GLOBALHEADER)
- ctx->video_encoder->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
- err = avcodec_open2(ctx->video_encoder, video_codec, NULL);
- if (err < 0) {
- log_error("avcodec_open2 for video codec %s: %s", avcodec_get_name(fmt->video_codec), av_err2str(err));
- return false;
+ video_stop(ctx);
+ ctx->framerate = fps;
+ ctx->outfile = fopen(filename, "wb");
+ if (!ctx->outfile) {
+ log_perror("couldn't create %s", filename);
+ return false;
}
- err = avcodec_parameters_from_context(ctx->video_stream->codecpar, ctx->video_encoder);
- if (err < 0) {
- log_error("avcodec_parameters_from_context for video codec %s: %s", avcodec_get_name(fmt->video_codec), av_err2str(err));
+ struct timespec ts = {1, 1};
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ int serial_number = (int)((int32_t)ts.tv_nsec + 1000000000 * ((int32_t)ts.tv_sec % 2));
+ if (ogg_stream_init(&ctx->video_stream, serial_number) < 0) {
+ log_error("ogg_stream_init(video_stream) failed");
return false;
}
- ctx->video_frame = av_frame_alloc();
- if (!ctx->video_frame) {
- log_error("couldn't allocate video frame");
+ vpx_codec_enc_cfg_t cfg = {0};
+ // NOTE: vp9 encoder seems to be much slower
+ vpx_codec_iface_t *vp8 = vpx_codec_vp8_cx();
+ int err = vpx_codec_enc_config_default(vp8, &cfg, 0);
+ if (err != 0) {
+ log_error("vpx_codec_enc_config_default: %s", vpx_codec_err_to_string(err));
return false;
}
- ctx->video_frame->format = AV_PIX_FMT_YUV420P;
- ctx->video_frame->width = ctx->video_encoder->width;
- ctx->video_frame->height = ctx->video_encoder->height;
- err = av_frame_get_buffer(ctx->video_frame, 0);
- if (err < 0) {
- log_error("av_frame_get_buffer for video: %s", av_err2str(err));
+ cfg.g_w = width;
+ cfg.g_h = height;
+ cfg.g_timebase.num = 1;
+ cfg.g_timebase.den = fps;
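+ // rc_target_bitrate is in kilobits per second.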
+ cfg.rc_target_bitrate = (unsigned)quality * (unsigned)width * (unsigned)height;
+ err = vpx_codec_enc_init(&ctx->vpx, vp8, &cfg, 0);
+ if (err != 0) {
+ log_error("vpx_codec_enc_init: %s", vpx_codec_err_to_string(err));
return false;
}
- err = avio_open(&ctx->avf_context->pb, filename, AVIO_FLAG_WRITE);
- if (err < 0) {
- log_error("avio_open \"%s\": %s", filename, av_err2str(err));
+ if (!vpx_img_alloc(&ctx->vpx_image, VPX_IMG_FMT_I420, width, height, 1)) {
+ log_error("couldn't allocate VPX image");
return false;
}
- const AVCodec *audio_codec = avcodec_find_encoder(fmt->audio_codec);
- if (!audio_codec) {
- log_error("avcodec_find_encoder for audio codec %s: %s", avcodec_get_name(fmt->audio_codec), av_err2str(err));
- goto no_audio;
- }
- ctx->audio_encoder = avcodec_alloc_context3(audio_codec);
- if (!ctx->audio_encoder) {
- log_error("avcodec_alloc_context3 for audio codec %s: %s", avcodec_get_name(fmt->audio_codec), av_err2str(err));
- goto no_audio;
- }
- // only FLTP is supported by AAC encoder
- ctx->audio_encoder->sample_fmt = AV_SAMPLE_FMT_FLTP;
- ctx->audio_encoder->bit_rate = (int64_t)192 * 1024;
- ctx->audio_encoder->sample_rate = 44100;
- static const AVChannelLayout channel_layout = AV_CHANNEL_LAYOUT_STEREO;
- av_channel_layout_copy(&ctx->audio_encoder->ch_layout, &channel_layout);
- if (ctx->avf_context->oformat->flags & AVFMT_GLOBALHEADER)
- ctx->audio_encoder->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
- err = avcodec_open2(ctx->audio_encoder, audio_codec, NULL);
- if (err < 0) {
- log_error("avcodec_open2 for audio codec %s: %s", avcodec_get_name(fmt->audio_codec), av_err2str(err));
- goto no_audio;
- }
- ctx->audio_frame_samples = audio_codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE
- ? 4096
- : ctx->audio_encoder->frame_size;
- ctx->audio_frame = av_frame_alloc();
- if (!ctx->audio_frame) {
- log_error("couldn't allocate audio frame");
- goto no_audio;
- }
- ctx->audio_frame->format = AV_SAMPLE_FMT_FLTP;
- av_channel_layout_copy(&ctx->audio_frame->ch_layout, &channel_layout);
- ctx->audio_frame->sample_rate = 44100;
- ctx->audio_frame->nb_samples = ctx->audio_frame_samples;
- err = av_frame_get_buffer(ctx->audio_frame, 0);
- if (err < 0) {
- log_error("av_frame_get_buffer (audio): %s", av_err2str(err));
- goto no_audio;
- }
-
- // create stream last so that if stuff above fails we don't have a broken stream in the avformat context
- ctx->audio_stream = avformat_new_stream(ctx->avf_context, audio_codec);
- if (!ctx->audio_stream) {
- log_error("avformat_new_stream (audio): %s", av_err2str(err));
- goto no_audio;
- }
- ctx->audio_stream->id = 1;
- ctx->audio_stream->time_base = (AVRational){1, 44100};
- err = avcodec_parameters_from_context(ctx->audio_stream->codecpar, ctx->audio_encoder);
- if (err < 0) {
- log_error("avcodec_parameters_from_context (audio): %s", av_err2str(err));
- goto no_audio;
- }
- have_audio = true;
-no_audio:
- err = avformat_write_header(ctx->avf_context, NULL);
- if (err < 0) {
- log_error("avformat_write_header: %s", av_err2str(err));
+ // I can't find any documentation of OggVP8
+ // This was pieced together from ogg_build_vp8_headers in
+ // https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/oggenc.c
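+ // (26 bytes; no field needs more than 2-byte alignment, so on typical ABIs
+ // the struct has no padding and can be written out verbatim.)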
+ typedef struct {
+ char magic[5];
+ uint8_t stream_type;
+ uint8_t version[2];
+ // doesn't seem very forwards-thinking to have these be 16-bit. oh well.
+ uint16_t width;
+ uint16_t height;
+ uint8_t sample_aspect_ratio_num[3];
+ uint8_t sample_aspect_ratio_den[3];
+ // not aligned to 4 bytes ):
+ uint16_t framerate_num_hi;
+ uint16_t framerate_num_lo;
+ uint16_t framerate_den_hi;
+ uint16_t framerate_den_lo;
+ } OggVP8Header;
+ if (width > UINT16_MAX || height > UINT16_MAX) {
+ log_error("video resolution too high");
return false;
}
+ OggVP8Header header = {
+ .magic = "OVP80",
+ .stream_type = 1,
+ .version = {1, 0},
+ // big-endian for some reason....
+ .width = SDL_SwapBE16((uint16_t)width),
+ .height = SDL_SwapBE16((uint16_t)height),
+ .sample_aspect_ratio_num = {0, 0, 1},
+ .sample_aspect_ratio_den = {0, 0, 1},
+ .framerate_num_lo = SDL_SwapBE16((uint16_t)fps),
+ .framerate_den_lo = SDL_SwapBE16(1),
+ };
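+ // The first packet of a logical stream must have b_o_s set; libogg places it
+ // on a page of its own at the start of the file.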
+ ogg_packet packet = {
+ .packet = (uint8_t *)&header,
+ .bytes = sizeof header,
+ .b_o_s = true,
+ .e_o_s = false,
+ };
+ write_packet_to_stream(ctx, &ctx->video_stream, &packet);
+ bool have_audio = false;
atomic_store(&ctx->audio_head, 0);
- ctx->next_video_pts = 0;
- ctx->next_audio_pts = 0;
ctx->recording = true;
ctx->start_time = get_time_double();
+ ctx->next_video_pts = 0;
+ ctx->video_packetno = 1;
if (have_audio) {
// start recording audio
SDL_PauseAudioDevice(ctx->audio_device, 0);
@@ -230,33 +195,34 @@ no_audio:
return true;
}
+// inverse of vp8_gptopts in https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/oggparsevp8.c
+// see also: https://github.com/FFmpeg/FFmpeg/blob/99e2af4e7837ca09b97d93a562dc12947179fc48/libavformat/oggenc.c#L671
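+// granulepos layout: pts in the upper 32 bits, bit 3 set for non-key (inter) frames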
+static uint64_t vp8_pts_to_gp(int64_t pts, bool is_key_frame) {
+ return (uint64_t)pts << 32 | (uint64_t)!is_key_frame << 3;
+}
-static bool write_frame(VideoContext *ctx, AVCodecContext *encoder, AVStream *stream, AVFrame *frame) {
- int err = avcodec_send_frame(encoder, frame);
- if (err < 0) {
- log_error("avcodec_send_frame (stream %d): %s", stream->index, av_err2str(err));
- return false;
+static void write_video_frame(VideoContext *ctx, vpx_image_t *image, int64_t pts) {
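+ // A NULL image tells libvpx to flush the encoder. The duration is one
+ // timebase tick; the last argument is the per-frame encoding deadline in
+ // microseconds (half a frame interval here).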
+ int err = vpx_codec_encode(&ctx->vpx, image, pts, 1, 0, 1000000 / (ctx->framerate * 2));
+ if (err != 0) {
+ log_error("vpx_codec_encode: %s", vpx_codec_err_to_string(err));
}
- while (true) {
- err = avcodec_receive_packet(encoder, ctx->av_packet);
- if (err == AVERROR(EAGAIN) || err == AVERROR_EOF) {
- break;
- }
- if (err < 0) {
- log_error("avcodec_receive_packet (stream %d): %s", stream->index, av_err2str(err));
- return false;
- }
- ctx->av_packet->stream_index = stream->index;
- av_packet_rescale_ts(ctx->av_packet, encoder->time_base, stream->time_base);
- err = av_interleaved_write_frame(ctx->avf_context, ctx->av_packet);
- if (err < 0) {
- log_error("av_interleaved_write_frame (stream %d): %s", stream->index, av_err2str(err));
- return false;
- }
+ const vpx_codec_cx_pkt_t *pkt = NULL;
+ vpx_codec_iter_t iter = NULL;
+ while ((pkt = vpx_codec_get_cx_data(&ctx->vpx, &iter))) {
+ if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) continue;
+ ogg_packet oggp = {
+ .packet = pkt->data.frame.buf,
+ .bytes = pkt->data.frame.sz,
+ .granulepos = vp8_pts_to_gp(pkt->data.frame.pts, pkt->data.frame.flags & VPX_FRAME_IS_KEY),
+ .b_o_s = false,
+ .packetno = ctx->video_packetno++,
+ .e_o_s = false,
+ };
+ write_packet_to_stream(ctx, &ctx->video_stream, &oggp);
}
- return true;
}
+
bool video_submit_frame(VideoContext *ctx, Camera *camera) {
if (!ctx || !camera || !ctx->recording) return false;
double curr_time = get_time_double();
@@ -267,12 +233,9 @@ bool video_submit_frame(VideoContext *ctx, Camera *camera) {
uint32_t head = atomic_load_explicit(&ctx->audio_head, memory_order_relaxed);
uint32_t tail = atomic_load(&ctx->audio_tail);
while (true) {
- int err = av_frame_make_writable(ctx->audio_frame);
- if (err < 0) {
- log_error("av_frame_make_writable (video): %s", av_err2str(err));
- break;
- }
- ctx->audio_frame->pts = ctx->next_audio_pts;
+ (void)tail;
+ break;
+ // TODO
+ #if 0
uint32_t nfloats = (uint32_t)ctx->audio_frame_samples * 2;
bool frame_ready = false;
if (head + nfloats < AUDIO_QUEUE_SIZE) {
@@ -299,28 +262,21 @@ bool video_submit_frame(VideoContext *ctx, Camera *camera) {
}
}
if (frame_ready) {
- ctx->next_audio_pts += ctx->audio_frame_samples;
write_frame(ctx, ctx->audio_encoder, ctx->audio_stream, ctx->audio_frame);
} else {
break;
}
+ #endif
}
atomic_store(&ctx->audio_head, head);
}
// process video
- int64_t video_pts = (int64_t)(time_since_start
- * ctx->video_encoder->time_base.den
- / ctx->video_encoder->time_base.num);
- if (video_pts >= ctx->next_video_pts) {
- int err = av_frame_make_writable(ctx->video_frame);
- if (err < 0) {
- log_error("av_frame_make_writable (audio): %s", av_err2str(err));
- return false;
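+ // Convert elapsed wall-clock time to a frame index in the 1/fps timebase;
+ // only encode once we've advanced past the previously submitted frame.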
+ int64_t pts = (int64_t)(time_since_start * ctx->framerate);
+ if (pts >= ctx->next_video_pts) {
+ if (camera_copy_to_vpx_image(camera, &ctx->vpx_image)) {
+ write_video_frame(ctx, &ctx->vpx_image, pts);
}
- ctx->video_frame->pts = video_pts;
- camera_copy_to_av_frame(camera, ctx->video_frame);
- write_frame(ctx, ctx->video_encoder, ctx->video_stream, ctx->video_frame);
- ctx->next_video_pts = video_pts + 1;
+ ctx->next_video_pts = pts + 1;
}
return true;
}
@@ -341,32 +297,34 @@ void video_stop(VideoContext *ctx) {
atomic_store(&ctx->audio_tail, 0);
ctx->recording = false;
// flush video encoder
- write_frame(ctx, ctx->video_encoder, ctx->video_stream, NULL);
- // flush audio encoder
- write_frame(ctx, ctx->audio_encoder, ctx->audio_stream, NULL);
- int err = av_write_trailer(ctx->avf_context);
- if (err < 0) {
- log_error("av_write_trailer: %s", av_err2str(err));
- }
- avio_closep(&ctx->avf_context->pb);
+ write_video_frame(ctx, NULL, -1);
+ // finish video stream
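+ // An empty packet with e_o_s set terminates the logical stream and makes
+ // ogg_stream_pageout emit any packets still buffered.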
+ ogg_packet oggp = {
+ .packet = NULL,
+ .bytes = 0,
+ .granulepos = vp8_pts_to_gp(ctx->next_video_pts, false),
+ .b_o_s = false,
+ .packetno = ctx->video_packetno++,
+ .e_o_s = true,
+ };
+ write_packet_to_stream(ctx, &ctx->video_stream, &oggp);
+ // TODO: flush audio encoder
}
- if (ctx->video_encoder)
- avcodec_free_context(&ctx->video_encoder);
- if (ctx->audio_encoder)
- avcodec_free_context(&ctx->audio_encoder);
- if (ctx->video_frame)
- av_frame_free(&ctx->video_frame);
- if (ctx->audio_frame)
- av_frame_free(&ctx->audio_frame);
- if (ctx->avf_context) {
- if (ctx->avf_context->pb) {
- avio_closep(&ctx->avf_context->pb);
- }
- avformat_free_context(ctx->avf_context);
- ctx->avf_context = NULL;
+ if (ctx->outfile) {
+ fclose(ctx->outfile);
+ ctx->outfile = NULL;
+ }
+ if (ctx->vpx.iface) {
+ vpx_codec_destroy(&ctx->vpx);
+ ctx->vpx.iface = NULL;
+ }
+ if (ctx->vpx_image.planes[0]) {
+ vpx_img_free(&ctx->vpx_image);
+ ctx->vpx_image.planes[0] = NULL;
}
- if (ctx->av_packet)
- av_packet_free(&ctx->av_packet);
+ vorbis_dsp_clear(&ctx->vorbis);
+ ogg_stream_clear(&ctx->video_stream);
+ ogg_stream_clear(&ctx->audio_stream);
}
void video_quit(VideoContext *ctx) {