author    pommicket <pommicket@gmail.com>  2025-02-26 14:37:44 -0500
committer pommicket <pommicket@gmail.com>  2025-02-26 19:07:21 -0500
commit    849700a4499ae0c77b7c87c2d022fb18b0daf4f9 (patch)
tree      1ad7dbe2d4a50a8853193091560a2d93832e224c
parent    b263baec1d18376f81c20b76220e329af5e2f0df (diff)
ogg vp8!
-rw-r--r--  README.md    |   7
-rw-r--r--  camera.c     |  75
-rw-r--r--  camera.h     |  10
-rw-r--r--  main.c       |   2
-rw-r--r--  meson.build  |  12
-rw-r--r--  video.c      | 346
6 files changed, 177 insertions, 275 deletions
diff --git a/README.md b/README.md
index 4a6e4c7..7dbe4cd 100644
--- a/README.md
+++ b/README.md
@@ -20,9 +20,6 @@ If you want automatic updates, you can also
sudo apt install camlet
```
-Unfortunately this will probably only work on Debian stable because ffmpeg likes to break
-their libraries every few months by changing the .so version.
-
If you are on a non-Debian-based Linux
distribution, you will have to build from source (see instructions below).
@@ -38,12 +35,12 @@ distribution, you will have to build from source (see instructions below).
## Building from source
camlet requires meson-build, a C compiler, and the development libraries
-for SDL2, SDL2\_ttf, GL (headers only), v4l2, udev, sodium, jpeglib (from IJG), avcodec, avformat, and fontconfig.
+for SDL2, SDL2\_ttf, GL (headers only), v4l2, udev, sodium, jpeglib (from IJG), ogg, vorbis, vpx, and fontconfig.
These can all be installed on Debian/Ubuntu with
```sh
-sudo apt install clang meson libv4l-dev libudev-dev libsodium-dev libfontconfig-dev libgl-dev libsdl2-dev libsdl2-ttf-dev libjpeg-dev libavcodec-dev libavformat-dev
+sudo apt install clang meson libv4l-dev libudev-dev libsodium-dev libfontconfig-dev libgl-dev libsdl2-dev libsdl2-ttf-dev libjpeg-dev libvorbis-dev libogg-dev libvpx-dev
```
You can build the debug version of camlet with `make` (outputs `camlet.debug`), the release
diff --git a/camera.c b/camera.c
index dcee6f3..64303f2 100644
--- a/camera.c
+++ b/camera.c
@@ -8,10 +8,12 @@
#include <fcntl.h>
#include <time.h>
#include <tgmath.h>
+#include <errno.h>
+#include <theora/theoraenc.h>
#include "3rd_party/stb_image_write.h"
#include <jpeglib.h>
-#include <libavcodec/avcodec.h>
#include "log.h"
+#include <vpx/vp8cx.h>
#define CAMERA_MAX_BUFFERS 4
struct Camera {
@@ -39,7 +41,6 @@ struct Camera {
uint64_t *framerates_supported;
size_t mmap_size[CAMERA_MAX_BUFFERS];
uint8_t *mmap_frames[CAMERA_MAX_BUFFERS];
- uint8_t *userp_frames[CAMERA_MAX_BUFFERS];
// buffer used for jpeg decompression and format conversion for saving images
// should always be big enough for an 8bpc RGB frame
uint8_t *decompression_buf;
@@ -276,40 +277,6 @@ PictureFormat camera_closest_picfmt(Camera *camera, PictureFormat desired) {
return best_format;
}
-static bool camera_setup_with_userp(Camera *camera) {
- camera->access_method = CAMERA_ACCESS_USERP;
- return false;
-/*
-TODO: test me with a camera that supports userptr i/o
- struct v4l2_requestbuffers req = {0};
- camera->streaming = true;
- req.count = CAMERA_MAX_BUFFERS;
- req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
- req.memory = V4L2_MEMORY_USERPTR;
- if (v4l2_ioctl(camera->fd, VIDIOC_REQBUFS, &req) != 0) {
- log_perror("v4l2_ioctl VIDIOC_REQBUFS");
- return false;
- }
- for (int i = 0; i < CAMERA_MAX_BUFFERS; i++) {
- camera->userp_frames[i] = calloc(1, camera->curr_format.fmt.pix.sizeimage);
- struct v4l2_buffer buf = {0};
- buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
- buf.memory = V4L2_MEMORY_USERPTR;
- buf.index = i;
- buf.m.userptr = (unsigned long)camera->userp_frames[i];
- buf.length = camera->curr_format.fmt.pix.sizeimage;
- if (v4l2_ioctl(camera->fd, VIDIOC_QBUF, &buf) != 0) {
- log_perror("v4l2_ioctl VIDIOC_QBUF");
- }
- }
- if (v4l2_ioctl(camera->fd,
- VIDIOC_STREAMON,
- (enum v4l2_buf_type[1]) { V4L2_BUF_TYPE_VIDEO_CAPTURE }) != 0) {
- log_perror("v4l2_ioctl VIDIOC_STREAMON");
- return false;
- }
- return true;*/
-}
static bool camera_stop_io(Camera *camera) {
if (!camera->streaming)
return true;
@@ -353,10 +320,8 @@ static uint8_t *camera_curr_frame(Camera *camera) {
return NULL;
if (camera->read_frame)
return camera->read_frame;
- if (camera->mmap_frames[camera->curr_frame_idx])
- return camera->mmap_frames[camera->curr_frame_idx];
- assert(camera->userp_frames[camera->curr_frame_idx]);
- return camera->userp_frames[camera->curr_frame_idx];
+ assert(camera->mmap_frames[camera->curr_frame_idx]);
+ return camera->mmap_frames[camera->curr_frame_idx];
}
static float clampf(float x, float min, float max) {
@@ -548,13 +513,8 @@ bool camera_next_frame(Camera *camera) {
camera->frame_bytes_set = v4l2_read(camera->fd, camera->read_frame, camera->curr_format.fmt.pix.sizeimage);
camera->any_frames = true;
break;
- case CAMERA_ACCESS_MMAP:
+ case CAMERA_ACCESS_MMAP: {
memory = V4L2_MEMORY_MMAP;
- goto buf;
- case CAMERA_ACCESS_USERP:
- memory = V4L2_MEMORY_USERPTR;
- goto buf;
- buf: {
if (camera->frame_buffer.type) {
// queue back in previous buffer
v4l2_ioctl(camera->fd, VIDIOC_QBUF, &camera->frame_buffer);
@@ -746,8 +706,6 @@ void camera_close(Camera *camera) {
v4l2_munmap(camera->mmap_frames[i], camera->mmap_size[i]);
camera->mmap_frames[i] = NULL;
}
- free(camera->userp_frames[i]);
- camera->userp_frames[i] = NULL;
}
if (camera->fd >= 0) {
if (camera->streaming) {
@@ -898,12 +856,6 @@ bool camera_set_format(Camera *camera, PictureFormat picfmt, int desired_framera
camera_stop_io(camera);
// try read instead
return camera_setup_with_read(camera);
- case CAMERA_ACCESS_USERP:
- if (camera_setup_with_userp(camera))
- return true;
- camera_stop_io(camera);
- // try read instead
- return camera_setup_with_read(camera);
default:
#if DEBUG
assert(false);
@@ -918,7 +870,6 @@ bool camera_open(Camera *camera, PictureFormat desired_format, int desired_frame
// camera should not already be open
assert(!camera->read_frame);
assert(!camera->mmap_frames[0]);
- assert(!camera->userp_frames[0]);
camera->fd = v4l2_open(camera->devnode, O_RDWR | O_CLOEXEC);
if (camera->fd < 0) {
log_perror("v4l2_open \"%s\"", camera->devnode);
@@ -1100,15 +1051,15 @@ const char *camera_devnode(Camera *camera) {
return camera->devnode;
}
-bool camera_copy_to_av_frame(Camera *camera, struct AVFrame *frame_out) {
+bool camera_copy_to_vpx_image(Camera *camera, struct vpx_image *frame_out) {
uint8_t *frame_in = camera_curr_frame(camera);
int32_t frame_width = camera_frame_width(camera);
int32_t frame_height = camera_frame_height(camera);
if (!frame_in
- || frame_width != frame_out->width
- || frame_height != frame_out->height
+ || frame_width != (int32_t)frame_out->w
+ || frame_height != (int32_t)frame_out->h
|| camera_pixel_format(camera) != V4L2_PIX_FMT_YUV420
- || frame_out->format != AV_PIX_FMT_YUV420P) {
+ || frame_out->fmt != VPX_IMG_FMT_I420) {
static atomic_flag warned = ATOMIC_FLAG_INIT;
if (!atomic_flag_test_and_set_explicit(&warned, memory_order_relaxed)) {
log_error("%s: Bad picture format.", __func__);
@@ -1117,20 +1068,20 @@ bool camera_copy_to_av_frame(Camera *camera, struct AVFrame *frame_out) {
}
// copy Y plane
for (int64_t y = 0; y < frame_height; y++) {
- memcpy(&frame_out->data[0][y * frame_out->linesize[0]],
+ memcpy(&frame_out->planes[0][y * frame_out->stride[0]],
&frame_in[y * frame_width], frame_width);
}
// copy Cb plane
int64_t cb_offset = (int64_t)frame_width * frame_height;
for (int64_t y = 0; y < frame_height / 2; y++) {
- memcpy(&frame_out->data[1][y * frame_out->linesize[1]],
+ memcpy(&frame_out->planes[1][y * frame_out->stride[1]],
&frame_in[cb_offset + y * (frame_width / 2)],
frame_width / 2);
}
// copy Cr plane
int64_t cr_offset = cb_offset + (int64_t)frame_width / 2 * frame_height / 2;
for (int64_t y = 0; y < frame_height / 2; y++) {
- memcpy(&frame_out->data[2][y * frame_out->linesize[2]],
+ memcpy(&frame_out->planes[2][y * frame_out->stride[2]],
&frame_in[cr_offset + y * (frame_width / 2)],
frame_width / 2);
}
diff --git a/camera.h b/camera.h
index 71f4788..da19b1e 100644
--- a/camera.h
+++ b/camera.h
@@ -6,7 +6,8 @@
#include <stddef.h>
#include <string.h>
#include <GL/glcorearb.h>
-struct AVFrame;
+
+struct vpx_image;
typedef uint32_t PixelFormat;
typedef struct Camera Camera;
@@ -28,8 +29,6 @@ typedef enum {
CAMERA_ACCESS_MMAP,
// access camera via read calls
CAMERA_ACCESS_READ,
- // access camera via user-pointer streaming
- CAMERA_ACCESS_USERP,
} CameraAccessMethod;
/// macro trickery to avoid having to write every GL function multiple times
@@ -135,10 +134,7 @@ bool camera_open(Camera *camera, PictureFormat desired_format, int desired_frame
Hash camera_hash(Camera *camera);
void camera_hash_str(Camera *camera, char str[HASH_STR_SIZE]);
bool camera_set_format(Camera *camera, PictureFormat picfmt, int desired_framerate, CameraAccessMethod access, bool force);
-/// Copy current frame from camera to AVFrame.
-///
-/// Returns `true` on success. Currently only works if both the camera and the AVFrame are in the YUV420 format.
-bool camera_copy_to_av_frame(Camera *camera, struct AVFrame *frame);
+/// Copy current frame from camera to a vpx_image.
+///
+/// Returns `true` on success. Currently only works if the camera is in the YUV420 format and the image is VPX_IMG_FMT_I420.
+bool camera_copy_to_vpx_image(Camera *camera, struct vpx_image *image);
void camera_free(Camera *camera);
#endif
diff --git a/main.c b/main.c
index 4fc4a8f..6c2705e 100644
--- a/main.c
+++ b/main.c
@@ -837,7 +837,7 @@ static bool take_picture(State *state) {
// add nanoseconds as well
snprintf(path + strlen(path), sizeof path - strlen(path), "-%lu", (unsigned long)ts.tv_nsec);
}
- const char *extension = state->mode == MODE_VIDEO ? "mp4" : image_format_extensions[settings->image_format];
+ const char *extension = state->mode == MODE_VIDEO ? "ogv" : image_format_extensions[settings->image_format];
snprintf(path + strlen(path), sizeof path - strlen(path), ".%s", extension);
int fd = open(path, O_EXCL | O_CREAT, 0644);
if (fd == -1 && errno == EEXIST) {
diff --git a/meson.build b/meson.build
index 37d9474..2e4ee93 100644
--- a/meson.build
+++ b/meson.build
@@ -13,11 +13,11 @@ gl = dependency('GL')
sodium = dependency('libsodium')
fontconfig = dependency('fontconfig')
jpeg = dependency('libjpeg')
-avcodec = dependency('libavcodec')
-avformat = dependency('libavformat')
-avutil = dependency('libavutil')
-pulse = dependency('libpulse')
-pulse_simple = dependency('libpulse-simple')
+ogg = dependency('ogg')
+vorbis = dependency('vorbis')
+vorbisenc = dependency('vorbisenc')
+vpx = dependency('vpx')
+
m_dep = cc.find_library('m', required: false)
if m_dep.found()
add_project_link_arguments('-lm', language: 'c')
@@ -28,5 +28,5 @@ else
debug_def = '-DDEBUG=0'
endif
executable('camlet', 'main.c', 'camera.c', 'video.c', 'log.c', 'camlet_icon.c', '3rd_party/stb_image_write.c',
- dependencies: [v4l2, sdl2, sdl2_ttf, gl, udev, sodium, fontconfig, jpeg, avcodec, avformat, avutil, pulse, pulse_simple],
+ dependencies: [v4l2, sdl2, sdl2_ttf, gl, udev, sodium, fontconfig, jpeg, ogg, vorbis, vorbisenc, vpx],
c_args: ['-Wno-unused-function', '-Wno-format-truncation', '-Wshadow', debug_def])
diff --git a/video.c b/video.c
index 166ccee..aa033c0 100644
--- a/video.c
+++ b/video.c
@@ -2,10 +2,11 @@
#include <stdatomic.h>
#include <threads.h>
-#include <libavcodec/avcodec.h>
-#include <libavformat/avformat.h>
#include <SDL.h>
#include <unistd.h>
+#include <ogg/ogg.h>
+#include <vorbis/vorbisenc.h>
+#include <vpx/vp8cx.h>
#include "log.h"
#include "camera.h"
@@ -15,18 +16,16 @@
struct VideoContext {
double start_time;
- AVFormatContext *avf_context;
- AVCodecContext *video_encoder;
- AVCodecContext *audio_encoder;
- AVFrame *video_frame;
- AVFrame *audio_frame;
int audio_frame_samples;
- AVPacket *av_packet;
- AVStream *video_stream;
- AVStream *audio_stream;
+ ogg_stream_state video_stream, audio_stream;
+ vorbis_dsp_state vorbis;
+ vpx_codec_ctx_t vpx;
+ vpx_image_t vpx_image;
int64_t next_video_pts;
- int64_t next_audio_pts;
+ int64_t video_packetno;
+ int framerate;
SDL_AudioDeviceID audio_device;
+ FILE *outfile;
bool recording;
// ring buffer of audio data.
float audio_queue[AUDIO_QUEUE_SIZE];
@@ -70,7 +69,6 @@ VideoContext *video_init(void) {
if (!ctx) return NULL;
atomic_init(&ctx->audio_head, 0);
atomic_init(&ctx->audio_tail, 0);
-
SDL_AudioSpec desired = {
.channels = 2,
.freq = 44100,
@@ -86,143 +84,110 @@ VideoContext *video_init(void) {
return ctx;
}
-bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t height, int fps, int quality) {
- if (!ctx) return false;
- if (ctx->recording) {
- return true;
- }
- video_stop(ctx);
- bool have_audio = false;
- int err = avformat_alloc_output_context2(&ctx->avf_context, NULL, NULL, filename);
- if (!ctx->avf_context) {
- log_error("avformat_alloc_output_context2 \"%s\": %s", filename, av_err2str(err));
+static bool write_packet_to_stream(VideoContext *ctx, ogg_stream_state *stream, ogg_packet *packet) {
+ if (ogg_stream_packetin(stream, packet) != 0) {
+ log_error("ogg_stream_packetin failed");
return false;
}
- const AVOutputFormat *fmt = ctx->avf_context->oformat;
- const AVCodec *video_codec = avcodec_find_encoder(fmt->video_codec);
- if (!video_codec) {
- log_error("couldn't find encoder for video codec %s", avcodec_get_name(fmt->video_codec));
- return false;
+ ogg_page page;
+ while (ogg_stream_pageout(stream, &page) != 0) {
+ fwrite(page.header, 1, page.header_len, ctx->outfile);
+ fwrite(page.body, 1, page.body_len, ctx->outfile);
}
- ctx->video_stream = avformat_new_stream(ctx->avf_context, NULL);
- if (!ctx->video_stream) {
- log_error("avformat_new_stream (audio): %s", av_err2str(err));
+ if (ferror(ctx->outfile)) {
+ log_error("error writing video output");
return false;
}
- ctx->video_stream->id = 0;
- ctx->video_encoder = avcodec_alloc_context3(video_codec);
- if (!ctx->video_encoder) {
- log_error("couldn't create video encoding context for codec %s", avcodec_get_name(fmt->video_codec));
- return false;
- }
- ctx->av_packet = av_packet_alloc();
- if (!ctx->av_packet) {
- log_error("couldn't allocate video packet");
- return false;
+ return true;
+}
+
+bool video_start(VideoContext *ctx, const char *filename, int32_t width, int32_t height, int fps, int quality) {
+ if (!ctx) return false;
+ if (ctx->recording) {
+ return true;
}
- ctx->video_encoder->codec_id = fmt->video_codec;
- ctx->video_encoder->bit_rate = (int64_t)quality * width * height;
- ctx->video_encoder->width = width;
- ctx->video_encoder->height = height;
- ctx->video_encoder->time_base = ctx->video_stream->time_base = (AVRational){1, fps};
- ctx->video_encoder->gop_size = 12;
- ctx->video_encoder->pix_fmt = AV_PIX_FMT_YUV420P;
- if (ctx->avf_context->oformat->flags & AVFMT_GLOBALHEADER)
- ctx->video_encoder->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
- err = avcodec_open2(ctx->video_encoder, video_codec, NULL);
- if (err < 0) {
- log_error("avcodec_open2 for video codec %s: %s", avcodec_get_name(fmt->video_codec), av_err2str(err));
- return false;
+ video_stop(ctx);
+ ctx->framerate = fps;
+ ctx->outfile = fopen(filename, "wb");
+ if (!ctx->outfile) {
+ log_perror("couldn't create %s", filename);
+ return false;
}
- err = avcodec_parameters_from_context(ctx->video_stream->codecpar, ctx->video_encoder);
- if (err < 0) {
- log_error("avcodec_parameters_from_context for video codec %s: %s", avcodec_get_name(fmt->video_codec), av_err2str(err));
+ struct timespec ts = {1, 1};
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ int serial_number = (int)((int32_t)ts.tv_nsec + 1000000000 * ((int32_t)ts.tv_sec % 2));
+ if (ogg_stream_init(&ctx->video_stream, serial_number) < 0) {
+ log_error("ogg_stream_init(video_stream) failed");
return false;
}
- ctx->video_frame = av_frame_alloc();
- if (!ctx->video_frame) {
- log_error("couldn't allocate video frame");
+ vpx_codec_enc_cfg_t cfg = {0};
+ // NOTE: vp9 encoder seems to be much slower
+ vpx_codec_iface_t *vp8 = vpx_codec_vp8_cx();
+ int err = vpx_codec_enc_config_default(vp8, &cfg, 0);
+ if (err != 0) {
+ log_error("vpx_codec_enc_config_default: %s", vpx_codec_err_to_string(err));
return false;
}
- ctx->video_frame->format = AV_PIX_FMT_YUV420P;
- ctx->video_frame->width = ctx->video_encoder->width;
- ctx->video_frame->height = ctx->video_encoder->height;
- err = av_frame_get_buffer(ctx->video_frame, 0);
- if (err < 0) {
- log_error("av_frame_get_buffer for video: %s", av_err2str(err));
+ cfg.g_w = width;
+ cfg.g_h = height;
+ cfg.g_timebase.num = 1;
+ cfg.g_timebase.den = fps;
+ cfg.rc_target_bitrate = (unsigned)quality * (unsigned)width * (unsigned)height;
+ err = vpx_codec_enc_init(&ctx->vpx, vp8, &cfg, 0);
+ if (err != 0) {
+ log_error("vpx_codec_enc_init: %s", vpx_codec_err_to_string(err));
return false;
}
- err = avio_open(&ctx->avf_context->pb, filename, AVIO_FLAG_WRITE);
- if (err < 0) {
- log_error("avio_open \"%s\": %s", filename, av_err2str(err));
+ if (!vpx_img_alloc(&ctx->vpx_image, VPX_IMG_FMT_I420, width, height, 1)) {
+ log_error("couldn't allocate VPX image");
return false;
}
- const AVCodec *audio_codec = avcodec_find_encoder(fmt->audio_codec);
- if (!audio_codec) {
- log_error("avcodec_find_encoder for audio codec %s: %s", avcodec_get_name(fmt->audio_codec), av_err2str(err));
- goto no_audio;
- }
- ctx->audio_encoder = avcodec_alloc_context3(audio_codec);
- if (!ctx->audio_encoder) {
- log_error("avcodec_alloc_context3 for audio codec %s: %s", avcodec_get_name(fmt->audio_codec), av_err2str(err));
- goto no_audio;
- }
- // only FLTP is supported by AAC encoder
- ctx->audio_encoder->sample_fmt = AV_SAMPLE_FMT_FLTP;
- ctx->audio_encoder->bit_rate = (int64_t)192 * 1024;
- ctx->audio_encoder->sample_rate = 44100;
- static const AVChannelLayout channel_layout = AV_CHANNEL_LAYOUT_STEREO;
- av_channel_layout_copy(&ctx->audio_encoder->ch_layout, &channel_layout);
- if (ctx->avf_context->oformat->flags & AVFMT_GLOBALHEADER)
- ctx->audio_encoder->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
- err = avcodec_open2(ctx->audio_encoder, audio_codec, NULL);
- if (err < 0) {
- log_error("avcodec_open2 for audio codec %s: %s", avcodec_get_name(fmt->audio_codec), av_err2str(err));
- goto no_audio;
- }
- ctx->audio_frame_samples = audio_codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE
- ? 4096
- : ctx->audio_encoder->frame_size;
- ctx->audio_frame = av_frame_alloc();
- if (!ctx->audio_frame) {
- log_error("couldn't allocate audio frame");
- goto no_audio;
- }
- ctx->audio_frame->format = AV_SAMPLE_FMT_FLTP;
- av_channel_layout_copy(&ctx->audio_frame->ch_layout, &channel_layout);
- ctx->audio_frame->sample_rate = 44100;
- ctx->audio_frame->nb_samples = ctx->audio_frame_samples;
- err = av_frame_get_buffer(ctx->audio_frame, 0);
- if (err < 0) {
- log_error("av_frame_get_buffer (audio): %s", av_err2str(err));
- goto no_audio;
- }
-
- // create stream last so that if stuff above fails we don't have a broken stream in the avformat context
- ctx->audio_stream = avformat_new_stream(ctx->avf_context, audio_codec);
- if (!ctx->audio_stream) {
- log_error("avformat_new_stream (audio): %s", av_err2str(err));
- goto no_audio;
- }
- ctx->audio_stream->id = 1;
- ctx->audio_stream->time_base = (AVRational){1, 44100};
- err = avcodec_parameters_from_context(ctx->audio_stream->codecpar, ctx->audio_encoder);
- if (err < 0) {
- log_error("avcodec_parameters_from_context (audio): %s", av_err2str(err));
- goto no_audio;
- }
- have_audio = true;
-no_audio:
- err = avformat_write_header(ctx->avf_context, NULL);
- if (err < 0) {
- log_error("avformat_write_header: %s", av_err2str(err));
+ // I can't find any documentation of OggVP8
+ // This was pieced together from ogg_build_vp8_headers in
+ // https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/oggenc.c
+ typedef struct {
+ char magic[5];
+ uint8_t stream_type;
+ uint8_t version[2];
+ // doesn't seem very forwards-thinking to have these be 16-bit. oh well.
+ uint16_t width;
+ uint16_t height;
+ uint8_t sample_aspect_ratio_num[3];
+ uint8_t sample_aspect_ratio_den[3];
+ // not aligned to 4 bytes ):
+ uint16_t framerate_num_hi;
+ uint16_t framerate_num_lo;
+ uint16_t framerate_den_hi;
+ uint16_t framerate_den_lo;
+ } OggVP8Header;
+ if (width > UINT16_MAX || height > UINT16_MAX) {
+ log_error("video resolution too high");
return false;
}
+ OggVP8Header header = {
+ .magic = "OVP80",
+ .stream_type = 1,
+ .version = {1, 0},
+ // big-endian for some reason....
+ .width = SDL_SwapBE16((uint16_t)width),
+ .height = SDL_SwapBE16((uint16_t)height),
+ .sample_aspect_ratio_num = {0, 0, 1},
+ .sample_aspect_ratio_den = {0, 0, 1},
+ .framerate_num_lo = SDL_SwapBE16((uint16_t)fps),
+ .framerate_den_lo = SDL_SwapBE16(1),
+ };
+ ogg_packet packet = {
+ .packet = (uint8_t *)&header,
+ .bytes = sizeof header,
+ .b_o_s = true,
+ .e_o_s = false,
+ };
+ write_packet_to_stream(ctx, &ctx->video_stream, &packet);
+ bool have_audio = false;
atomic_store(&ctx->audio_head, 0);
- ctx->next_video_pts = 0;
- ctx->next_audio_pts = 0;
ctx->recording = true;
ctx->start_time = get_time_double();
+ ctx->next_video_pts = 0;
+ ctx->video_packetno = 1;
if (have_audio) {
// start recording audio
SDL_PauseAudioDevice(ctx->audio_device, 0);
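Note that write_packet_to_stream() relies on ogg_stream_pageout(), which only emits complete pages, so the beginning-of-stream header written above may sit buffered until enough frame data arrives. Ogg muxers conventionally force header packets onto their own page; a sketch of that variant using libogg's ogg_stream_flush() (an addition this commit does not make):

```c
// Drain the stream even when the current page is only partially full.
// Useful immediately after submitting the BOS header packet.
static bool flush_stream_pages(VideoContext *ctx, ogg_stream_state *stream) {
	ogg_page page;
	while (ogg_stream_flush(stream, &page) != 0) {
		fwrite(page.header, 1, page.header_len, ctx->outfile);
		fwrite(page.body, 1, page.body_len, ctx->outfile);
	}
	return !ferror(ctx->outfile);
}
```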
@@ -230,33 +195,34 @@ no_audio:
return true;
}
+// inverse of vp8_gptopts in https://github.com/FFmpeg/FFmpeg/blob/master/libavformat/oggparsevp8.c
+// see also: https://github.com/FFmpeg/FFmpeg/blob/99e2af4e7837ca09b97d93a562dc12947179fc48/libavformat/oggenc.c#L671
+static uint64_t vp8_pts_to_gp(int64_t pts, bool is_key_frame) {
+ return (uint64_t)pts << 32 | (uint64_t)!is_key_frame << 3;
+}
-static bool write_frame(VideoContext *ctx, AVCodecContext *encoder, AVStream *stream, AVFrame *frame) {
- int err = avcodec_send_frame(encoder, frame);
- if (err < 0) {
- log_error("avcodec_send_frame (stream %d): %s", stream->index, av_err2str(err));
- return false;
+static void write_video_frame(VideoContext *ctx, vpx_image_t *image, int64_t pts) {
+ int err = vpx_codec_encode(&ctx->vpx, image, pts, 1, 0, 1000000 / (ctx->framerate * 2));
+ if (err != 0) {
+ log_error("vpx_codec_encode: %s", vpx_codec_err_to_string(err));
}
- while (true) {
- err = avcodec_receive_packet(encoder, ctx->av_packet);
- if (err == AVERROR(EAGAIN) || err == AVERROR_EOF) {
- break;
- }
- if (err < 0) {
- log_error("avcodec_receive_packet (stream %d): %s", stream->index, av_err2str(err));
- return false;
- }
- ctx->av_packet->stream_index = stream->index;
- av_packet_rescale_ts(ctx->av_packet, encoder->time_base, stream->time_base);
- err = av_interleaved_write_frame(ctx->avf_context, ctx->av_packet);
- if (err < 0) {
- log_error("av_interleaved_write_frame (stream %d): %s", stream->index, av_err2str(err));
- return false;
- }
+ const vpx_codec_cx_pkt_t *pkt = NULL;
+ vpx_codec_iter_t iter = NULL;
+ while ((pkt = vpx_codec_get_cx_data(&ctx->vpx, &iter))) {
+ if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) continue;
+ ogg_packet oggp = {
+ .packet = pkt->data.frame.buf,
+ .bytes = pkt->data.frame.sz,
+ .granulepos = vp8_pts_to_gp(pkt->data.frame.pts, pkt->data.frame.flags & VPX_FRAME_IS_KEY),
+ .b_o_s = false,
+ .packetno = ctx->video_packetno++,
+ .e_o_s = false,
+ };
+ write_packet_to_stream(ctx, &ctx->video_stream, &oggp);
}
- return true;
}
+
bool video_submit_frame(VideoContext *ctx, Camera *camera) {
if (!ctx || !camera || !ctx->recording) return false;
double curr_time = get_time_double();
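To make vp8_pts_to_gp() above concrete: the PTS (in 1/fps units) occupies the upper 32 bits of the granule position, and bit 3 flags non-key frames. Two worked values:

```c
// Worked examples of the granule-position mapping defined above.
uint64_t gp_key    = vp8_pts_to_gp(30, true);  // (30 << 32) | 0 = 0x0000001E00000000
uint64_t gp_nonkey = vp8_pts_to_gp(30, false); // (30 << 32) | 8 = 0x0000001E00000008
```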
@@ -267,12 +233,9 @@ bool video_submit_frame(VideoContext *ctx, Camera *camera) {
uint32_t head = atomic_load_explicit(&ctx->audio_head, memory_order_relaxed);
uint32_t tail = atomic_load(&ctx->audio_tail);
while (true) {
- int err = av_frame_make_writable(ctx->audio_frame);
- if (err < 0) {
- log_error("av_frame_make_writable (video): %s", av_err2str(err));
- break;
- }
- ctx->audio_frame->pts = ctx->next_audio_pts;
+ break; (void)tail; // (void)tail silences the unused-variable warning while the audio path below is disabled
+ // TODO
+ #if 0
uint32_t nfloats = (uint32_t)ctx->audio_frame_samples * 2;
bool frame_ready = false;
if (head + nfloats < AUDIO_QUEUE_SIZE) {
@@ -299,28 +262,21 @@ bool video_submit_frame(VideoContext *ctx, Camera *camera) {
}
}
if (frame_ready) {
- ctx->next_audio_pts += ctx->audio_frame_samples;
write_frame(ctx, ctx->audio_encoder, ctx->audio_stream, ctx->audio_frame);
} else {
break;
}
+ #endif
}
atomic_store(&ctx->audio_head, head);
}
// process video
- int64_t video_pts = (int64_t)(time_since_start
- * ctx->video_encoder->time_base.den
- / ctx->video_encoder->time_base.num);
- if (video_pts >= ctx->next_video_pts) {
- int err = av_frame_make_writable(ctx->video_frame);
- if (err < 0) {
- log_error("av_frame_make_writable (audio): %s", av_err2str(err));
- return false;
+ int64_t pts = (int64_t)(time_since_start * ctx->framerate);
+ if (pts >= ctx->next_video_pts) {
+ if (camera_copy_to_vpx_image(camera, &ctx->vpx_image)) {
+ write_video_frame(ctx, &ctx->vpx_image, pts);
}
- ctx->video_frame->pts = video_pts;
- camera_copy_to_av_frame(camera, ctx->video_frame);
- write_frame(ctx, ctx->video_encoder, ctx->video_stream, ctx->video_frame);
- ctx->next_video_pts = video_pts + 1;
+ ctx->next_video_pts = pts + 1;
}
return true;
}
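The #if 0 block above marks where audio encoding has to be rebuilt on libvorbis. Since VideoContext already carries a vorbis_dsp_state and an audio ring buffer, the eventual loop would presumably follow the standard libvorbis analysis pattern; a sketch under that assumption (none of this is in the commit, and it assumes a vorbis_block initialized with vorbis_block_init):

```c
// Hypothetical encode step for the TODO: feed interleaved stereo floats
// from the SDL ring buffer through libvorbis and into the audio stream.
static void encode_audio_samples(VideoContext *ctx, vorbis_block *vb,
                                 const float *interleaved, int nsamples) {
	float **buf = vorbis_analysis_buffer(&ctx->vorbis, nsamples);
	for (int i = 0; i < nsamples; i++) {
		buf[0][i] = interleaved[2 * i];     // left channel
		buf[1][i] = interleaved[2 * i + 1]; // right channel
	}
	vorbis_analysis_wrote(&ctx->vorbis, nsamples);
	while (vorbis_analysis_blockout(&ctx->vorbis, vb) == 1) {
		vorbis_analysis(vb, NULL);   // NULL: let the bitrate engine packetize
		vorbis_bitrate_addblock(vb);
		ogg_packet op;
		while (vorbis_bitrate_flushpacket(&ctx->vorbis, &op) == 1)
			write_packet_to_stream(ctx, &ctx->audio_stream, &op);
	}
}
```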
@@ -341,32 +297,34 @@ void video_stop(VideoContext *ctx) {
atomic_store(&ctx->audio_tail, 0);
ctx->recording = false;
// flush video encoder
- write_frame(ctx, ctx->video_encoder, ctx->video_stream, NULL);
- // flush audio encoder
- write_frame(ctx, ctx->audio_encoder, ctx->audio_stream, NULL);
- int err = av_write_trailer(ctx->avf_context);
- if (err < 0) {
- log_error("av_write_trailer: %s", av_err2str(err));
- }
- avio_closep(&ctx->avf_context->pb);
+ write_video_frame(ctx, NULL, -1);
+ // finish video stream
+ ogg_packet oggp = {
+ .packet = NULL,
+ .bytes = 0,
+ .granulepos = vp8_pts_to_gp(ctx->next_video_pts, false),
+ .b_o_s = false,
+ .packetno = ctx->video_packetno++,
+ .e_o_s = true,
+ };
+ write_packet_to_stream(ctx, &ctx->video_stream, &oggp);
+ // TODO: flush audio encoder
}
- if (ctx->video_encoder)
- avcodec_free_context(&ctx->video_encoder);
- if (ctx->audio_encoder)
- avcodec_free_context(&ctx->audio_encoder);
- if (ctx->video_frame)
- av_frame_free(&ctx->video_frame);
- if (ctx->audio_frame)
- av_frame_free(&ctx->audio_frame);
- if (ctx->avf_context) {
- if (ctx->avf_context->pb) {
- avio_closep(&ctx->avf_context->pb);
- }
- avformat_free_context(ctx->avf_context);
- ctx->avf_context = NULL;
+ if (ctx->outfile) {
+ fclose(ctx->outfile);
+ ctx->outfile = NULL;
+ }
+ if (ctx->vpx.iface) {
+ vpx_codec_destroy(&ctx->vpx);
+ ctx->vpx.iface = NULL;
+ }
+ if (ctx->vpx_image.planes[0]) {
+ vpx_img_free(&ctx->vpx_image);
+ ctx->vpx_image.planes[0] = NULL;
}
- if (ctx->av_packet)
- av_packet_free(&ctx->av_packet);
+ vorbis_dsp_clear(&ctx->vorbis);
+ ogg_stream_clear(&ctx->video_stream);
+ ogg_stream_clear(&ctx->audio_stream);
}
void video_quit(VideoContext *ctx) {
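Putting the pieces together, the recording path after this commit is driven roughly as follows; a minimal sketch assuming the signatures shown in this diff and an already-open Camera (the loop condition is hypothetical):

```c
VideoContext *ctx = video_init();
if (ctx && video_start(ctx, "out.ogv", 640, 480, 30, 10)) {
	while (still_recording()) {            // hypothetical stop condition
		camera_next_frame(camera);         // dequeue the next V4L2 frame
		video_submit_frame(ctx, camera);   // encodes a VP8 frame when the pts advances
	}
	video_stop(ctx);  // flush the encoder, write the EOS packet, close the file
}
video_quit(ctx);
```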