From 2316435af85ef515b019115fe9365d4606c9b4d6 Mon Sep 17 00:00:00 2001
From: Roz K <roz@rozk.net>
Date: Fri, 3 Oct 2025 15:10:49 +0200
Subject: [PATCH] demuxer, decoder not working

---
 mp4/context.py | 54 -------------------------------
 mp4/decoder.py | 46 ++++++++++++++++++++++++++
 mp4/demuxer.py | 39 +++++++++++++++++-----
 mp4/frame.py   | 19 +++++++++++
 mp4/libav.py   | 88 +++++++++++++++++++++++++++++++++++++++++---------
 pve.py         |  8 +++++
 6 files changed, 176 insertions(+), 78 deletions(-)
 delete mode 100644 mp4/context.py
 create mode 100644 mp4/decoder.py
 create mode 100644 mp4/frame.py

diff --git a/mp4/context.py b/mp4/context.py
deleted file mode 100644
index c6fbbcd..0000000
--- a/mp4/context.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# RozK
-
-from . import libav
-from .codec import Codec
-from .stream import NullStream, Stream
-from .packet import Packet
-
-class Context:
-    __slots__ = '_ref'
-
-    def __init__(self):
-        self._ref = libav.alloc_context()
-        if not self._ref:
-            raise MemoryError
-
-    def __del__(self):
-        if self._ref:
-            libav.free_context(self._ref)
-
-    @property
-    def _as_parameter_(self):
-        return self._ref
-
-    def open_input(self, url):
-        if not self._ref:
-            return
-        errcode = libav.open_input(self._ref, url)
-        if errcode < 0:
-            raise Exception(f"Failed to open: {url}")
-        errcode = libav.find_stream_info(self._ref)
-        if errcode < 0:
-            libav.close_input(self._ref)
-            raise Exception("Failed to find stream info")
-
-    def close_input(self):
-        if self._ref:
-            libav.close_input(self._ref)
-
-    def find_stream(self, type):
-        if not self._ref:
-            return NullStream()
-        index, codec_ref = libav.find_best_stream(self._ref, type)
-        if index < 0 or not codec_ref:
-            return NullStream()
-        return Stream(index, Codec(codec_ref))
-
-    def read_packet(self):
-        if not self._ref:
-            return None
-        packet = Packet()
-        errcode = libav.read_frame(self._ref, packet)
-        if errcode < 0:
-            return None
-        return packet
diff --git a/mp4/decoder.py b/mp4/decoder.py
new file mode 100644
index 0000000..6edc54c
--- /dev/null
+++ b/mp4/decoder.py
@@ -0,0 +1,78 @@
+# RozK
+
+import errno
+
+from . import libav
+from .frame import Frame
+
+# libav returns POSIX error codes negated: AVERROR(e) is -e.
+_AVERROR_EAGAIN = -errno.EAGAIN
+# AVERROR_EOF is the negated 'E','O','F',' ' tag (FFERRTAG).
+_AVERROR_EOF = -0x20464F45
+
+class Decoder:
+    """Decode packets into frames through a libavcodec codec context.
+
+    NOTE(review): the context is opened with the codec defaults only; the
+    stream's codec parameters are not copied into it (see
+    avcodec_parameters_to_context). Confirm whether the streams need them.
+    """
+    __slots__ = '_ref'
+
+    def __init__(self, codec):
+        """Allocate a codec context for `codec` and open it.
+
+        Raises MemoryError when the context cannot be allocated and
+        Exception when the codec fails to open.
+        """
+        # Assigned first so __del__ finds the slot even if allocation raises.
+        self._ref = None
+        self._ref = libav.codec_alloc_context(codec)
+        if not self._ref:
+            raise MemoryError
+        errcode = libav.codec_open(self._ref, codec)
+        if errcode < 0:
+            libav.codec_free_context(self._ref)
+            # Drop the reference so it can never be freed a second time.
+            self._ref = None
+            raise Exception("Failed to open codec context")
+
+    def __del__(self):
+        """Free the codec context if one is still held."""
+        if self._ref:
+            libav.codec_free_context(self._ref)
+
+    def _receive(self, frames):
+        """Append every frame the decoder can currently output to `frames`.
+
+        Stops when the decoder asks for more input (EAGAIN) or reports end
+        of stream (EOF); any other error code raises Exception.
+        """
+        while True:
+            frame = Frame()
+            errcode = libav.codec_receive_frame(self._ref, frame)
+            if errcode == 0:
+                frames.append(frame)
+            elif errcode in (_AVERROR_EAGAIN, _AVERROR_EOF):
+                break
+            else:
+                raise Exception(f"Failed to receive frame: {errcode}")
+
+    def decode(self, packet):
+        """Feed `packet` to the decoder and return the frames it produced.
+
+        Returns a list of Frame objects (possibly empty), or None when the
+        decoder holds no codec context. Raises Exception on decoder errors.
+        """
+        if not self._ref:
+            return None
+        frames = []
+        while True:
+            errcode = libav.codec_send_packet(self._ref, packet)
+            if errcode != 0 and errcode != _AVERROR_EAGAIN:
+                raise Exception(f"Failed to send packet: {errcode}")
+            self._receive(frames)
+            # EAGAIN: the decoder was full; output is drained, so resend.
+            if errcode == 0:
+                break
+        return frames
diff --git a/mp4/demuxer.py b/mp4/demuxer.py
index fd6d881..33e9407 100644
--- a/mp4/demuxer.py
+++ b/mp4/demuxer.py
@@ -1,19 +1,42 @@
 # RozK
 
 from . import libav
-from .context import Context
+from .codec import Codec
+from .stream import NullStream, Stream
+from .packet import Packet
 
 class Demuxer:
-    __slots__ = 'context', 'video_stream', 'audio_stream'
+    __slots__ = '_ref', 'video_stream', 'audio_stream'  # _ref: value returned by libav.format_alloc_context
 
     def __init__(self, path):
-        self.context = Context()
-        self.context.open_input("file:" + path)
-        self.video_stream = self.context.find_stream(libav.AVMEDIA_TYPE_VIDEO)
-        self.audio_stream = self.context.find_stream(libav.AVMEDIA_TYPE_AUDIO)
+        self._ref = libav.format_alloc_context()
+        if not self._ref:  # falsy result: allocation failed
+            raise MemoryError
+        errcode = libav.format_open_input(self._ref, "file:" + path)  # the path is opened through a "file:" URL
+        if errcode < 0:  # negative return codes are treated as failure
+            raise Exception(f"Failed to open: {path}")
+        errcode = libav.format_find_stream_info(self._ref)
+        if errcode < 0:
+            libav.format_close_input(self._ref)  # close the opened input before raising
+            raise Exception("Failed to find stream info")
+        self.video_stream = self._find_stream(libav.AVMEDIA_TYPE_VIDEO)  # NullStream when none is found
+        self.audio_stream = self._find_stream(libav.AVMEDIA_TYPE_AUDIO)  # NullStream when none is found
+
+    def _find_stream(self, type):  # 'type' is one of the libav.AVMEDIA_TYPE_* values
+        index, codec_ref = libav.format_find_best_stream(self._ref, type)
+        if index < 0 or not codec_ref:  # lookup failed or returned no codec
+            return NullStream()
+        return Stream(index, Codec(codec_ref))
 
     def read_packet(self):
-        return self.context.read_packet()
+        if not self._ref:  # no format context held: nothing to read
+            return None
+        packet = Packet()
+        errcode = libav.read_frame(self._ref, packet)
+        if errcode < 0:  # any negative code yields None (NOTE(review): presumably includes end of file)
+            return None
+        return packet
 
     def close(self):
-        self.context.close_input()
+        if self._ref:  # skip the call when no format context is held
+            libav.format_close_input(self._ref)
diff --git a/mp4/frame.py b/mp4/frame.py
new file mode 100644
index 0000000..75c1fb3
--- /dev/null
+++ b/mp4/frame.py
@@ -0,0 +1,19 @@
+# RozK
+
+from . import libav
+
+class Frame:  # wraps one frame obtained from libav.frame_alloc
+    __slots__ = '_ref'  # value returned by libav.frame_alloc
+
+    def __init__(self):
+        self._ref = libav.frame_alloc()
+        if not self._ref:  # falsy result: allocation failed
+            raise MemoryError
+
+    def __del__(self):
+        if self._ref:  # free only when a frame is actually held
+            libav.frame_free(self._ref)
+
+    @property
+    def _as_parameter_(self):  # ctypes passes this value when a Frame is given as a call argument
+        return self._ref
diff --git a/mp4/libav.py b/mp4/libav.py
index 3890650..d9e0f0a 100644
--- a/mp4/libav.py
+++ b/mp4/libav.py
@@ -3,9 +3,28 @@
 
 import ctypes
 
+_avutil = ctypes.cdll.LoadLibrary('libavutil.so')
 _avformat = ctypes.cdll.LoadLibrary('libavformat.so')
 _avcodec = ctypes.cdll.LoadLibrary('libavcodec.so')
 
+class AVFrame(ctypes.Structure):  # no fields declared; only used through the pointer types below
+    pass
+
+AVFrame_p = ctypes.POINTER(AVFrame)  # AVFrame *
+AVFrame_pp = ctypes.POINTER(AVFrame_p)  # AVFrame **
+
+_avutil.av_frame_alloc.restype = AVFrame_p
+_avutil.av_frame_alloc.argtypes = None
+
+def frame_alloc():  # returns the AVFrame_p produced by av_frame_alloc
+    return _avutil.av_frame_alloc()
+
+_avutil.av_frame_free.restype = None
+_avutil.av_frame_free.argtypes = [AVFrame_pp]
+
+def frame_free(frame):  # frame: AVFrame_p, handed to av_frame_free by reference
+    _avutil.av_frame_free(ctypes.byref(frame))
+
 class AVFormatContext(ctypes.Structure):
     pass
 
@@ -15,37 +34,37 @@ AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p)
 _avformat.avformat_alloc_context.restype = AVFormatContext_p
 _avformat.avformat_alloc_context.argtypes = None
 
-def alloc_context():
+def format_alloc_context():  # returns the AVFormatContext_p produced by avformat_alloc_context
     return _avformat.avformat_alloc_context()
 
 _avformat.avformat_free_context.restype = None
-_avformat.avformat_free_context.argtypes = [AVFormatContext_p] # context
+_avformat.avformat_free_context.argtypes = [AVFormatContext_p]
 
-def free_context(context):
+def format_free_context(context):  # context: AVFormatContext_p
     _avformat.avformat_free_context(context)
 
 _avformat.avformat_open_input.restype = ctypes.c_int
 _avformat.avformat_open_input.argtypes = [
-    AVFormatContext_pp, # context
+    AVFormatContext_pp,
     ctypes.c_char_p, # url
     ctypes.c_void_p, # format
     ctypes.POINTER(ctypes.c_void_p)] # options
 
-def open_input(context, url):
-    return _avformat.avformat_open_input(ctypes.byref(context), url.encode('ascii', 'ignore'), None, None)
+def format_open_input(context, url):  # opens url on the context (passed by reference); returns the c_int status
+    return _avformat.avformat_open_input(ctypes.byref(context), url.encode('utf-8'), None, None)  # UTF-8 keeps non-ASCII characters that 'ascii'/'ignore' silently dropped
 
 _avformat.avformat_close_input.restype = None
-_avformat.avformat_close_input.argtypes = [AVFormatContext_pp] # context
+_avformat.avformat_close_input.argtypes = [AVFormatContext_pp]
 
-def close_input(context):
+def format_close_input(context):  # context: AVFormatContext_p, handed to avformat_close_input by reference
     _avformat.avformat_close_input(ctypes.byref(context))
 
 _avformat.avformat_find_stream_info.restype = ctypes.c_int
 _avformat.avformat_find_stream_info.argtypes = [
-    AVFormatContext_p, # context
+    AVFormatContext_p,
     ctypes.POINTER(ctypes.c_void_p)] # options
 
-def find_stream_info(context):
+def format_find_stream_info(context):  # passes no options; returns the c_int status
     return _avformat.avformat_find_stream_info(context, None)
 
 AVMEDIA_TYPE_UNKNOWN = -1
@@ -66,14 +85,14 @@ AVCodec_pp = ctypes.POINTER(AVCodec_p)
 
 _avformat.av_find_best_stream.restype = ctypes.c_int
 _avformat.av_find_best_stream.argtypes = [
-    AVFormatContext_p, # context
+    AVFormatContext_p,
     ctypes.c_int, # type
     ctypes.c_int, # wanted stream
     ctypes.c_int, # related stream
-    AVCodec_pp, # decoder
+    AVCodec_pp,
     ctypes.c_int] # flags
 
-def find_best_stream(context, type):
+def format_find_best_stream(context, type):  # returns (c_int result, AVCodec_p written through the by-reference argument)
     codec = AVCodec_p()
     index = _avformat.av_find_best_stream(context, type, -1, -1, ctypes.byref(codec), 0)
     return index, codec
@@ -98,15 +117,52 @@ def packet_alloc():
     return _avformat.av_packet_alloc()
 
 _avformat.av_packet_free.restype = None
-_avformat.av_packet_free.argtypes = [AVPacket_pp] # packet
+_avformat.av_packet_free.argtypes = [AVPacket_pp]
 
 def packet_free(packet):
     _avformat.av_packet_free(ctypes.byref(packet))
 
 _avformat.av_read_frame.restype = ctypes.c_int
-_avformat.av_read_frame.argtypes = [
-    AVFormatContext_p, # context
-    AVPacket_p] # packet
+_avformat.av_read_frame.argtypes = [AVFormatContext_p, AVPacket_p]
 
 def read_frame(context, packet):
     return _avformat.av_read_frame(context, packet)
+
+class AVCodecContext(ctypes.Structure):  # no fields declared; only used through the pointer types below
+    pass
+
+AVCodecContext_p = ctypes.POINTER(AVCodecContext)  # AVCodecContext *
+AVCodecContext_pp = ctypes.POINTER(AVCodecContext_p)  # AVCodecContext **
+
+_avcodec.avcodec_alloc_context3.restype = AVCodecContext_p
+_avcodec.avcodec_alloc_context3.argtypes = [AVCodec_p]
+
+def codec_alloc_context(codec):  # returns the AVCodecContext_p produced by avcodec_alloc_context3
+    return _avcodec.avcodec_alloc_context3(codec)
+
+_avcodec.avcodec_free_context.restype = None
+_avcodec.avcodec_free_context.argtypes = [AVCodecContext_pp]
+
+def codec_free_context(context):  # context: AVCodecContext_p, handed to avcodec_free_context by reference
+    _avcodec.avcodec_free_context(ctypes.byref(context))
+
+_avcodec.avcodec_open2.restype = ctypes.c_int
+_avcodec.avcodec_open2.argtypes = [
+    AVCodecContext_p,
+    AVCodec_p,
+    ctypes.POINTER(ctypes.c_void_p)] # options
+
+def codec_open(context, codec):  # passes no options; returns the c_int status
+    return _avcodec.avcodec_open2(context, codec, None)
+
+_avcodec.avcodec_send_packet.restype = ctypes.c_int
+_avcodec.avcodec_send_packet.argtypes = [AVCodecContext_p, AVPacket_p]
+
+def codec_send_packet(context, packet):  # returns the c_int status of avcodec_send_packet
+    return _avcodec.avcodec_send_packet(context, packet)
+
+_avcodec.avcodec_receive_frame.restype = ctypes.c_int
+_avcodec.avcodec_receive_frame.argtypes = [AVCodecContext_p, AVFrame_p]
+
+def codec_receive_frame(context, frame):  # returns the c_int status of avcodec_receive_frame
+    return _avcodec.avcodec_receive_frame(context, frame)
diff --git a/pve.py b/pve.py
index b00aea0..9fc9f05 100644
--- a/pve.py
+++ b/pve.py
@@ -1,19 +1,27 @@
 # RozK
 
 from mp4.demuxer import Demuxer
+from mp4.decoder import Decoder
 
 demuxer = Demuxer('test.mp4')
 
 print(demuxer.video_stream.codec.name)
 print(demuxer.audio_stream.codec.name)
 
+video_decoder = Decoder(demuxer.video_stream.codec)  # one decoder per stream codec
+audio_decoder = Decoder(demuxer.audio_stream.codec)
+
 while True:
     packet = demuxer.read_packet()
     if packet is None:
         break
     if demuxer.video_stream.contains(packet):
+        video_frames = video_decoder.decode(packet)  # frames decoded from this packet
+        print(f"decoded {len(video_frames)} video frames")
         continue
     elif demuxer.audio_stream.contains(packet):
+        audio_frames = audio_decoder.decode(packet)  # frames decoded from this packet
+        print(f"decoded {len(audio_frames)} audio frames")
         continue
     else:
         print("unkown packet")