demuxer, decoder not working

2025-10-03 15:10:49 +02:00
parent 3c56a880da
commit 2316435af8
6 changed files with 176 additions and 78 deletions
--- a/mp4/context.py
+++ b/mp4/context.py
@ -1,54 +0,0 @@
-# RozK
-
-from . import libav
-from .codec import Codec
-from .stream import NullStream, Stream
-from .packet import Packet
-
-class Context:
-    __slots__ = '_ref'
-
-    def __init__(self):
-        self._ref = libav.alloc_context()
-        if not self._ref:
-            raise MemoryError
-
-    def __del__(self):
-        if self._ref:
-            libav.free_context(self._ref)
-
-    @property
-    def _as_parameter_(self):
-        return self._ref
-
-    def open_input(self, url):
-        if not self._ref:
-            return
-        errcode = libav.open_input(self._ref, url)
-        if errcode < 0:
-            raise Exception(f"Failed to open: {url}")
-        errcode = libav.find_stream_info(self._ref)
-        if errcode < 0:
-            libav.close_input(self._ref)
-            raise Exception("Failed to find stream info")
-
-    def close_input(self):
-        if self._ref:
-            libav.close_input(self._ref)
-
-    def find_stream(self, type):
-        if not self._ref:
-            return NullStream()
-        index, codec_ref = libav.find_best_stream(self._ref, type)
-        if index < 0 or not codec_ref:
-            return NullStream()
-        return Stream(index, Codec(codec_ref))
-
-    def read_packet(self):
-        if not self._ref:
-            return None
-        packet = Packet()
-        errcode = libav.read_frame(self._ref, packet)
-        if errcode < 0:
-            return None
-        return packet
--- a/mp4/decoder.py
+++ b/mp4/decoder.py
@ -0,0 +1,46 @@
+# RozK
+
+import errno
+
+from . import libav
+from .frame import Frame
+
+class Decoder:
+    """Decode packets into frames through a libavcodec codec context.
+
+    The native codec context is kept in ``_ref`` and released when the
+    object is finalized.
+    """
+
+    __slots__ = '_ref'
+
+    # libav reports errno-style failures negated (AVERROR(e) == -e), so the
+    # "try again" code is -EAGAIN, never the positive errno value.
+    _EAGAIN = -errno.EAGAIN
+    # AVERROR_EOF is the negated little-endian tag 'E', 'O', 'F', ' '.
+    _EOF = -int.from_bytes(b'EOF ', 'little')
+
+    def __init__(self, codec):
+        """Allocate and open a codec context for *codec*.
+
+        Raises:
+            MemoryError: if the codec context cannot be allocated.
+            Exception: if the codec context cannot be opened.
+        """
+        # Assigned first so __del__ always finds the slot, even when the
+        # allocation call itself raises.
+        self._ref = None
+        self._ref = libav.codec_alloc_context(codec)
+        if not self._ref:
+            raise MemoryError
+        errcode = libav.codec_open(self._ref, codec)
+        if errcode < 0:
+            libav.codec_free_context(self._ref)
+            raise Exception("Failed to open codec context")
+
+    def __del__(self):
+        """Free the codec context if one is still held."""
+        if self._ref:
+            libav.codec_free_context(self._ref)
+
+    def _receive(self, frames):
+        """Append every frame the decoder currently has ready to *frames*.
+
+        Raises:
+            Exception: on any error other than "needs more input" or
+                end of stream.
+        """
+        while True:
+            frame = Frame()
+            errcode = libav.codec_receive_frame(self._ref, frame)
+            if errcode == 0:
+                frames.append(frame)
+            elif errcode in (self._EAGAIN, self._EOF):
+                # Needs more input, or fully drained: nothing left for now.
+                break
+            else:
+                raise Exception(f"Failed to receive frame: {errcode}")
+
+    def decode(self, packet):
+        """Send *packet* to the decoder and return the frames it produces.
+
+        Returns:
+            A list of ``Frame`` objects (possibly empty), or ``None`` when
+            no codec context is held.
+
+        Raises:
+            Exception: if sending the packet or receiving a frame fails.
+        """
+        if not self._ref:
+            return None
+        frames = []
+        while True:
+            errcode = libav.codec_send_packet(self._ref, packet)
+            if errcode != 0 and errcode != self._EAGAIN:
+                raise Exception(f"Failed to send packet: {errcode}")
+            drained_before = len(frames)
+            self._receive(frames)
+            if errcode == 0:
+                break
+            # The packet was refused (EAGAIN): retry only if draining made
+            # room, otherwise this loop would never terminate.
+            if len(frames) == drained_before:
+                raise Exception(f"Failed to send packet: {errcode}")
+        return frames
--- a/mp4/demuxer.py
+++ b/mp4/demuxer.py
@ -1,19 +1,42 @@
 # RozK

 from . import libav
-from .context import Context
+from .codec import Codec
+from .stream import NullStream, Stream
+from .packet import Packet

 class Demuxer:
-    __slots__ = 'context', 'video_stream', 'audio_stream'
+    """Open a media file through libavformat and read packets from it.
+
+    ``video_stream`` and ``audio_stream`` hold the result of the best-stream
+    lookup for each media type (a ``NullStream`` when none is found).
+    """
+
+    __slots__ = '_ref', 'video_stream', 'audio_stream'

    def __init__(self, path):
-        self.context = Context()
-        self.context.open_input("file:" + path)
-        self.video_stream = self.context.find_stream(libav.AVMEDIA_TYPE_VIDEO)
-        self.audio_stream = self.context.find_stream(libav.AVMEDIA_TYPE_AUDIO)
+        """Open ``"file:" + path`` and look up its video and audio streams.
+
+        Raises:
+            MemoryError: if the format context cannot be allocated.
+            Exception: if the input cannot be opened or its stream
+                information cannot be read.
+        """
+        self._ref = libav.format_alloc_context()
+        if not self._ref:
+            raise MemoryError
+        errcode = libav.format_open_input(self._ref, "file:" + path)
+        if errcode < 0:
+            raise Exception(f"Failed to open: {path}")
+        errcode = libav.format_find_stream_info(self._ref)
+        if errcode < 0:
+            libav.format_close_input(self._ref)
+            raise Exception("Failed to find stream info")
+        self.video_stream = self._find_stream(libav.AVMEDIA_TYPE_VIDEO)
+        self.audio_stream = self._find_stream(libav.AVMEDIA_TYPE_AUDIO)
+
+    def _find_stream(self, type):
+        """Return a ``Stream`` for the best stream of *type*, else ``NullStream``."""
+        index, codec_ref = libav.format_find_best_stream(self._ref, type)
+        if index < 0 or not codec_ref:
+            return NullStream()
+        return Stream(index, Codec(codec_ref))

    def read_packet(self):
-        return self.context.read_packet()
+        """Read the next packet from the input.
+
+        Returns ``None`` when the demuxer holds no context or when the read
+        reports any negative code; end of input and read errors are not
+        distinguished.
+        """
+        if not self._ref:
+            return None
+        packet = Packet()
+        errcode = libav.read_frame(self._ref, packet)
+        if errcode < 0:
+            return None
+        return packet

    def close(self):
-        self.context.close_input()
+        """Close the input if it is still open; safe to call repeatedly."""
+        if self._ref:
+            libav.format_close_input(self._ref)
+
+    def __del__(self):
+        """Release the native context when close() was never called."""
+        # The slot is unset when __init__ failed before assigning it.
+        if getattr(self, '_ref', None):
+            libav.format_close_input(self._ref)
--- a/mp4/frame.py
+++ b/mp4/frame.py
@ -0,0 +1,19 @@
+# RozK
+
+from . import libav
+
+class Frame:
+    """Owning wrapper around a native frame obtained from ``libav.frame_alloc``."""
+
+    __slots__ = '_ref'
+
+    def __init__(self):
+        """Allocate the native frame; raise ``MemoryError`` when that fails."""
+        handle = libav.frame_alloc()
+        self._ref = handle
+        if not handle:
+            raise MemoryError
+
+    def __del__(self):
+        """Free the native frame if one is still held."""
+        if not self._ref:
+            return
+        libav.frame_free(self._ref)
+
+    @property
+    def _as_parameter_(self):
+        """Native handle, exposed under the attribute name ctypes looks up."""
+        return self._ref
--- a/mp4/libav.py
+++ b/mp4/libav.py
@ -3,9 +3,28 @@

 import ctypes

+_avutil = ctypes.cdll.LoadLibrary('libavutil.so')
 _avformat = ctypes.cdll.LoadLibrary('libavformat.so')
 _avcodec = ctypes.cdll.LoadLibrary('libavcodec.so')

+class AVFrame(ctypes.Structure):
+    # No fields are declared: the type is only used through the pointer
+    # types defined right below.
+    pass
+
+AVFrame_p = ctypes.POINTER(AVFrame)
+AVFrame_pp = ctypes.POINTER(AVFrame_p)
+
+_avutil.av_frame_alloc.restype = AVFrame_p
+_avutil.av_frame_alloc.argtypes = None
+
+def frame_alloc():
+    """Call ``av_frame_alloc`` and return its ``AVFrame_p`` result."""
+    return _avutil.av_frame_alloc()
+
+_avutil.av_frame_free.restype = None
+_avutil.av_frame_free.argtypes = [AVFrame_pp]
+
+def frame_free(frame):
+    """Call ``av_frame_free`` on *frame*, passed by reference.
+
+    *frame* must be a ctypes object, since ``ctypes.byref`` is applied to it.
+    """
+    _avutil.av_frame_free(ctypes.byref(frame))
+
 class AVFormatContext(ctypes.Structure):
    pass

@ -15,37 +34,37 @@ AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p)
 _avformat.avformat_alloc_context.restype = AVFormatContext_p
 _avformat.avformat_alloc_context.argtypes = None

-def alloc_context():
+def format_alloc_context():
+    """Call ``avformat_alloc_context`` and return its ``AVFormatContext_p`` result."""
    return _avformat.avformat_alloc_context()

 _avformat.avformat_free_context.restype = None
-_avformat.avformat_free_context.argtypes = [AVFormatContext_p] # context
+_avformat.avformat_free_context.argtypes = [AVFormatContext_p]

-def free_context(context):
+def format_free_context(context):
+    """Call ``avformat_free_context`` on *context* (passed by value)."""
    _avformat.avformat_free_context(context)

 _avformat.avformat_open_input.restype = ctypes.c_int
 _avformat.avformat_open_input.argtypes = [
-    AVFormatContext_pp, # context
+    AVFormatContext_pp,
    ctypes.c_char_p, # url
    ctypes.c_void_p, # format
    ctypes.POINTER(ctypes.c_void_p)] # options

-def open_input(context, url):
+def format_open_input(context, url):
-    return _avformat.avformat_open_input(ctypes.byref(context), url.encode('ascii', 'ignore'), None, None)
+    """Call ``avformat_open_input`` for *url* and return its integer result.
+
+    *context* is passed by reference; the format and options arguments are
+    passed as ``None``.
+    """
+    # Encode as UTF-8 instead of ASCII with errors ignored: the old form
+    # silently dropped non-ASCII characters and so opened a different path.
+    # surrogateescape restores bytes Python could not decode in a file name.
+    encoded_url = url.encode('utf-8', 'surrogateescape')
+    return _avformat.avformat_open_input(ctypes.byref(context), encoded_url, None, None)

 _avformat.avformat_close_input.restype = None
-_avformat.avformat_close_input.argtypes = [AVFormatContext_pp] # context
+_avformat.avformat_close_input.argtypes = [AVFormatContext_pp]

-def close_input(context):
+def format_close_input(context):
+    """Call ``avformat_close_input`` on *context*, passed by reference."""
    _avformat.avformat_close_input(ctypes.byref(context))

 _avformat.avformat_find_stream_info.restype = ctypes.c_int
 _avformat.avformat_find_stream_info.argtypes = [
-    AVFormatContext_p, # context
+    AVFormatContext_p,
    ctypes.POINTER(ctypes.c_void_p)] # options

-def find_stream_info(context):
+def format_find_stream_info(context):
+    """Call ``avformat_find_stream_info`` with no options; return its integer result."""
    return _avformat.avformat_find_stream_info(context, None)

 AVMEDIA_TYPE_UNKNOWN = -1
@ -66,14 +85,14 @@ AVCodec_pp = ctypes.POINTER(AVCodec_p)

 _avformat.av_find_best_stream.restype = ctypes.c_int
 _avformat.av_find_best_stream.argtypes = [
-    AVFormatContext_p, # context
+    AVFormatContext_p,
    ctypes.c_int, # type
    ctypes.c_int, # wanted stream
    ctypes.c_int, # related stream
-    AVCodec_pp, # decoder
+    AVCodec_pp,
    ctypes.c_int] # flags

-def find_best_stream(context, type):
+def format_find_best_stream(context, type):
+    """Call ``av_find_best_stream`` for media *type* and return ``(index, codec)``.
+
+    Both the wanted and related stream arguments are passed as -1 and the
+    flags as 0. ``codec`` is a fresh ``AVCodec_p`` handed to the call by
+    reference; ``index`` is the call's integer result.
+    """
    codec = AVCodec_p()
    index = _avformat.av_find_best_stream(context, type, -1, -1, ctypes.byref(codec), 0)
    return index, codec
@ -98,15 +117,52 @@ def packet_alloc():
    return _avformat.av_packet_alloc()

 _avformat.av_packet_free.restype = None
-_avformat.av_packet_free.argtypes = [AVPacket_pp] # packet
+_avformat.av_packet_free.argtypes = [AVPacket_pp]

 def packet_free(packet):
+    """Call ``av_packet_free`` on *packet*, passed by reference."""
    _avformat.av_packet_free(ctypes.byref(packet))

 _avformat.av_read_frame.restype = ctypes.c_int
-_avformat.av_read_frame.argtypes = [
-    AVFormatContext_p, # context
-    AVPacket_p] # packet
+_avformat.av_read_frame.argtypes = [AVFormatContext_p, AVPacket_p]

 def read_frame(context, packet):
+    """Call ``av_read_frame`` with *context* and *packet*; return its integer result."""
    return _avformat.av_read_frame(context, packet)
+
+class AVCodecContext(ctypes.Structure):
+    # No fields are declared: the type is only used through the pointer
+    # types defined right below.
+    pass
+
+AVCodecContext_p = ctypes.POINTER(AVCodecContext)
+AVCodecContext_pp = ctypes.POINTER(AVCodecContext_p)
+
+_avcodec.avcodec_alloc_context3.restype = AVCodecContext_p
+_avcodec.avcodec_alloc_context3.argtypes = [AVCodec_p]
+
+def codec_alloc_context(codec):
+    """Call ``avcodec_alloc_context3`` for *codec*; return its ``AVCodecContext_p`` result."""
+    return _avcodec.avcodec_alloc_context3(codec)
+
+_avcodec.avcodec_free_context.restype = None
+_avcodec.avcodec_free_context.argtypes = [AVCodecContext_pp]
+
+def codec_free_context(context):
+    """Call ``avcodec_free_context`` on *context*, passed by reference."""
+    _avcodec.avcodec_free_context(ctypes.byref(context))
+
+_avcodec.avcodec_open2.restype = ctypes.c_int
+_avcodec.avcodec_open2.argtypes = [
+    AVCodecContext_p,
+    AVCodec_p,
+    ctypes.POINTER(ctypes.c_void_p)] # options
+
+def codec_open(context, codec):
+    """Call ``avcodec_open2`` with no options; return its integer result."""
+    return _avcodec.avcodec_open2(context, codec, None)
+
+_avcodec.avcodec_send_packet.restype = ctypes.c_int
+_avcodec.avcodec_send_packet.argtypes = [AVCodecContext_p, AVPacket_p]
+
+def codec_send_packet(context, packet):
+    """Call ``avcodec_send_packet`` with *context* and *packet*; return its integer result."""
+    return _avcodec.avcodec_send_packet(context, packet)
+
+_avcodec.avcodec_receive_frame.restype = ctypes.c_int
+_avcodec.avcodec_receive_frame.argtypes = [AVCodecContext_p, AVFrame_p]
+
+def codec_receive_frame(context, frame):
+    """Call ``avcodec_receive_frame`` with *context* and *frame*; return its integer result."""
+    return _avcodec.avcodec_receive_frame(context, frame)
--- a/pve.py
+++ b/pve.py
@ -1,19 +1,27 @@
 # RozK

 from mp4.demuxer import Demuxer
+from mp4.decoder import Decoder

 demuxer = Demuxer('test.mp4')

 print(demuxer.video_stream.codec.name)
 print(demuxer.audio_stream.codec.name)

+video_decoder = Decoder(demuxer.video_stream.codec)
+audio_decoder = Decoder(demuxer.audio_stream.codec)
+
 while True:
    packet = demuxer.read_packet()
    if packet is None:
        break
    if demuxer.video_stream.contains(packet):
+        video_frames = video_decoder.decode(packet)
+        print(f"decoded {len(video_frames)} video frames")
        continue
    elif demuxer.audio_stream.contains(packet):
+        audio_frames = audio_decoder.decode(packet)
+        print(f"decoded {len(audio_frames)} audio frames")
        continue
    else:
        print("unkown packet")