demuxer, decoder not working

2025-10-03 15:10:49 +02:00
parent 3c56a880da
commit 2316435af8
6 changed files with 176 additions and 78 deletions
--- a/mp4/context.py
+++ b/mp4/context.py
@@ -1,54 +0,0 @@
 # RozK
 from . import libav
 from .codec import Codec
 from .stream import NullStream, Stream
 from .packet import Packet
 class Context:
    """Wrapper around a libav format-context reference held in ``_ref``.

    Combines allocation, input opening, stream lookup and packet reading
    on top of the functions exposed by the sibling ``libav`` module.
    """
    __slots__ = '_ref'
    def __init__(self):
        """Allocate the underlying context.

        Raises:
            MemoryError: if ``libav.alloc_context()`` returns a falsy reference.
        """
        self._ref = libav.alloc_context()
        if not self._ref:
            raise MemoryError
    def __del__(self):
        """Free the context through ``libav.free_context`` if ``_ref`` is still truthy."""
        if self._ref:
            libav.free_context(self._ref)
    @property
    def _as_parameter_(self):
        """Return ``_ref``; ctypes consults ``_as_parameter_`` when an object is passed as a foreign-function argument."""
        return self._ref
    def open_input(self, url):
        """Open *url* and probe its streams.

        Does nothing when ``_ref`` is falsy.

        Raises:
            Exception: if ``libav.open_input`` or ``libav.find_stream_info``
                returns a negative code.
        """
        if not self._ref:
            return
        errcode = libav.open_input(self._ref, url)
        if errcode < 0:
            raise Exception(f"Failed to open: {url}")
        errcode = libav.find_stream_info(self._ref)
        if errcode < 0:
            # The input was opened successfully, so close it before reporting the failure.
            libav.close_input(self._ref)
            raise Exception("Failed to find stream info")
    def close_input(self):
        """Close the input through ``libav.close_input`` when ``_ref`` is truthy."""
        if self._ref:
            libav.close_input(self._ref)
    def find_stream(self, type):
        """Look up the best stream of the given media *type*.

        Returns:
            ``Stream(index, Codec(codec_ref))`` on success, or a ``NullStream()``
            when there is no context, the returned index is negative, or the
            returned codec reference is falsy.
        """
        if not self._ref:
            return NullStream()
        index, codec_ref = libav.find_best_stream(self._ref, type)
        if index < 0 or not codec_ref:
            return NullStream()
        return Stream(index, Codec(codec_ref))
    def read_packet(self):
        """Read the next packet from the input.

        Returns:
            A freshly created ``Packet``, or ``None`` when there is no context
            or ``libav.read_frame`` returns a negative code.
        """
        if not self._ref:
            return None
        packet = Packet()
        errcode = libav.read_frame(self._ref, packet)
        # NOTE(review): every negative code is reported as None, so callers
        # cannot tell end of input from a read error.
        if errcode < 0:
            return None
        return packet
--- a/mp4/decoder.py
+++ b/mp4/decoder.py
@@ -0,0 +1,46 @@
 # RozK
 import errno
 from . import libav
 from .frame import Frame
 class Decoder:
    """Owns a libav codec context and decodes packets into frames.

    The context is allocated and opened in ``__init__`` and released in
    ``__del__``. ``decode`` follows libav's send/receive protocol: a packet
    is submitted with ``codec_send_packet`` and every frame that becomes
    available is pulled with ``codec_receive_frame``.
    """
    __slots__ = '_ref'
    # libav reports errno-style failures negated (AVERROR(e) == -e), so the
    # "try again" status is -EAGAIN and never equals the positive errno value.
    _AVERROR_EAGAIN = -errno.EAGAIN
    # AVERROR_EOF is -MKTAG('E', 'O', 'F', ' '): the decoder has been fully drained.
    _AVERROR_EOF = -0x20464F45
    def __init__(self, codec):
        """Allocate and open a codec context for *codec*.

        Args:
            codec: forwarded unchanged to ``libav.codec_alloc_context`` and
                ``libav.codec_open``.

        Raises:
            MemoryError: if the context could not be allocated.
            Exception: if opening the codec context fails.
        """
        self._ref = libav.codec_alloc_context(codec)
        if not self._ref:
            raise MemoryError
        errcode = libav.codec_open(self._ref, codec)
        if errcode < 0:
            libav.codec_free_context(self._ref)
            raise Exception("Failed to open codec context")
    def __del__(self):
        """Release the codec context if it is still held."""
        if self._ref:
            libav.codec_free_context(self._ref)
    def _receive(self, frames):
        """Append every frame the decoder currently has ready to *frames*.

        Stops when the decoder asks for more input (``AVERROR(EAGAIN)``) or
        reports that it is drained (``AVERROR_EOF``).

        Raises:
            Exception: on any other non-zero status.
        """
        while True:
            frame = Frame()
            errcode = libav.codec_receive_frame(self._ref, frame)
            if errcode == 0:
                frames.append(frame)
            elif errcode in (self._AVERROR_EAGAIN, self._AVERROR_EOF):
                break
            else:
                raise Exception(f"Failed to receive frame: {errcode}")
    # Keep the original (misspelled) name available for existing callers.
    _recieve = _receive
    def decode(self, packet):
        """Decode *packet* and return the list of frames it produced.

        Returns:
            A list of ``Frame`` objects (possibly empty), or ``None`` when the
            codec context is not available.

        Raises:
            Exception: if sending the packet or receiving a frame fails.
        """
        if not self._ref:
            return None
        frames = []
        while True:
            errcode = libav.codec_send_packet(self._ref, packet)
            if errcode not in (0, self._AVERROR_EAGAIN):
                raise Exception(f"Failed to send packet: {errcode}")
            drained_before = len(frames)
            self._receive(frames)
            if errcode == 0:
                break
            # EAGAIN on send means "read output first"; if draining yielded
            # nothing, retrying would spin forever, so report the failure.
            if len(frames) == drained_before:
                raise Exception(f"Failed to send packet: {errcode}")
        return frames
--- a/mp4/demuxer.py
+++ b/mp4/demuxer.py
@@ -1,19 +1,42 @@
 # RozK
 from . import libav
-from .context import Context
+from .codec import Codec
 from .stream import NullStream, Stream
 from .packet import Packet
 class Demuxer:
-    __slots__ = 'context', 'video_stream', 'audio_stream'
+    __slots__ = '_ref', 'video_stream', 'audio_stream'
    def __init__(self, path):
-        self.context = Context()
+        self._ref = libav.format_alloc_context()
-        self.context.open_input("file:" + path)
+        if not self._ref:
-        self.video_stream = self.context.find_stream(libav.AVMEDIA_TYPE_VIDEO)
+            raise MemoryError
-        self.audio_stream = self.context.find_stream(libav.AVMEDIA_TYPE_AUDIO)
+        errcode = libav.format_open_input(self._ref, "file:" + path)
        if errcode < 0:
            raise Exception(f"Failed to open: {path}")
        errcode = libav.format_find_stream_info(self._ref)
        if errcode < 0:
            libav.format_close_input(self._ref)
            raise Exception("Failed to find stream info")
        self.video_stream = self._find_stream(libav.AVMEDIA_TYPE_VIDEO)
        self.audio_stream = self._find_stream(libav.AVMEDIA_TYPE_AUDIO)
    def _find_stream(self, type):
        """Return the best stream of media *type*, or a ``NullStream`` if none is usable.

        A stream counts as usable when ``libav.format_find_best_stream``
        yields a non-negative index together with a truthy codec reference.
        """
        stream_index, codec_ptr = libav.format_find_best_stream(self._ref, type)
        if stream_index >= 0 and codec_ptr:
            return Stream(stream_index, Codec(codec_ptr))
        return NullStream()
    def read_packet(self):
-        return self.context.read_packet()
+        if not self._ref:
            return None
        packet = Packet()
        errcode = libav.read_frame(self._ref, packet)
        if errcode < 0:
            return None
        return packet
    def close(self):
-        self.context.close_input()
+        if self._ref:
            libav.format_close_input(self._ref)
--- a/mp4/frame.py
+++ b/mp4/frame.py
@@ -0,0 +1,19 @@
 # RozK
 from . import libav
 class Frame:
    """Python-side owner of one frame obtained from ``libav.frame_alloc``."""
    __slots__ = '_ref'
    def __init__(self):
        """Allocate the frame; raise ``MemoryError`` if the allocation yields a falsy reference."""
        # Store the reference before validating it so __del__ always finds the slot set.
        self._ref = libav.frame_alloc()
        if self._ref:
            return
        raise MemoryError
    def __del__(self):
        """Hand the frame back to ``libav.frame_free`` when a reference is still held."""
        if not self._ref:
            return
        libav.frame_free(self._ref)
    @property
    def _as_parameter_(self):
        """Expose the stored reference so the object can be passed where ctypes expects an argument."""
        handle = self._ref
        return handle
--- a/mp4/libav.py
+++ b/mp4/libav.py
@@ -3,9 +3,28 @@
 import ctypes
 _avutil = ctypes.cdll.LoadLibrary('libavutil.so')
 _avformat = ctypes.cdll.LoadLibrary('libavformat.so')
 _avcodec = ctypes.cdll.LoadLibrary('libavcodec.so')
 class AVFrame(ctypes.Structure):
    """Declared without fields; only pointers to it are exchanged with libavutil."""
 AVFrame_p = ctypes.POINTER(AVFrame)
 AVFrame_pp = ctypes.POINTER(AVFrame_p)
 _avutil.av_frame_alloc.argtypes = None
 _avutil.av_frame_alloc.restype = AVFrame_p
 def frame_alloc():
    """Call ``av_frame_alloc`` and return the resulting ``AVFrame_p``."""
    allocated = _avutil.av_frame_alloc()
    return allocated
 _avutil.av_frame_free.argtypes = [AVFrame_pp]
 _avutil.av_frame_free.restype = None
 def frame_free(frame):
    """Pass *frame* by reference to ``av_frame_free``."""
    frame_ref = ctypes.byref(frame)
    _avutil.av_frame_free(frame_ref)
 class AVFormatContext(ctypes.Structure):
    pass
@@ -15,37 +34,37 @@ AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p)
 _avformat.avformat_alloc_context.restype = AVFormatContext_p
 _avformat.avformat_alloc_context.argtypes = None
-def alloc_context():
+def format_alloc_context():
    return _avformat.avformat_alloc_context()
 _avformat.avformat_free_context.restype = None
-_avformat.avformat_free_context.argtypes = [AVFormatContext_p] # context
+_avformat.avformat_free_context.argtypes = [AVFormatContext_p]
-def free_context(context):
+def format_free_context(context):
    _avformat.avformat_free_context(context)
 _avformat.avformat_open_input.restype = ctypes.c_int
 _avformat.avformat_open_input.argtypes = [
-    AVFormatContext_pp, # context
+    AVFormatContext_pp,
    ctypes.c_char_p, # url
    ctypes.c_void_p, # format
    ctypes.POINTER(ctypes.c_void_p)] # options
-def open_input(context, url):
+def format_open_input(context, url):
    return _avformat.avformat_open_input(ctypes.byref(context), url.encode('ascii', 'ignore'), None, None)
 _avformat.avformat_close_input.restype = None
-_avformat.avformat_close_input.argtypes = [AVFormatContext_pp] # context
+_avformat.avformat_close_input.argtypes = [AVFormatContext_pp]
-def close_input(context):
+def format_close_input(context):
    _avformat.avformat_close_input(ctypes.byref(context))
 _avformat.avformat_find_stream_info.restype = ctypes.c_int
 _avformat.avformat_find_stream_info.argtypes = [
-    AVFormatContext_p, # context
+    AVFormatContext_p,
    ctypes.POINTER(ctypes.c_void_p)] # options
-def find_stream_info(context):
+def format_find_stream_info(context):
    return _avformat.avformat_find_stream_info(context, None)
 AVMEDIA_TYPE_UNKNOWN = -1
@@ -66,14 +85,14 @@ AVCodec_pp = ctypes.POINTER(AVCodec_p)
 _avformat.av_find_best_stream.restype = ctypes.c_int
 _avformat.av_find_best_stream.argtypes = [
-    AVFormatContext_p, # context
+    AVFormatContext_p,
    ctypes.c_int, # type
    ctypes.c_int, # wanted stream
    ctypes.c_int, # related stream
-    AVCodec_pp, # decoder
+    AVCodec_pp,
    ctypes.c_int] # flags
-def find_best_stream(context, type):
+def format_find_best_stream(context, type):
    codec = AVCodec_p()
    index = _avformat.av_find_best_stream(context, type, -1, -1, ctypes.byref(codec), 0)
    return index, codec
@@ -98,15 +117,52 @@ def packet_alloc():
    return _avformat.av_packet_alloc()
 _avformat.av_packet_free.restype = None
-_avformat.av_packet_free.argtypes = [AVPacket_pp] # packet
+_avformat.av_packet_free.argtypes = [AVPacket_pp]
 def packet_free(packet):
    _avformat.av_packet_free(ctypes.byref(packet))
 _avformat.av_read_frame.restype = ctypes.c_int
-_avformat.av_read_frame.argtypes = [
+_avformat.av_read_frame.argtypes = [AVFormatContext_p, AVPacket_p]
    AVFormatContext_p, # context
    AVPacket_p] # packet
 def read_frame(context, packet):
    return _avformat.av_read_frame(context, packet)
 class AVCodecContext(ctypes.Structure):
    """Declared without fields; only pointers to it are exchanged with libavcodec."""
 AVCodecContext_p = ctypes.POINTER(AVCodecContext)
 AVCodecContext_pp = ctypes.POINTER(AVCodecContext_p)
 _avcodec.avcodec_alloc_context3.argtypes = [AVCodec_p]
 _avcodec.avcodec_alloc_context3.restype = AVCodecContext_p
 def codec_alloc_context(codec):
    """Call ``avcodec_alloc_context3`` for *codec* and return the resulting ``AVCodecContext_p``."""
    new_context = _avcodec.avcodec_alloc_context3(codec)
    return new_context
 _avcodec.avcodec_free_context.argtypes = [AVCodecContext_pp]
 _avcodec.avcodec_free_context.restype = None
 def codec_free_context(context):
    """Pass *context* by reference to ``avcodec_free_context``."""
    context_ref = ctypes.byref(context)
    _avcodec.avcodec_free_context(context_ref)
 _avcodec.avcodec_open2.argtypes = [AVCodecContext_p, AVCodec_p, ctypes.POINTER(ctypes.c_void_p)]
 _avcodec.avcodec_open2.restype = ctypes.c_int
 def codec_open(context, codec):
    """Call ``avcodec_open2`` on *context* for *codec* with no options; return its status."""
    no_options = None
    return _avcodec.avcodec_open2(context, codec, no_options)
 _avcodec.avcodec_send_packet.argtypes = [AVCodecContext_p, AVPacket_p]
 _avcodec.avcodec_send_packet.restype = ctypes.c_int
 def codec_send_packet(context, packet):
    """Forward *packet* to ``avcodec_send_packet`` on *context*; return its status."""
    status = _avcodec.avcodec_send_packet(context, packet)
    return status
 _avcodec.avcodec_receive_frame.argtypes = [AVCodecContext_p, AVFrame_p]
 _avcodec.avcodec_receive_frame.restype = ctypes.c_int
 def codec_receive_frame(context, frame):
    """Ask ``avcodec_receive_frame`` to fill *frame* from *context*; return its status."""
    status = _avcodec.avcodec_receive_frame(context, frame)
    return status
--- a/pve.py
+++ b/pve.py
@@ -1,19 +1,27 @@
 # RozK
 from mp4.demuxer import Demuxer
 from mp4.decoder import Decoder
 # Demo driver: demux test.mp4 and report how many frames each packet decodes to.
 demuxer = Demuxer('test.mp4')
 try:
    print(demuxer.video_stream.codec.name)
    print(demuxer.audio_stream.codec.name)
    video_decoder = Decoder(demuxer.video_stream.codec)
    audio_decoder = Decoder(demuxer.audio_stream.codec)
    while True:
        packet = demuxer.read_packet()
        # read_packet() returns None once no further packet can be read.
        if packet is None:
            break
        if demuxer.video_stream.contains(packet):
            video_frames = video_decoder.decode(packet)
            print(f"decoded {len(video_frames)} video frames")
        elif demuxer.audio_stream.contains(packet):
            audio_frames = audio_decoder.decode(packet)
            print(f"decoded {len(audio_frames)} audio frames")
        else:
            print("unknown packet")
 finally:
    # Close the input even when decoding raises, so the file is not left open.
    demuxer.close()