From 2316435af85ef515b019115fe9365d4606c9b4d6 Mon Sep 17 00:00:00 2001 From: Roz K Date: Fri, 3 Oct 2025 15:10:49 +0200 Subject: [PATCH] demuxer, decoder not working --- mp4/context.py | 54 ------------------------------- mp4/decoder.py | 46 ++++++++++++++++++++++++++ mp4/demuxer.py | 39 +++++++++++++++++----- mp4/frame.py | 19 +++++++++++ mp4/libav.py | 88 +++++++++++++++++++++++++++++++++++++++++--------- pve.py | 8 +++++ 6 files changed, 176 insertions(+), 78 deletions(-) delete mode 100644 mp4/context.py create mode 100644 mp4/decoder.py create mode 100644 mp4/frame.py diff --git a/mp4/context.py b/mp4/context.py deleted file mode 100644 index c6fbbcd..0000000 --- a/mp4/context.py +++ /dev/null @@ -1,54 +0,0 @@ -# RozK - -from . import libav -from .codec import Codec -from .stream import NullStream, Stream -from .packet import Packet - -class Context: - __slots__ = '_ref' - - def __init__(self): - self._ref = libav.alloc_context() - if not self._ref: - raise MemoryError - - def __del__(self): - if self._ref: - libav.free_context(self._ref) - - @property - def _as_parameter_(self): - return self._ref - - def open_input(self, url): - if not self._ref: - return - errcode = libav.open_input(self._ref, url) - if errcode < 0: - raise Exception(f"Failed to open: {url}") - errcode = libav.find_stream_info(self._ref) - if errcode < 0: - libav.close_input(self._ref) - raise Exception("Failed to find stream info") - - def close_input(self): - if self._ref: - libav.close_input(self._ref) - - def find_stream(self, type): - if not self._ref: - return NullStream() - index, codec_ref = libav.find_best_stream(self._ref, type) - if index < 0 or not codec_ref: - return NullStream() - return Stream(index, Codec(codec_ref)) - - def read_packet(self): - if not self._ref: - return None - packet = Packet() - errcode = libav.read_frame(self._ref, packet) - if errcode < 0: - return None - return packet diff --git a/mp4/decoder.py b/mp4/decoder.py new file mode 100644 index 
0000000..6edc54c
--- /dev/null
+++ b/mp4/decoder.py
@@ -0,0 +1,71 @@
+# RozK
+
+import errno
+
+from . import libav
+from .frame import Frame
+
+# FFmpeg reports errors as negated errno values: AVERROR(e) == -e.
+_AVERROR_EAGAIN = -errno.EAGAIN
+# AVERROR_EOF is -MKTAG('E', 'O', 'F', ' ').
+_AVERROR_EOF = -0x20464F45
+
+class Decoder:
+    """Wrap a libav codec context opened for one codec."""
+
+    __slots__ = '_ref'
+
+    def __init__(self, codec):
+        """Allocate and open a codec context for `codec`.
+
+        Raises MemoryError if the context cannot be allocated and
+        Exception if the codec cannot be opened.
+        """
+        # NOTE(review): stream parameters are never copied into the
+        # context (avcodec_parameters_to_context); presumably required
+        # for demuxed MP4 streams -- confirm.
+        self._ref = libav.codec_alloc_context(codec)
+        if not self._ref:
+            raise MemoryError
+        errcode = libav.codec_open(self._ref, codec)
+        if errcode < 0:
+            libav.codec_free_context(self._ref)
+            raise Exception("Failed to open codec context")
+
+    def __del__(self):
+        # _ref is unset when __init__ failed before assigning it.
+        if getattr(self, '_ref', None):
+            libav.codec_free_context(self._ref)
+
+    def _receive(self, frames):
+        """Append every frame the decoder has ready to `frames`."""
+        while True:
+            frame = Frame()
+            errcode = libav.codec_receive_frame(self._ref, frame)
+            if errcode == 0:
+                frames.append(frame)
+            elif errcode in (_AVERROR_EAGAIN, _AVERROR_EOF):
+                # More input needed, or fully flushed: nothing to read.
+                break
+            else:
+                raise Exception(f"Failed to receive frame: {errcode}")
+
+    def decode(self, packet):
+        """Send `packet` to the decoder and return the decoded frames.
+
+        Returns a list of Frame objects (possibly empty), or None when
+        the codec context is not set. Raises Exception when libav
+        reports a send or receive failure.
+        """
+        if not self._ref:
+            return None
+        frames = []
+        while True:
+            errcode = libav.codec_send_packet(self._ref, packet)
+            if errcode not in (0, _AVERROR_EAGAIN):
+                raise Exception(f"Failed to send packet: {errcode}")
+            # Drain output; after EAGAIN this makes room to resend.
+            self._receive(frames)
+            if errcode == 0:
+                break
+        return frames
diff --git a/mp4/demuxer.py b/mp4/demuxer.py
index fd6d881..33e9407 100644
--- a/mp4/demuxer.py
+++ b/mp4/demuxer.py
@@ -1,19 +1,42 @@
 # RozK
 
 from .
import libav -from .context import Context +from .codec import Codec +from .stream import NullStream, Stream +from .packet import Packet class Demuxer: - __slots__ = 'context', 'video_stream', 'audio_stream' + __slots__ = '_ref', 'video_stream', 'audio_stream' def __init__(self, path): - self.context = Context() - self.context.open_input("file:" + path) - self.video_stream = self.context.find_stream(libav.AVMEDIA_TYPE_VIDEO) - self.audio_stream = self.context.find_stream(libav.AVMEDIA_TYPE_AUDIO) + self._ref = libav.format_alloc_context() + if not self._ref: + raise MemoryError + errcode = libav.format_open_input(self._ref, "file:" + path) + if errcode < 0: + raise Exception(f"Failed to open: {path}") + errcode = libav.format_find_stream_info(self._ref) + if errcode < 0: + libav.format_close_input(self._ref) + raise Exception("Failed to find stream info") + self.video_stream = self._find_stream(libav.AVMEDIA_TYPE_VIDEO) + self.audio_stream = self._find_stream(libav.AVMEDIA_TYPE_AUDIO) + + def _find_stream(self, type): + index, codec_ref = libav.format_find_best_stream(self._ref, type) + if index < 0 or not codec_ref: + return NullStream() + return Stream(index, Codec(codec_ref)) def read_packet(self): - return self.context.read_packet() + if not self._ref: + return None + packet = Packet() + errcode = libav.read_frame(self._ref, packet) + if errcode < 0: + return None + return packet def close(self): - self.context.close_input() + if self._ref: + libav.format_close_input(self._ref) diff --git a/mp4/frame.py b/mp4/frame.py new file mode 100644 index 0000000..75c1fb3 --- /dev/null +++ b/mp4/frame.py @@ -0,0 +1,19 @@ +# RozK + +from . 
import libav + +class Frame: + __slots__ = '_ref' + + def __init__(self): + self._ref = libav.frame_alloc() + if not self._ref: + raise MemoryError + + def __del__(self): + if self._ref: + libav.frame_free(self._ref) + + @property + def _as_parameter_(self): + return self._ref diff --git a/mp4/libav.py b/mp4/libav.py index 3890650..d9e0f0a 100644 --- a/mp4/libav.py +++ b/mp4/libav.py @@ -3,9 +3,28 @@ import ctypes +_avutil = ctypes.cdll.LoadLibrary('libavutil.so') _avformat = ctypes.cdll.LoadLibrary('libavformat.so') _avcodec = ctypes.cdll.LoadLibrary('libavcodec.so') +class AVFrame(ctypes.Structure): + pass + +AVFrame_p = ctypes.POINTER(AVFrame) +AVFrame_pp = ctypes.POINTER(AVFrame_p) + +_avutil.av_frame_alloc.restype = AVFrame_p +_avutil.av_frame_alloc.argtypes = None + +def frame_alloc(): + return _avutil.av_frame_alloc() + +_avutil.av_frame_free.restype = None +_avutil.av_frame_free.argtypes = [AVFrame_pp] + +def frame_free(frame): + _avutil.av_frame_free(ctypes.byref(frame)) + class AVFormatContext(ctypes.Structure): pass @@ -15,37 +34,37 @@ AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p) _avformat.avformat_alloc_context.restype = AVFormatContext_p _avformat.avformat_alloc_context.argtypes = None -def alloc_context(): +def format_alloc_context(): return _avformat.avformat_alloc_context() _avformat.avformat_free_context.restype = None -_avformat.avformat_free_context.argtypes = [AVFormatContext_p] # context +_avformat.avformat_free_context.argtypes = [AVFormatContext_p] -def free_context(context): +def format_free_context(context): _avformat.avformat_free_context(context) _avformat.avformat_open_input.restype = ctypes.c_int _avformat.avformat_open_input.argtypes = [ - AVFormatContext_pp, # context + AVFormatContext_pp, ctypes.c_char_p, # url ctypes.c_void_p, # format ctypes.POINTER(ctypes.c_void_p)] # options -def open_input(context, url): +def format_open_input(context, url): return _avformat.avformat_open_input(ctypes.byref(context), 
url.encode('ascii', 'ignore'), None, None) _avformat.avformat_close_input.restype = None -_avformat.avformat_close_input.argtypes = [AVFormatContext_pp] # context +_avformat.avformat_close_input.argtypes = [AVFormatContext_pp] -def close_input(context): +def format_close_input(context): _avformat.avformat_close_input(ctypes.byref(context)) _avformat.avformat_find_stream_info.restype = ctypes.c_int _avformat.avformat_find_stream_info.argtypes = [ - AVFormatContext_p, # context + AVFormatContext_p, ctypes.POINTER(ctypes.c_void_p)] # options -def find_stream_info(context): +def format_find_stream_info(context): return _avformat.avformat_find_stream_info(context, None) AVMEDIA_TYPE_UNKNOWN = -1 @@ -66,14 +85,14 @@ AVCodec_pp = ctypes.POINTER(AVCodec_p) _avformat.av_find_best_stream.restype = ctypes.c_int _avformat.av_find_best_stream.argtypes = [ - AVFormatContext_p, # context + AVFormatContext_p, ctypes.c_int, # type ctypes.c_int, # wanted stream ctypes.c_int, # related stream - AVCodec_pp, # decoder + AVCodec_pp, ctypes.c_int] # flags -def find_best_stream(context, type): +def format_find_best_stream(context, type): codec = AVCodec_p() index = _avformat.av_find_best_stream(context, type, -1, -1, ctypes.byref(codec), 0) return index, codec @@ -98,15 +117,52 @@ def packet_alloc(): return _avformat.av_packet_alloc() _avformat.av_packet_free.restype = None -_avformat.av_packet_free.argtypes = [AVPacket_pp] # packet +_avformat.av_packet_free.argtypes = [AVPacket_pp] def packet_free(packet): _avformat.av_packet_free(ctypes.byref(packet)) _avformat.av_read_frame.restype = ctypes.c_int -_avformat.av_read_frame.argtypes = [ - AVFormatContext_p, # context - AVPacket_p] # packet +_avformat.av_read_frame.argtypes = [AVFormatContext_p, AVPacket_p] def read_frame(context, packet): return _avformat.av_read_frame(context, packet) + +class AVCodecContext(ctypes.Structure): + pass + +AVCodecContext_p = ctypes.POINTER(AVCodecContext) +AVCodecContext_pp = 
ctypes.POINTER(AVCodecContext_p) + +_avcodec.avcodec_alloc_context3.restype = AVCodecContext_p +_avcodec.avcodec_alloc_context3.argtypes = [AVCodec_p] + +def codec_alloc_context(codec): + return _avcodec.avcodec_alloc_context3(codec) + +_avcodec.avcodec_free_context.restype = None +_avcodec.avcodec_free_context.argtypes = [AVCodecContext_pp] + +def codec_free_context(context): + _avcodec.avcodec_free_context(ctypes.byref(context)) + +_avcodec.avcodec_open2.restype = ctypes.c_int +_avcodec.avcodec_open2.argtypes = [ + AVCodecContext_p, + AVCodec_p, + ctypes.POINTER(ctypes.c_void_p)] # options + +def codec_open(context, codec): + return _avcodec.avcodec_open2(context, codec, None) + +_avcodec.avcodec_send_packet.restype = ctypes.c_int +_avcodec.avcodec_send_packet.argtypes = [AVCodecContext_p, AVPacket_p] + +def codec_send_packet(context, packet): + return _avcodec.avcodec_send_packet(context, packet) + +_avcodec.avcodec_receive_frame.restype = ctypes.c_int +_avcodec.avcodec_receive_frame.argtypes = [AVCodecContext_p, AVFrame_p] + +def codec_receive_frame(context, frame): + return _avcodec.avcodec_receive_frame(context, frame) diff --git a/pve.py b/pve.py index b00aea0..9fc9f05 100644 --- a/pve.py +++ b/pve.py @@ -1,19 +1,27 @@ # RozK from mp4.demuxer import Demuxer +from mp4.decoder import Decoder demuxer = Demuxer('test.mp4') print(demuxer.video_stream.codec.name) print(demuxer.audio_stream.codec.name) +video_decoder = Decoder(demuxer.video_stream.codec) +audio_decoder = Decoder(demuxer.audio_stream.codec) + while True: packet = demuxer.read_packet() if packet is None: break if demuxer.video_stream.contains(packet): + video_frames = video_decoder.decode(packet) + print(f"decoded {len(video_frames)} video frames") continue elif demuxer.audio_stream.contains(packet): + audio_frames = audio_decoder.decode(packet) + print(f"decoded {len(audio_frames)} audio frames") continue else: print("unkown packet")