From dfd0902256e9b7f1bd14ae39b580906f36e461c7 Mon Sep 17 00:00:00 2001 From: Roz K Date: Fri, 3 Oct 2025 16:55:38 +0200 Subject: [PATCH] decoder --- mp4/codec.py | 2 +- mp4/decoder.py | 60 ++++++++++++++++++++++++++------------------------ mp4/demuxer.py | 25 +++++++++++---------- mp4/libav.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++- mp4/packet.py | 6 +++++ mp4/stream.py | 9 ++++++-- pve.py | 10 +++++++-- 7 files changed, 122 insertions(+), 47 deletions(-) diff --git a/mp4/codec.py b/mp4/codec.py index ccb9e7f..d9c7df5 100644 --- a/mp4/codec.py +++ b/mp4/codec.py @@ -17,4 +17,4 @@ class Codec: @property def name(self): - return self._ref.contents.name + return self._ref.contents.name.decode("utf-8") diff --git a/mp4/decoder.py b/mp4/decoder.py index 6edc54c..c244bc1 100644 --- a/mp4/decoder.py +++ b/mp4/decoder.py @@ -1,46 +1,48 @@ # RozK -import errno - from . import libav +from .packet import Packet from .frame import Frame class Decoder: - __slots__ = '_ref' + __slots__ = '_context', '_index' - def __init__(self, codec): - self._ref = libav.codec_alloc_context(codec) - if not self._ref: + def __init__(self, stream): + self._context = libav.codec_alloc_context(stream.codec) + if not self._context: raise MemoryError - errcode = libav.codec_open(self._ref, codec) + errcode = libav.codec_parameters_to_context(self._context, stream.parameters) if errcode < 0: - libav.codec_free_context(self._ref) + libav.codec_free_context(self._context) + raise Exception("Failed to set context parameters") + errcode = libav.codec_open(self._context, stream.codec) + if errcode < 0: + libav.codec_free_context(self._context) raise Exception("Failed to open codec context") + self._index = 0 def __del__(self): - if self._ref: - libav.codec_free_context(self._ref) + if self._context: + libav.codec_free_context(self._context) - def _recieve(self, frames): - while True: - frame = Frame() - errcode = libav.codec_receive_frame(self._ref, frame) - if errcode == 0: - frames.append(frame) - elif errcode == errno.EAGAIN: - break - else: - raise Exception(f"Failed to receive frame: {errcode}") - - def decode(self, packet): - if not self._ref: - return None + def _receive(self): frames = [] while True: - errcode = libav.codec_send_packet(self._ref, packet) - if errcode != 0 and errcode != errno.EAGAIN: - raise Exception(f"Failed to send packet: {errcode}") - self._recieve(frames) - if errcode == 0: + frame = Frame() + errcode = libav.codec_receive_frame(self._context, frame) + if errcode in (libav.AVERROR_EOF, libav.AVERROR_EAGAIN): break + elif errcode < 0: + errstring = libav.strerror(errcode) + raise Exception(f"Failed to receive frame: {errstring} {errcode} {libav.AVERROR_EAGAIN}") + frames.append(frame) return frames + + def decode(self, packet): + if not self._context: + return None + errcode = libav.codec_send_packet(self._context, packet) + if errcode < 0: + errstring = libav.strerror(errcode) + raise Exception(f"Failed to send packet: {errstring}") + return self._receive() diff --git a/mp4/demuxer.py b/mp4/demuxer.py index 33e9407..4627767 100644 --- a/mp4/demuxer.py +++ b/mp4/demuxer.py @@ -6,37 +6,38 @@ from .stream import NullStream, Stream from .packet import Packet class Demuxer: - __slots__ = '_ref', 'video_stream', 'audio_stream' + __slots__ = '_context', 'video_stream', 'audio_stream' def __init__(self, path): - self._ref = libav.format_alloc_context() - if not self._ref: + self._context = libav.format_alloc_context() + if not self._context: raise MemoryError - errcode = libav.format_open_input(self._ref, "file:" + path) + errcode = libav.format_open_input(self._context, "file:" + path) if errcode < 0: raise Exception(f"Failed to open: {path}") - errcode = libav.format_find_stream_info(self._ref) + errcode = libav.format_find_stream_info(self._context) if errcode < 0: - libav.format_close_input(self._ref) + libav.format_close_input(self._context) raise Exception("Failed to find stream info") self.video_stream = self._find_stream(libav.AVMEDIA_TYPE_VIDEO) self.audio_stream = self._find_stream(libav.AVMEDIA_TYPE_AUDIO) def _find_stream(self, type): - index, codec_ref = libav.format_find_best_stream(self._ref, type) + index, codec_ref = libav.format_find_best_stream(self._context, type) if index < 0 or not codec_ref: return NullStream() - return Stream(index, Codec(codec_ref)) + parameters = self._context.contents.streams[index].contents.codecpar + return Stream(index, Codec(codec_ref), parameters) def read_packet(self): - if not self._ref: + if not self._context: return None packet = Packet() - errcode = libav.read_frame(self._ref, packet) + errcode = libav.read_frame(self._context, packet) if errcode < 0: return None return packet def close(self): - if self._ref: - libav.format_close_input(self._ref) + if self._context: + libav.format_close_input(self._context) diff --git a/mp4/libav.py b/mp4/libav.py index d9e0f0a..b1e06bb 100644 --- a/mp4/libav.py +++ b/mp4/libav.py @@ -1,12 +1,36 @@ # RozK # https://www.ffmpeg.org/doxygen/trunk/group__libavf.html +import errno import ctypes _avutil = ctypes.cdll.LoadLibrary('libavutil.so') _avformat = ctypes.cdll.LoadLibrary('libavformat.so') _avcodec = ctypes.cdll.LoadLibrary('libavcodec.so') +def _errtag(a, b, c, d): + return -(ord(a) | (ord(b) << 8) | (ord(c) << 16) | (ord(d) << 24)) + +if errno.EAGAIN < 0: + AVERROR_EAGAIN = errno.EAGAIN +else: + AVERROR_EAGAIN = -errno.EAGAIN + +AVERROR_EOF = _errtag('E', 'O', 'F', ' ') + +AV_ERROR_MAX_STRING_SIZE = 64 + +_avutil.av_strerror.restype = ctypes.c_int +_avutil.av_strerror.argtypes = [ + ctypes.c_int, # errno + ctypes.c_char_p, # errbuf + ctypes.c_size_t] # errbuff_size + +def strerror(errno): + errbuf = ctypes.create_string_buffer(AV_ERROR_MAX_STRING_SIZE) + _avutil.av_strerror(errno, errbuf, AV_ERROR_MAX_STRING_SIZE) + return errbuf.value.decode("utf-8") + class AVFrame(ctypes.Structure): pass @@ -25,9 +49,34 @@ _avutil.av_frame_free.argtypes = [AVFrame_pp] def frame_free(frame): _avutil.av_frame_free(ctypes.byref(frame)) -class AVFormatContext(ctypes.Structure): +class AVCodecParameters(ctypes.Structure): pass +AVCodecParameters_p = ctypes.POINTER(AVCodecParameters) + +class AVStream(ctypes.Structure): + _fields_ = [ + ("av_class", ctypes.c_void_p), + ("index", ctypes.c_int), + ("id", ctypes.c_int), + ("codecpar", AVCodecParameters_p)] + # ... + +AVStream_p = ctypes.POINTER(AVStream) +AVStream_pp = ctypes.POINTER(AVStream_p) + +class AVFormatContext(ctypes.Structure): + _fields_ = [ + ("av_class", ctypes.c_void_p), + ("iformat", ctypes.c_void_p), + ("oformat", ctypes.c_void_p), + ("priv_data", ctypes.c_void_p), + ("pb", ctypes.c_void_p), + ("ctx_flags", ctypes.c_int), + ("nb_streams", ctypes.c_uint), + ("streams", AVStream_pp)] + # ... + AVFormatContext_p = ctypes.POINTER(AVFormatContext) AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p) @@ -146,6 +195,12 @@ _avcodec.avcodec_free_context.argtypes = [AVCodecContext_pp] def codec_free_context(context): _avcodec.avcodec_free_context(ctypes.byref(context)) +_avcodec.avcodec_parameters_to_context.restype = ctypes.c_int +_avcodec.avcodec_parameters_to_context.argtypes = [AVCodecContext_p, AVCodecParameters_p] + +def codec_parameters_to_context(context, parameters): + return _avcodec.avcodec_parameters_to_context(context, parameters) + _avcodec.avcodec_open2.restype = ctypes.c_int _avcodec.avcodec_open2.argtypes = [ AVCodecContext_p, diff --git a/mp4/packet.py b/mp4/packet.py index c6aceab..193fe2b 100644 --- a/mp4/packet.py +++ b/mp4/packet.py @@ -23,3 +23,9 @@ class Packet: if self._ref: return self._ref.contents.stream_index return -1 + + @property + def pts(self): + if self._ref: + return self._ref.contents.pts + return 0 diff --git a/mp4/stream.py b/mp4/stream.py index 25c0424..0c7dace 100644 --- a/mp4/stream.py +++ b/mp4/stream.py @@ -11,15 +11,20 @@ class NullStream: def codec(self): return NullCodec() + @property + def parameters(self): + return None + def contains(self, packet): return False class Stream: - __slots__ = 'index', 'codec' + __slots__ = 'index', 'codec', 'parameters' - def __init__(self, index, codec): + def __init__(self, index, codec, parameters): self.index = index self.codec = codec + self.parameters = parameters def contains(self, packet): return (self.index == packet.stream_index) diff --git a/pve.py b/pve.py index 9fc9f05..73ab92d 100644 --- a/pve.py +++ b/pve.py @@ -8,8 +8,8 @@ demuxer = Demuxer('test.mp4') print(demuxer.video_stream.codec.name) print(demuxer.audio_stream.codec.name) -video_decoder = Decoder(demuxer.video_stream.codec) -audio_decoder = Decoder(demuxer.audio_stream.codec) +video_decoder = Decoder(demuxer.video_stream) +audio_decoder = Decoder(demuxer.audio_stream) while True: packet = demuxer.read_packet() @@ -26,4 +26,10 @@ while True: else: print("unkown packet") +video_frames = video_decoder.decode(None) +print(f"flushed {len(video_frames)} video frames") + +audio_frames = audio_decoder.decode(None) +print(f"flushed {len(audio_frames)} audio frames") + demuxer.close()