decoder

2025-10-03 16:55:38 +02:00
parent 2316435af8
commit dfd0902256
7 changed files with 122 additions and 47 deletions
--- a/mp4/codec.py
+++ b/mp4/codec.py
@@ -17,4 +17,4 @@ class Codec:
    @property
    def name(self):
-        return self._ref.contents.name
+        return self._ref.contents.name.decode("utf-8")
--- a/mp4/decoder.py
+++ b/mp4/decoder.py
@@ -1,46 +1,48 @@
 # RozK
 import errno
 from . import libav
 from .packet import Packet
 from .frame import Frame
 class Decoder:
-    __slots__ = '_ref'
+    __slots__ = '_context', '_index'
-    def __init__(self, codec):
+    def __init__(self, stream):
-        self._ref = libav.codec_alloc_context(codec)
+        self._context = libav.codec_alloc_context(stream.codec)
-        if not self._ref:
+        if not self._context:
            raise MemoryError
-        errcode = libav.codec_open(self._ref, codec)
+        errcode = libav.codec_parameters_to_context(self._context, stream.parameters)
        if errcode < 0:
-            libav.codec_free_context(self._ref)
+            libav.codec_free_context(self._context)
            raise Exception("Failed to set context parameters")
        errcode = libav.codec_open(self._context, stream.codec)
        if errcode < 0:
            libav.codec_free_context(self._context)
            raise Exception("Failed to open codec context")
        self._index = 0
    def __del__(self):
-        if self._ref:
+        if self._context:
-            libav.codec_free_context(self._ref)
+            libav.codec_free_context(self._context)
-    def _recieve(self, frames):
+    def _receive(self):
        while True:
            frame = Frame()
            errcode = libav.codec_receive_frame(self._ref, frame)
            if errcode == 0:
                frames.append(frame)
            elif errcode == errno.EAGAIN:
                break
            else:
                raise Exception(f"Failed to receive frame: {errcode}")
    def decode(self, packet):
        if not self._ref:
            return None
        frames = []
        while True:
-            errcode = libav.codec_send_packet(self._ref, packet)
+            frame = Frame()
-            if errcode != 0 and errcode != errno.EAGAIN:
+            errcode = libav.codec_receive_frame(self._context, frame)
-                raise Exception(f"Failed to send packet: {errcode}")
+            if errcode in (libav.AVERROR_EOF, libav.AVERROR_EAGAIN):
            self._recieve(frames)
            if errcode == 0:
                break
            elif errcode < 0:
                errstring = libav.strerror(errcode)
                raise Exception(f"Failed to receive frame: {errstring} {errcode} {libav.AVERROR_EAGAIN}")
            frames.append(frame)
        return frames
    def decode(self, packet):
        """Send ``packet`` to the codec context and return what ``_receive()`` yields.

        Returns ``None`` without calling libav when the decoder holds no
        context.  Raises ``Exception`` with the libav error text when
        ``codec_send_packet`` reports a negative status.
        """
        context = self._context
        if context:
            status = libav.codec_send_packet(context, packet)
            if status >= 0:
                return self._receive()
            errstring = libav.strerror(status)
            raise Exception(f"Failed to send packet: {errstring}")
        return None
--- a/mp4/demuxer.py
+++ b/mp4/demuxer.py
@@ -6,37 +6,38 @@ from .stream import NullStream, Stream
 from .packet import Packet
 class Demuxer:
-    __slots__ = '_ref', 'video_stream', 'audio_stream'
+    __slots__ = '_context', 'video_stream', 'audio_stream'
    def __init__(self, path):
-        self._ref = libav.format_alloc_context()
+        self._context = libav.format_alloc_context()
-        if not self._ref:
+        if not self._context:
            raise MemoryError
-        errcode = libav.format_open_input(self._ref, "file:" + path)
+        errcode = libav.format_open_input(self._context, "file:" + path)
        if errcode < 0:
            raise Exception(f"Failed to open: {path}")
-        errcode = libav.format_find_stream_info(self._ref)
+        errcode = libav.format_find_stream_info(self._context)
        if errcode < 0:
-            libav.format_close_input(self._ref)
+            libav.format_close_input(self._context)
            raise Exception("Failed to find stream info")
        self.video_stream = self._find_stream(libav.AVMEDIA_TYPE_VIDEO)
        self.audio_stream = self._find_stream(libav.AVMEDIA_TYPE_AUDIO)
    def _find_stream(self, type):
-        index, codec_ref = libav.format_find_best_stream(self._ref, type)
+        index, codec_ref = libav.format_find_best_stream(self._context, type)
        if index < 0 or not codec_ref:
            return NullStream()
-        return Stream(index, Codec(codec_ref))
+        parameters = self._context.contents.streams[index].contents.codecpar
        return Stream(index, Codec(codec_ref), parameters)
    def read_packet(self):
-        if not self._ref:
+        if not self._context:
            return None
        packet = Packet()
-        errcode = libav.read_frame(self._ref, packet)
+        errcode = libav.read_frame(self._context, packet)
        if errcode < 0:
            return None
        return packet
    def close(self):
-        if self._ref:
+        if self._context:
-            libav.format_close_input(self._ref)
+            libav.format_close_input(self._context)
--- a/mp4/libav.py
+++ b/mp4/libav.py
@@ -1,12 +1,36 @@
 # RozK
 # https://www.ffmpeg.org/doxygen/trunk/group__libavf.html
 import errno
 import ctypes
 # Load the libavutil, libavformat and libavcodec shared libraries by their
 # unversioned .so names.
 # NOTE(review): these names presumably only resolve on Linux-like systems
 # where the unversioned symlinks are installed — confirm for other platforms.
 _avutil = ctypes.cdll.LoadLibrary('libavutil.so')
 _avformat = ctypes.cdll.LoadLibrary('libavformat.so')
 _avcodec = ctypes.cdll.LoadLibrary('libavcodec.so')
 def _errtag(a, b, c, d):
    return -(ord(a) | (ord(b) << 8) | (ord(c) << 16) | (ord(d) << 24))
 # AVERROR_EAGAIN is always the negative form of errno.EAGAIN: the value is
 # taken as-is when the platform already defines it negative, negated otherwise.
 # NOTE(review): presumably mirrors FFmpeg's AVERROR() macro — confirm.
 if errno.EAGAIN < 0:
    AVERROR_EAGAIN = errno.EAGAIN
 else:
    AVERROR_EAGAIN = -errno.EAGAIN
 # Error code built from the four-character tag "EOF ".
 AVERROR_EOF = _errtag('E', 'O', 'F', ' ')
 # Size in bytes of the buffer that strerror() passes to av_strerror.
 AV_ERROR_MAX_STRING_SIZE = 64
 # Prototype for av_strerror: returns an int, takes (int, char *, size_t).
 _avutil.av_strerror.restype = ctypes.c_int
 _avutil.av_strerror.argtypes = [
    ctypes.c_int, # errnum
    ctypes.c_char_p, # errbuf
    ctypes.c_size_t] # errbuf_size
 def strerror(errno):
    """Return libav's textual description of the error code ``errno``.

    ``av_strerror`` writes the text into a buffer of
    ``AV_ERROR_MAX_STRING_SIZE`` bytes, which is then decoded as UTF-8.
    The parameter name shadows the ``errno`` module inside this function.
    """
    capacity = AV_ERROR_MAX_STRING_SIZE
    message = ctypes.create_string_buffer(capacity)
    _avutil.av_strerror(errno, message, capacity)
    text = message.value.decode("utf-8")
    return text
 class AVFrame(ctypes.Structure):
    # No _fields_ are declared, so Python code cannot read members of this
    # structure through ctypes.
    pass
@@ -25,9 +49,34 @@ _avutil.av_frame_free.argtypes = [AVFrame_pp]
 def frame_free(frame):
    """Pass a reference to ``frame`` to libavutil's ``av_frame_free``."""
    frame_ref = ctypes.byref(frame)
    _avutil.av_frame_free(frame_ref)
-class AVFormatContext(ctypes.Structure):
+class AVCodecParameters(ctypes.Structure):
    pass
 AVCodecParameters_p = ctypes.POINTER(AVCodecParameters)
 class AVStream(ctypes.Structure):
    """Partial ctypes declaration of a stream structure.

    Only the fields up to and including ``codecpar`` are declared.
    NOTE(review): the field order has to match the struct layout of the
    libavformat build that gets loaded — confirm against the installed
    FFmpeg version.
    """
    _fields_ = [
        ("av_class", ctypes.c_void_p),
        ("index", ctypes.c_int),
        ("id", ctypes.c_int),
        ("codecpar", AVCodecParameters_p)]
        # ...
 # Pointer and pointer-to-pointer types for AVStream.
 AVStream_p = ctypes.POINTER(AVStream)
 AVStream_pp = ctypes.POINTER(AVStream_p)
 class AVFormatContext(ctypes.Structure):
    """Partial ctypes declaration of a format context structure.

    Only the fields up to and including ``streams`` are declared; the
    pointer members other than ``streams`` are typed as untyped
    ``c_void_p``.
    NOTE(review): the field order has to match the struct layout of the
    libavformat build that gets loaded — confirm against the installed
    FFmpeg version.
    """
    _fields_ = [
        ("av_class", ctypes.c_void_p),
        ("iformat", ctypes.c_void_p),
        ("oformat", ctypes.c_void_p),
        ("priv_data", ctypes.c_void_p),
        ("pb", ctypes.c_void_p),
        ("ctx_flags", ctypes.c_int),
        ("nb_streams", ctypes.c_uint),
        ("streams", AVStream_pp)]
        # ...
 # Pointer and pointer-to-pointer types for AVFormatContext.
 AVFormatContext_p = ctypes.POINTER(AVFormatContext)
 AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p)
@@ -146,6 +195,12 @@ _avcodec.avcodec_free_context.argtypes = [AVCodecContext_pp]
 def codec_free_context(context):
    """Pass a reference to ``context`` to libavcodec's ``avcodec_free_context``."""
    context_ref = ctypes.byref(context)
    _avcodec.avcodec_free_context(context_ref)
 _avcodec.avcodec_parameters_to_context.restype = ctypes.c_int
 _avcodec.avcodec_parameters_to_context.argtypes = [AVCodecContext_p, AVCodecParameters_p]
 def codec_parameters_to_context(context, parameters):
    """Call ``avcodec_parameters_to_context`` and return its integer status."""
    status = _avcodec.avcodec_parameters_to_context(context, parameters)
    return status
 _avcodec.avcodec_open2.restype = ctypes.c_int
 _avcodec.avcodec_open2.argtypes = [
    AVCodecContext_p,
--- a/mp4/packet.py
+++ b/mp4/packet.py
@@ -23,3 +23,9 @@ class Packet:
        if self._ref:
            return self._ref.contents.stream_index
        return -1
    @property
    def pts(self):
        """Return ``pts`` of the referenced packet, or 0 when ``_ref`` is falsy."""
        if not self._ref:
            return 0
        return self._ref.contents.pts
--- a/mp4/stream.py
+++ b/mp4/stream.py
@@ -11,15 +11,20 @@ class NullStream:
    def codec(self):
        return NullCodec()
    @property
    def parameters(self):
        """Always ``None``: this placeholder stream carries no parameters."""
        return None
    def contains(self, packet):
        """Always ``False``, whatever ``packet`` is."""
        return False
 class Stream:
-    __slots__ = 'index', 'codec'
+    __slots__ = 'index', 'codec', 'parameters'
-    def __init__(self, index, codec):
+    def __init__(self, index, codec, parameters):
        self.index = index
        self.codec = codec
        self.parameters = parameters
    def contains(self, packet):
        """Return whether ``packet.stream_index`` equals this stream's ``index``."""
        same_stream = self.index == packet.stream_index
        return same_stream
--- a/pve.py
+++ b/pve.py
@@ -8,8 +8,8 @@ demuxer = Demuxer('test.mp4')
 print(demuxer.video_stream.codec.name)
 print(demuxer.audio_stream.codec.name)
-video_decoder = Decoder(demuxer.video_stream.codec)
+video_decoder = Decoder(demuxer.video_stream)
-audio_decoder = Decoder(demuxer.audio_stream.codec)
+audio_decoder = Decoder(demuxer.audio_stream)
 while True:
    packet = demuxer.read_packet()
@@ -26,4 +26,10 @@ while True:
    else:
        print("unkown packet")
 video_frames = video_decoder.decode(None)
 print(f"flushed {len(video_frames)} video frames")
 audio_frames = audio_decoder.decode(None)
 print(f"flushed {len(audio_frames)} audio frames")
 demuxer.close()