decoder

2025-10-03 16:55:38 +02:00
parent 2316435af8
commit dfd0902256
7 changed files with 122 additions and 47 deletions
--- a/mp4/codec.py
+++ b/mp4/codec.py
@ -17,4 +17,4 @@ class Codec:

    @property
    def name(self):
-        return self._ref.contents.name
+        return self._ref.contents.name.decode("utf-8")
--- a/mp4/decoder.py
+++ b/mp4/decoder.py
@ -1,46 +1,48 @@
 # RozK

-import errno
-
 from . import libav
+from .packet import Packet
 from .frame import Frame

 class Decoder:
-    __slots__ = '_ref'
+    __slots__ = '_context', '_index'

-    def __init__(self, codec):
-        self._ref = libav.codec_alloc_context(codec)
-        if not self._ref:
+    def __init__(self, stream):
+        self._context = libav.codec_alloc_context(stream.codec)
+        if not self._context:
            raise MemoryError
-        errcode = libav.codec_open(self._ref, codec)
+        errcode = libav.codec_parameters_to_context(self._context, stream.parameters)
        if errcode < 0:
-            libav.codec_free_context(self._ref)
+            libav.codec_free_context(self._context)
+            raise Exception("Failed to set context parameters")
+        errcode = libav.codec_open(self._context, stream.codec)
+        if errcode < 0:
+            libav.codec_free_context(self._context)
            raise Exception("Failed to open codec context")
+        self._index = 0

    def __del__(self):
-        if self._ref:
-            libav.codec_free_context(self._ref)
+        if self._context:
+            libav.codec_free_context(self._context)

-    def _recieve(self, frames):
-        while True:
-            frame = Frame()
-            errcode = libav.codec_receive_frame(self._ref, frame)
-            if errcode == 0:
-                frames.append(frame)
-            elif errcode == errno.EAGAIN:
-                break
-            else:
-                raise Exception(f"Failed to receive frame: {errcode}")
-
-    def decode(self, packet):
-        if not self._ref:
-            return None
+    def _receive(self):
        frames = []
        while True:
-            errcode = libav.codec_send_packet(self._ref, packet)
-            if errcode != 0 and errcode != errno.EAGAIN:
-                raise Exception(f"Failed to send packet: {errcode}")
-            self._recieve(frames)
-            if errcode == 0:
+            frame = Frame()
+            errcode = libav.codec_receive_frame(self._context, frame)
+            if errcode in (libav.AVERROR_EOF, libav.AVERROR_EAGAIN):
                break
+            elif errcode < 0:
+                errstring = libav.strerror(errcode)
+                raise Exception(f"Failed to receive frame: {errstring} {errcode} {libav.AVERROR_EAGAIN}")
+            frames.append(frame)
        return frames
+
+    def decode(self, packet):
+        if not self._context:
+            return None
+        errcode = libav.codec_send_packet(self._context, packet)
+        if errcode < 0:
+            errstring = libav.strerror(errcode)
+            raise Exception(f"Failed to send packet: {errstring}")
+        return self._receive()
--- a/mp4/demuxer.py
+++ b/mp4/demuxer.py
@ -6,37 +6,38 @@ from .stream import NullStream, Stream
 from .packet import Packet

 class Demuxer:
-    __slots__ = '_ref', 'video_stream', 'audio_stream'
+    __slots__ = '_context', 'video_stream', 'audio_stream'

    def __init__(self, path):
-        self._ref = libav.format_alloc_context()
-        if not self._ref:
+        self._context = libav.format_alloc_context()
+        if not self._context:
            raise MemoryError
-        errcode = libav.format_open_input(self._ref, "file:" + path)
+        errcode = libav.format_open_input(self._context, "file:" + path)
        if errcode < 0:
            raise Exception(f"Failed to open: {path}")
-        errcode = libav.format_find_stream_info(self._ref)
+        errcode = libav.format_find_stream_info(self._context)
        if errcode < 0:
-            libav.format_close_input(self._ref)
+            libav.format_close_input(self._context)
            raise Exception("Failed to find stream info")
        self.video_stream = self._find_stream(libav.AVMEDIA_TYPE_VIDEO)
        self.audio_stream = self._find_stream(libav.AVMEDIA_TYPE_AUDIO)

    def _find_stream(self, type):
-        index, codec_ref = libav.format_find_best_stream(self._ref, type)
+        index, codec_ref = libav.format_find_best_stream(self._context, type)
        if index < 0 or not codec_ref:
            return NullStream()
-        return Stream(index, Codec(codec_ref))
+        parameters = self._context.contents.streams[index].contents.codecpar
+        return Stream(index, Codec(codec_ref), parameters)

    def read_packet(self):
-        if not self._ref:
+        if not self._context:
            return None
        packet = Packet()
-        errcode = libav.read_frame(self._ref, packet)
+        errcode = libav.read_frame(self._context, packet)
        if errcode < 0:
            return None
        return packet

    def close(self):
-        if self._ref:
-            libav.format_close_input(self._ref)
+        if self._context:
+            libav.format_close_input(self._context)
--- a/mp4/libav.py
+++ b/mp4/libav.py
@ -1,12 +1,36 @@
 # RozK
 # https://www.ffmpeg.org/doxygen/trunk/group__libavf.html

+import errno
 import ctypes

 _avutil = ctypes.cdll.LoadLibrary('libavutil.so')
 _avformat = ctypes.cdll.LoadLibrary('libavformat.so')
 _avcodec = ctypes.cdll.LoadLibrary('libavcodec.so')

+def _errtag(a, b, c, d):
+    return -(ord(a) | (ord(b) << 8) | (ord(c) << 16) | (ord(d) << 24))
+
+if errno.EAGAIN < 0:
+    AVERROR_EAGAIN = errno.EAGAIN
+else:
+    AVERROR_EAGAIN = -errno.EAGAIN
+
+AVERROR_EOF = _errtag('E', 'O', 'F', ' ')
+
+AV_ERROR_MAX_STRING_SIZE = 64
+
+_avutil.av_strerror.restype = ctypes.c_int
+_avutil.av_strerror.argtypes = [
+    ctypes.c_int, # errnum
+    ctypes.c_char_p, # errbuf
+    ctypes.c_size_t] # errbuf_size
+
+def strerror(errno):
+    errbuf = ctypes.create_string_buffer(AV_ERROR_MAX_STRING_SIZE)
+    _avutil.av_strerror(errno, errbuf, AV_ERROR_MAX_STRING_SIZE)
+    return errbuf.value.decode("utf-8")
+
 class AVFrame(ctypes.Structure):
    pass

@ -25,9 +49,34 @@ _avutil.av_frame_free.argtypes = [AVFrame_pp]
 def frame_free(frame):
    _avutil.av_frame_free(ctypes.byref(frame))

-class AVFormatContext(ctypes.Structure):
+class AVCodecParameters(ctypes.Structure):
    pass

+AVCodecParameters_p = ctypes.POINTER(AVCodecParameters)
+
+class AVStream(ctypes.Structure):
+    _fields_ = [
+        ("av_class", ctypes.c_void_p),
+        ("index", ctypes.c_int),
+        ("id", ctypes.c_int),
+        ("codecpar", AVCodecParameters_p)]
+        # ... (remaining AVStream fields are not declared here)
+
+AVStream_p = ctypes.POINTER(AVStream)
+AVStream_pp = ctypes.POINTER(AVStream_p)
+
+class AVFormatContext(ctypes.Structure):
+    _fields_ = [
+        ("av_class", ctypes.c_void_p),
+        ("iformat", ctypes.c_void_p),
+        ("oformat", ctypes.c_void_p),
+        ("priv_data", ctypes.c_void_p),
+        ("pb", ctypes.c_void_p),
+        ("ctx_flags", ctypes.c_int),
+        ("nb_streams", ctypes.c_uint),
+        ("streams", AVStream_pp)]
+        # ... (remaining AVFormatContext fields are not declared here)
+
 AVFormatContext_p = ctypes.POINTER(AVFormatContext)
 AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p)

@ -146,6 +195,12 @@ _avcodec.avcodec_free_context.argtypes = [AVCodecContext_pp]
 def codec_free_context(context):
    _avcodec.avcodec_free_context(ctypes.byref(context))

+_avcodec.avcodec_parameters_to_context.restype = ctypes.c_int
+_avcodec.avcodec_parameters_to_context.argtypes = [AVCodecContext_p, AVCodecParameters_p]
+
+def codec_parameters_to_context(context, parameters):
+    return _avcodec.avcodec_parameters_to_context(context, parameters)
+
 _avcodec.avcodec_open2.restype = ctypes.c_int
 _avcodec.avcodec_open2.argtypes = [
    AVCodecContext_p,
--- a/mp4/packet.py
+++ b/mp4/packet.py
@ -23,3 +23,9 @@ class Packet:
        if self._ref:
            return self._ref.contents.stream_index
        return -1
+
+    @property
+    def pts(self):
+        if self._ref:
+            return self._ref.contents.pts
+        return 0
--- a/mp4/stream.py
+++ b/mp4/stream.py
@ -11,15 +11,20 @@ class NullStream:
    def codec(self):
        return NullCodec()

+    @property
+    def parameters(self):
+        return None
+
    def contains(self, packet):
        return False

 class Stream:
-    __slots__ = 'index', 'codec'
+    __slots__ = 'index', 'codec', 'parameters'

-    def __init__(self, index, codec):
+    def __init__(self, index, codec, parameters):
        self.index = index
        self.codec = codec
+        self.parameters = parameters

    def contains(self, packet):
        return (self.index == packet.stream_index)
--- a/pve.py
+++ b/pve.py
@ -8,8 +8,8 @@ demuxer = Demuxer('test.mp4')
 print(demuxer.video_stream.codec.name)
 print(demuxer.audio_stream.codec.name)

-video_decoder = Decoder(demuxer.video_stream.codec)
-audio_decoder = Decoder(demuxer.audio_stream.codec)
+video_decoder = Decoder(demuxer.video_stream)
+audio_decoder = Decoder(demuxer.audio_stream)

 while True:
    packet = demuxer.read_packet()
@ -26,4 +26,10 @@ while True:
    else:
        print("unkown packet")

+video_frames = video_decoder.decode(None)
+print(f"flushed {len(video_frames)} video frames")
+
+audio_frames = audio_decoder.decode(None)
+print(f"flushed {len(audio_frames)} audio frames")
+
 demuxer.close()