From 639a20bca92ac789407935ecb8590c69475056e9 Mon Sep 17 00:00:00 2001
From: Roz K <roz@rozk.net>
Date: Sat, 4 Oct 2025 17:39:37 +0200
Subject: [PATCH] decoder multithreading

---
 pve.py          |  19 ++++---
 pyav/decoder.py |   5 +-
 pyav/libav.py   | 136 +++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 151 insertions(+), 9 deletions(-)

diff --git a/pve.py b/pve.py
index fa60253..c24f740 100644
--- a/pve.py
+++ b/pve.py
@@ -14,6 +14,7 @@
 # You should have received a copy of the GNU General Public License along with People's Video Editor.
 # If not, see <https://www.gnu.org/licenses/>.
 
+import multiprocessing
 import cProfile
 
 from pyav.demuxer import Demuxer
@@ -25,10 +26,15 @@ print(f"nb_streams  = {demuxer.nb_streams}")
 print(f"video codec = {demuxer.video_stream.codec.description()}")
 print(f"audio codec = {demuxer.audio_stream.codec.description()}")
 
-video_decoder = Decoder(demuxer.video_stream)
-audio_decoder = Decoder(demuxer.audio_stream)
+cpu_count = multiprocessing.cpu_count()
 
-num_frames = 0
+print(f"using {cpu_count} threads for video decoding")
+
+video_decoder = Decoder(demuxer.video_stream, cpu_count)
+audio_decoder = Decoder(demuxer.audio_stream, 1)
+
+num_video_frames = 0
+num_audio_frames = 0
 
 with cProfile.Profile() as pr:
     while True:
@@ -36,13 +42,12 @@ with cProfile.Profile() as pr:
         eof = (packet is None)
         if eof or demuxer.video_stream.contains(packet):
             video_frames = video_decoder.decode(packet)
-            num_frames += len(video_frames)
-            # print(f"decoded {len(video_frames)} video frames")
+            num_video_frames += len(video_frames)
         if eof or demuxer.audio_stream.contains(packet):
             audio_frames = audio_decoder.decode(packet)
-            # print(f"decoded {len(audio_frames)} audio frames")
+            num_audio_frames += len(audio_frames)
         if eof:
             break
 
-print(f"num frames: {num_frames}")
+print(f"num video frames: {num_video_frames}, audio frames: {num_audio_frames}")
 pr.print_stats()
diff --git a/pyav/decoder.py b/pyav/decoder.py
index 54144b3..4517377 100644
--- a/pyav/decoder.py
+++ b/pyav/decoder.py
@@ -21,7 +21,7 @@ from .frame import Frame
 class Decoder:
     __slots__ = '_context'
 
-    def __init__(self, stream):
+    def __init__(self, stream, thread_count=1): # default keeps Decoder(stream) callers working; values <= 1 leave the context's thread settings untouched
         self._context = libav.codec_alloc_context(stream.codec)
         if not self._context:
             raise MemoryError
@@ -29,6 +29,9 @@ class Decoder:
         if errcode < 0:
             libav.codec_free_context(self._context)
             raise Exception("Failed to set context parameters")
+        if thread_count > 1:
+            self._context.contents.thread_count = thread_count
+            self._context.contents.thread_type = libav.FF_THREAD_FRAME
         errcode = libav.codec_open(self._context, stream.codec)
         if errcode < 0:
             libav.codec_free_context(self._context)
diff --git a/pyav/libav.py b/pyav/libav.py
index bb8562b..fe47288 100644
--- a/pyav/libav.py
+++ b/pyav/libav.py
@@ -49,6 +49,9 @@ AVMEDIA_TYPE_ATTACHMENT = 4
 
 AV_NUM_DATA_POINTERS = 8
 
+FF_THREAD_FRAME = 1
+FF_THREAD_SLICE = 2
+
 c_uint8_p = ctypes.POINTER(ctypes.c_uint8)
 c_uint8_pp = ctypes.POINTER(c_uint8_p)
 
@@ -143,8 +146,139 @@ class AVCodec(ctypes.Structure):
 AVCodec_p = ctypes.POINTER(AVCodec)
 AVCodec_pp = ctypes.POINTER(AVCodec_p)
 
+class AVChannelLayout_u(ctypes.Union): # type of the "u" member of AVChannelLayout
+    _fields_ = [
+        ("mask", ctypes.c_uint64), # overlaps "map": both members share the same storage
+        ("map", ctypes.c_void_p)] # opaque pointer, presumably AVChannelCustom* - confirm against the libavutil headers
+
+class AVChannelLayout(ctypes.Structure): # embedded by value as AVCodecContext.ch_layout
+    _fields_ = [
+        ("order", ctypes.c_int), # AVChannelOrder
+        ("nb_channels", ctypes.c_int),
+        ("u", AVChannelLayout_u), # mask/map union
+        ("opaque", ctypes.c_void_p)] # keep this layout exact: its size shifts every AVCodecContext field declared after ch_layout
+
 class AVCodecContext(ctypes.Structure):
-    pass
+    _fields_ = [ # order and types must mirror the C struct of the libavcodec build loaded at runtime (ABI-specific - TODO confirm the targeted version)
+    ("av_class", ctypes.c_void_p),
+    ("log_level_offset", ctypes.c_int),
+    ("codec_type", ctypes.c_int),
+    ("codec", AVCodec_p),
+    ("codec_id", ctypes.c_int),
+    ("codec_tag", ctypes.c_uint),
+    ("priv_data", ctypes.c_void_p),
+    ("internal", ctypes.c_void_p), # AVCodecInternal
+    ("opaque", ctypes.c_void_p),
+    ("bit_rate", ctypes.c_int64),
+    ("flags", ctypes.c_int),
+    ("flags2", ctypes.c_int),
+    ("extradata", ctypes.POINTER(ctypes.c_uint8)),
+    ("extradata_size", ctypes.c_int),
+    ("time_base", AVRational),
+    ("pkt_timebase", AVRational),
+    ("framerate", AVRational),
+    ("delay", ctypes.c_int),
+    ("width", ctypes.c_int),
+    ("height", ctypes.c_int),
+    ("coded_width", ctypes.c_int),
+    ("coded_height", ctypes.c_int),
+    ("sample_aspect_ratio", AVRational),
+    ("pix_fmt", ctypes.c_int), # AVPixelFormat
+    ("sw_pix_fmt", ctypes.c_int), # AVPixelFormat
+    ("color_primaries", ctypes.c_int), # AVColorPrimaries
+    ("color_trc", ctypes.c_int), # AVColorTransferCharacteristic
+    ("colorspace", ctypes.c_int), # AVColorSpace
+    ("color_range", ctypes.c_int), # AVColorRange
+    ("chroma_sample_location", ctypes.c_int), # AVChromaLocation
+    ("field_order", ctypes.c_int), # AVFieldOrder
+    ("refs", ctypes.c_int),
+    ("has_b_frames", ctypes.c_int),
+    ("slice_flags", ctypes.c_int),
+    ("draw_horiz_band", ctypes.c_void_p),
+    ("get_format", ctypes.c_void_p),
+    ("max_b_frames", ctypes.c_int),
+    ("b_quant_factor", ctypes.c_float),
+    ("b_quant_offset", ctypes.c_float),
+    ("i_quant_factor", ctypes.c_float),
+    ("i_quant_offset", ctypes.c_float),
+    ("lumi_masking", ctypes.c_float),
+    ("temporal_cplx_masking", ctypes.c_float),
+    ("spatial_cplx_masking", ctypes.c_float),
+    ("p_masking", ctypes.c_float),
+    ("dark_masking", ctypes.c_float),
+    ("nsse_weight", ctypes.c_int),
+    ("me_cmp", ctypes.c_int),
+    ("me_sub_cmp", ctypes.c_int),
+    ("mb_cmp", ctypes.c_int),
+    ("ildct_cmp", ctypes.c_int),
+    ("dia_size", ctypes.c_int),
+    ("last_predictor_count", ctypes.c_int),
+    ("me_pre_cmp", ctypes.c_int),
+    ("pre_dia_size", ctypes.c_int),
+    ("me_subpel_quality", ctypes.c_int),
+    ("me_range", ctypes.c_int),
+    ("mb_decision", ctypes.c_int),
+    ("intra_matrix", ctypes.POINTER(ctypes.c_uint16)),
+    ("inter_matrix", ctypes.POINTER(ctypes.c_uint16)),
+    ("chroma_intra_matrix", ctypes.POINTER(ctypes.c_uint16)),
+    ("intra_dc_precision", ctypes.c_int),
+    ("mb_lmin", ctypes.c_int),
+    ("mb_lmax", ctypes.c_int),
+    ("bidir_refine", ctypes.c_int),
+    ("keyint_min", ctypes.c_int),
+    ("gop_size", ctypes.c_int),
+    ("mv0_threshold", ctypes.c_int),
+    ("slices", ctypes.c_int),
+    ("sample_rate", ctypes.c_int),
+    ("sample_fmt", ctypes.c_int), # AVSampleFormat
+    ("ch_layout", AVChannelLayout),
+    ("frame_size", ctypes.c_int),
+    ("block_align", ctypes.c_int),
+    ("cutoff", ctypes.c_int),
+    ("audio_service_type", ctypes.c_int), # AVAudioServiceType
+    ("request_sample_fmt", ctypes.c_int), # AVSampleFormat
+    ("initial_padding", ctypes.c_int),
+    ("trailing_padding", ctypes.c_int),
+    ("seek_preroll", ctypes.c_int),
+    ("get_buffer2", ctypes.c_void_p),
+    ("bit_rate_tolerance", ctypes.c_int),
+    ("global_quality", ctypes.c_int),
+    ("compression_level", ctypes.c_int),
+    ("qcompress", ctypes.c_float),
+    ("qblur", ctypes.c_float),
+    ("qmin", ctypes.c_int),
+    ("qmax", ctypes.c_int),
+    ("max_qdiff", ctypes.c_int),
+    ("rc_buffer_size", ctypes.c_int),
+    ("rc_override_count", ctypes.c_int),
+    ("rc_override", ctypes.c_void_p), # RcOverride
+    ("rc_max_rate", ctypes.c_int64),
+    ("rc_min_rate", ctypes.c_int64),
+    ("rc_max_available_vbv_use", ctypes.c_float),
+    ("rc_min_vbv_overflow_use", ctypes.c_float),
+    ("rc_initial_buffer_occupancy", ctypes.c_int),
+    ("trellis", ctypes.c_int),
+    ("stats_out", ctypes.c_char_p),
+    ("stats_in", ctypes.c_char_p),
+    ("workaround_bugs", ctypes.c_int),
+    ("strict_std_compliance", ctypes.c_int),
+    ("error_concealment", ctypes.c_int),
+    ("debug", ctypes.c_int),
+    ("err_recognition", ctypes.c_int),
+    ("hwaccel", ctypes.c_void_p), # AVHWAccel
+    ("hwaccel_context", ctypes.c_void_p),
+    ("hw_frames_ctx", ctypes.c_void_p), # AVBufferRef
+    ("hw_device_ctx", ctypes.c_void_p), # AVBufferRef
+    ("hwaccel_flags", ctypes.c_int),
+    ("extra_hw_frames", ctypes.c_int),
+    ("error", ctypes.c_uint64 * AV_NUM_DATA_POINTERS),
+    ("dct_algo", ctypes.c_int),
+    ("idct_algo", ctypes.c_int),
+    ("bits_per_coded_sample", ctypes.c_int),
+    ("bits_per_raw_sample", ctypes.c_int),
+    ("thread_count", ctypes.c_int), # written by Decoder.__init__ before codec_open when more than one thread is requested
+    ("thread_type", ctypes.c_int)] # set from the FF_THREAD_* constants
+    # ... declaration stops here: any later C fields are undeclared, so ctypes.sizeof(AVCodecContext) presumably undercounts the real struct
 
 AVCodecContext_p = ctypes.POINTER(AVCodecContext)
 AVCodecContext_pp = ctypes.POINTER(AVCodecContext_p)