Files
rk_pve/pyav/libav.py

415 lines
14 KiB
Python
Raw Normal View History

2025-10-03 18:08:00 +02:00
# People's Video Editor: high quality, GPU accelerated mp4 editor
# Copyright (C) 2025 Roz K <roz@rozk.net>
#
# This file is part of People's Video Editor.
#
# People's Video Editor is free software: you can redistribute it and/or modify it under the terms of the
# GNU General Public License as published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
2025-10-03 18:15:25 +02:00
# People's Video Editor is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
2025-10-03 18:08:00 +02:00
#
2025-10-03 18:15:25 +02:00
# You should have received a copy of the GNU General Public License along with People's Video Editor.
2025-10-03 18:08:00 +02:00
# If not, see <https://www.gnu.org/licenses/>.
2025-10-03 13:25:14 +02:00
2025-10-03 16:55:38 +02:00
import errno
2025-10-03 13:25:14 +02:00
import ctypes
2025-10-04 13:51:41 +02:00
from pathlib import PurePath
2025-10-03 13:25:14 +02:00
2025-10-04 13:51:41 +02:00
# Directory containing this module; the shared libraries are looked up here.
shared_library_path = PurePath(__file__).parent


def load_shared_library(name):
    """Load ``lib<name>.so`` from ``shared_library_path`` through ctypes.

    Args:
        name: Library name without the ``lib`` prefix and ``.so`` suffix.

    Returns:
        The handle produced by ``ctypes.cdll.LoadLibrary``.
    """
    filename = "".join(("lib", name, ".so"))
    location = shared_library_path.joinpath(filename)
    return ctypes.cdll.LoadLibrary(str(location))
# Library handles used by every binding below; loaded in the order
# avutil, avformat, avcodec.
_avutil, _avformat, _avcodec = (
    load_shared_library(_name) for _name in ("avutil", "avformat", "avcodec")
)
2025-10-03 13:25:14 +02:00
2025-10-03 16:55:38 +02:00
def _errtag(a, b, c, d):
    """Build a negative error tag from four single-character strings.

    The characters are packed least-significant byte first (``a`` in bits
    0-7, ``d`` in bits 24-31) and the packed value is negated.
    """
    packed = 0
    for shift, char in zip((0, 8, 16, 24), (a, b, c, d)):
        packed |= ord(char) << shift
    return -packed
# Error codes are negative on the libav side: keep EAGAIN as-is when the
# platform already defines it negative, otherwise negate it.
AVERROR_EAGAIN = errno.EAGAIN if errno.EAGAIN < 0 else -errno.EAGAIN
AVERROR_EOF = _errtag('E', 'O', 'F', ' ')

# Size of the buffer handed to av_strerror().
_AV_ERROR_MAX_STRING_SIZE = 64
2025-10-03 16:55:38 +02:00
2025-10-03 17:22:25 +02:00
# Media type identifiers: consecutive integers starting at -1 (UNKNOWN).
(
    AVMEDIA_TYPE_UNKNOWN,
    AVMEDIA_TYPE_VIDEO,
    AVMEDIA_TYPE_AUDIO,
    AVMEDIA_TYPE_DATA,
    AVMEDIA_TYPE_SUBTITLE,
    AVMEDIA_TYPE_ATTACHMENT,
) = range(-1, 5)
2025-10-03 16:55:38 +02:00
2025-10-04 04:27:51 +02:00
# Length of the fixed-size per-plane arrays declared in the structures below.
AV_NUM_DATA_POINTERS = 8

# Threading flags (distinct bits).
# NOTE(review): presumably meant for AVCodecContext.thread_type; confirm.
FF_THREAD_FRAME = 1 << 0
FF_THREAD_SLICE = 1 << 1

# uint8_t* and uint8_t** pointer types shared by the structure definitions.
c_uint8_p = ctypes.POINTER(ctypes.c_uint8)
c_uint8_pp = ctypes.POINTER(c_uint8_p)
class AVRational(ctypes.Structure):
    """Pair of C ints: ``num`` (numerator) and ``den`` (denominator)."""

    _fields_ = [
        ("num", ctypes.c_int),
        ("den", ctypes.c_int),
    ]
2025-10-03 15:10:49 +02:00
class AVFrame(ctypes.Structure):
    """Leading members of the C ``AVFrame`` structure.

    Only the first members are declared; the remainder of the C structure is
    intentionally left out, so ``ctypes.sizeof(AVFrame)`` is not the real size.
    NOTE(review): member order and types must match the FFmpeg build that is
    loaded at runtime; confirm against its headers.
    """

    _fields_ = [
        ("data", c_uint8_p * AV_NUM_DATA_POINTERS),
        ("linesize", ctypes.c_int * AV_NUM_DATA_POINTERS),
        ("extended_data", c_uint8_pp),
        ("width", ctypes.c_int),
        ("height", ctypes.c_int),
        ("nb_samples", ctypes.c_int),
        ("format", ctypes.c_int),
        ("key_frame", ctypes.c_int),
        ("pict_type", ctypes.c_int),
        ("sample_aspect_ratio", AVRational),
        ("pts", ctypes.c_int64),
        ("pkt_dts", ctypes.c_int64),
        ("time_base", AVRational),
        # ... (remaining members not declared)
    ]


AVFrame_p = ctypes.POINTER(AVFrame)
AVFrame_pp = ctypes.POINTER(AVFrame_p)
2025-10-03 16:55:38 +02:00
class AVCodecParameters(ctypes.Structure):
    """Opaque structure: no fields are declared, it is only used via pointer."""


AVCodecParameters_p = ctypes.POINTER(AVCodecParameters)
class AVStream(ctypes.Structure):
    """Leading members of the C ``AVStream`` structure.

    Only the first members are declared; the rest of the C structure is
    intentionally left out.
    NOTE(review): layout must match the loaded FFmpeg build; confirm against
    its headers.
    """

    _fields_ = [
        ("av_class", ctypes.c_void_p),
        ("index", ctypes.c_int),
        ("id", ctypes.c_int),
        ("codecpar", AVCodecParameters_p),
        ("priv_data", ctypes.c_void_p),
        ("time_base", AVRational),
        # ... (remaining members not declared)
    ]


AVStream_p = ctypes.POINTER(AVStream)
AVStream_pp = ctypes.POINTER(AVStream_p)
class AVFormatContext(ctypes.Structure):
    """Leading members of the C ``AVFormatContext`` structure.

    Only the members up to ``streams`` are declared; the rest of the C
    structure is intentionally left out.
    NOTE(review): layout must match the loaded FFmpeg build; confirm against
    its headers.
    """

    _fields_ = [
        ("av_class", ctypes.c_void_p),
        ("iformat", ctypes.c_void_p),
        ("oformat", ctypes.c_void_p),
        ("priv_data", ctypes.c_void_p),
        ("pb", ctypes.c_void_p),
        ("ctx_flags", ctypes.c_int),
        ("nb_streams", ctypes.c_uint),
        ("streams", AVStream_pp),
        # ... (remaining members not declared)
    ]


AVFormatContext_p = ctypes.POINTER(AVFormatContext)
AVFormatContext_pp = ctypes.POINTER(AVFormatContext_p)
2025-10-03 17:22:25 +02:00
class AVPacket(ctypes.Structure):
    """Leading members of the C ``AVPacket`` structure.

    Only the members up to ``stream_index`` are declared; the rest of the C
    structure is intentionally left out.
    NOTE(review): layout must match the loaded FFmpeg build; confirm against
    its headers.
    """

    _fields_ = [
        ("buf", ctypes.c_void_p),
        ("pts", ctypes.c_int64),
        ("dts", ctypes.c_int64),
        ("data", ctypes.c_void_p),
        ("size", ctypes.c_int),
        ("stream_index", ctypes.c_int),
        # ... (remaining members not declared)
    ]


AVPacket_p = ctypes.POINTER(AVPacket)
AVPacket_pp = ctypes.POINTER(AVPacket_p)
class AVCodec(ctypes.Structure):
    """Leading members of the C ``AVCodec`` structure.

    Members after ``wrapper_name`` are intentionally not declared.
    NOTE(review): layout must match the loaded FFmpeg build; confirm against
    its headers.
    """

    _fields_ = [
        ("name", ctypes.c_char_p),
        ("long_name", ctypes.c_char_p),
        ("type", ctypes.c_int),  # AVMediaType
        ("id", ctypes.c_int),  # AVCodecID
        ("capabilities", ctypes.c_int),  # AV_CODEC_CAP_*
        ("max_lowres", ctypes.c_uint8),
        ("deprecated_supported_framerates", ctypes.c_void_p),
        ("deprecated_pix_fmts", ctypes.c_void_p),
        ("deprecated_supported_samplerates", ctypes.c_void_p),
        ("deprecated_sample_fmts", ctypes.c_void_p),
        ("priv_class", ctypes.c_void_p),
        ("profiles", ctypes.c_void_p),  # AVProfile
        ("wrapper_name", ctypes.c_char_p),
        # ... (remaining members not declared)
    ]


AVCodec_p = ctypes.POINTER(AVCodec)
AVCodec_pp = ctypes.POINTER(AVCodec_p)
2025-10-04 17:39:37 +02:00
class AVChannelLayout_u(ctypes.Union):
    """Union stored in ``AVChannelLayout.u``: a 64-bit mask or a pointer."""

    _fields_ = [
        ("mask", ctypes.c_uint64),
        ("map", ctypes.c_void_p),  # AVChannelCustom
    ]
class AVChannelLayout(ctypes.Structure):
    """Channel layout structure embedded by value in ``AVCodecContext``."""

    _fields_ = [
        ("order", ctypes.c_int),  # AVChannelOrder
        ("nb_channels", ctypes.c_int),
        ("u", AVChannelLayout_u),
        ("opaque", ctypes.c_void_p),
    ]
2025-10-03 17:22:25 +02:00
class AVCodecContext(ctypes.Structure):
    """Leading members of the C ``AVCodecContext`` structure.

    Members are declared up to and including ``thread_type``; everything after
    it is intentionally left out, so ``ctypes.sizeof(AVCodecContext)`` is not
    the real size of the C structure.
    NOTE(review): every member before ``thread_type`` has to match the loaded
    FFmpeg build exactly, otherwise later offsets are wrong; confirm against
    its headers.
    """

    _fields_ = [
        ("av_class", ctypes.c_void_p),
        ("log_level_offset", ctypes.c_int),
        ("codec_type", ctypes.c_int),
        ("codec", AVCodec_p),
        ("codec_id", ctypes.c_int),
        ("codec_tag", ctypes.c_uint),
        ("priv_data", ctypes.c_void_p),
        ("internal", ctypes.c_void_p),  # AVCodecInternal
        ("opaque", ctypes.c_void_p),
        ("bit_rate", ctypes.c_int64),
        ("flags", ctypes.c_int),
        ("flags2", ctypes.c_int),
        ("extradata", ctypes.POINTER(ctypes.c_uint8)),
        ("extradata_size", ctypes.c_int),
        ("time_base", AVRational),
        ("pkt_timebase", AVRational),
        ("framerate", AVRational),
        ("delay", ctypes.c_int),
        ("width", ctypes.c_int),
        ("height", ctypes.c_int),
        ("coded_width", ctypes.c_int),
        ("coded_height", ctypes.c_int),
        ("sample_aspect_ratio", AVRational),
        ("pix_fmt", ctypes.c_int),  # AVPixelFormat
        ("sw_pix_fmt", ctypes.c_int),  # AVPixelFormat
        ("color_primaries", ctypes.c_int),  # AVColorPrimaries
        ("color_trc", ctypes.c_int),  # AVColorTransferCharacteristic
        ("colorspace", ctypes.c_int),  # AVColorSpace
        ("color_range", ctypes.c_int),  # AVColorRange
        ("chroma_sample_location", ctypes.c_int),  # AVChromaLocation
        ("field_order", ctypes.c_int),  # AVFieldOrder
        ("refs", ctypes.c_int),
        ("has_b_frames", ctypes.c_int),
        ("slice_flags", ctypes.c_int),
        ("draw_horiz_band", ctypes.c_void_p),
        ("get_format", ctypes.c_void_p),
        ("max_b_frames", ctypes.c_int),
        ("b_quant_factor", ctypes.c_float),
        ("b_quant_offset", ctypes.c_float),
        ("i_quant_factor", ctypes.c_float),
        ("i_quant_offset", ctypes.c_float),
        ("lumi_masking", ctypes.c_float),
        ("temporal_cplx_masking", ctypes.c_float),
        ("spatial_cplx_masking", ctypes.c_float),
        ("p_masking", ctypes.c_float),
        ("dark_masking", ctypes.c_float),
        ("nsse_weight", ctypes.c_int),
        ("me_cmp", ctypes.c_int),
        ("me_sub_cmp", ctypes.c_int),
        ("mb_cmp", ctypes.c_int),
        ("ildct_cmp", ctypes.c_int),
        ("dia_size", ctypes.c_int),
        ("last_predictor_count", ctypes.c_int),
        ("me_pre_cmp", ctypes.c_int),
        ("pre_dia_size", ctypes.c_int),
        ("me_subpel_quality", ctypes.c_int),
        ("me_range", ctypes.c_int),
        ("mb_decision", ctypes.c_int),
        ("intra_matrix", ctypes.POINTER(ctypes.c_uint16)),
        ("inter_matrix", ctypes.POINTER(ctypes.c_uint16)),
        ("chroma_intra_matrix", ctypes.POINTER(ctypes.c_uint16)),
        ("intra_dc_precision", ctypes.c_int),
        ("mb_lmin", ctypes.c_int),
        ("mb_lmax", ctypes.c_int),
        ("bidir_refine", ctypes.c_int),
        ("keyint_min", ctypes.c_int),
        ("gop_size", ctypes.c_int),
        ("mv0_threshold", ctypes.c_int),
        ("slices", ctypes.c_int),
        ("sample_rate", ctypes.c_int),
        ("sample_fmt", ctypes.c_int),  # AVSampleFormat
        ("ch_layout", AVChannelLayout),
        ("frame_size", ctypes.c_int),
        ("block_align", ctypes.c_int),
        ("cutoff", ctypes.c_int),
        ("audio_service_type", ctypes.c_int),  # AVAudioServiceType
        ("request_sample_fmt", ctypes.c_int),  # AVSampleFormat
        ("initial_padding", ctypes.c_int),
        ("trailing_padding", ctypes.c_int),
        ("seek_preroll", ctypes.c_int),
        ("get_buffer2", ctypes.c_void_p),
        ("bit_rate_tolerance", ctypes.c_int),
        ("global_quality", ctypes.c_int),
        ("compression_level", ctypes.c_int),
        ("qcompress", ctypes.c_float),
        ("qblur", ctypes.c_float),
        ("qmin", ctypes.c_int),
        ("qmax", ctypes.c_int),
        ("max_qdiff", ctypes.c_int),
        ("rc_buffer_size", ctypes.c_int),
        ("rc_override_count", ctypes.c_int),
        ("rc_override", ctypes.c_void_p),  # RcOverride
        ("rc_max_rate", ctypes.c_int64),
        ("rc_min_rate", ctypes.c_int64),
        ("rc_max_available_vbv_use", ctypes.c_float),
        ("rc_min_vbv_overflow_use", ctypes.c_float),
        ("rc_initial_buffer_occupancy", ctypes.c_int),
        ("trellis", ctypes.c_int),
        ("stats_out", ctypes.c_char_p),
        ("stats_in", ctypes.c_char_p),
        ("workaround_bugs", ctypes.c_int),
        ("strict_std_compliance", ctypes.c_int),
        ("error_concealment", ctypes.c_int),
        ("debug", ctypes.c_int),
        ("err_recognition", ctypes.c_int),
        ("hwaccel", ctypes.c_void_p),  # AVHWAccel
        ("hwaccel_context", ctypes.c_void_p),
        ("hw_frames_ctx", ctypes.c_void_p),  # AVBufferRef
        ("hw_device_ctx", ctypes.c_void_p),  # AVBufferRef
        ("hwaccel_flags", ctypes.c_int),
        ("extra_hw_frames", ctypes.c_int),
        ("error", ctypes.c_uint64 * AV_NUM_DATA_POINTERS),
        ("dct_algo", ctypes.c_int),
        ("idct_algo", ctypes.c_int),
        ("bits_per_coded_sample", ctypes.c_int),
        ("bits_per_raw_sample", ctypes.c_int),
        ("thread_count", ctypes.c_int),
        ("thread_type", ctypes.c_int),
        # ... (remaining members not declared)
    ]


AVCodecContext_p = ctypes.POINTER(AVCodecContext)
AVCodecContext_pp = ctypes.POINTER(AVCodecContext_p)
_avutil.av_strerror.restype = ctypes.c_int
_avutil.av_strerror.argtypes = [
    ctypes.c_int,  # errnum
    ctypes.c_char_p,  # errbuf
    ctypes.c_size_t]  # errbuf_size


def strerror(errno):
    """Return the textual description of the libav error code *errno*.

    Args:
        errno: Integer error code, as returned by the wrappers in this module.
            (The parameter name shadows the ``errno`` module inside this
            function only; it is kept for backward compatibility.)

    Returns:
        The message written by ``av_strerror`` as a ``str``.
    """
    errbuf = ctypes.create_string_buffer(_AV_ERROR_MAX_STRING_SIZE)
    # The status returned by av_strerror is deliberately not checked.
    # NOTE(review): per the FFmpeg docs a generic message is still written to
    # the buffer when the code is unknown; confirm for the bundled version.
    _avutil.av_strerror(errno, errbuf, _AV_ERROR_MAX_STRING_SIZE)
    # The message may come from the C library in the current locale's
    # encoding; an error-reporting helper must not raise UnicodeDecodeError
    # itself, so undecodable bytes are replaced instead.
    return errbuf.value.decode("utf-8", errors="replace")
# Binding: av_frame_alloc() -> AVFrame_p
_avutil.av_frame_alloc.argtypes = None
_avutil.av_frame_alloc.restype = AVFrame_p


def frame_alloc():
    """Call ``av_frame_alloc`` and return the resulting ``AVFrame_p``."""
    frame = _avutil.av_frame_alloc()
    return frame
# Binding: av_frame_free(AVFrame_pp) -> None
_avutil.av_frame_free.argtypes = (AVFrame_pp,)
_avutil.av_frame_free.restype = None


def frame_free(frame):
    """Pass the address of *frame* (an ``AVFrame_p``) to ``av_frame_free``.

    NOTE(review): presumably the C side resets the caller's pointer through
    that address; confirm against the FFmpeg documentation.
    """
    frame_ref = ctypes.byref(frame)
    _avutil.av_frame_free(frame_ref)
2025-10-03 13:25:14 +02:00
# Binding: avformat_alloc_context() -> AVFormatContext_p
_avformat.avformat_alloc_context.argtypes = None
_avformat.avformat_alloc_context.restype = AVFormatContext_p


def format_alloc_context():
    """Call ``avformat_alloc_context`` and return the ``AVFormatContext_p``."""
    context = _avformat.avformat_alloc_context()
    return context
# Binding: avformat_free_context(AVFormatContext_p) -> None
_avformat.avformat_free_context.argtypes = (AVFormatContext_p,)
_avformat.avformat_free_context.restype = None


def format_free_context(context):
    """Hand *context* (an ``AVFormatContext_p``) to ``avformat_free_context``."""
    _avformat.avformat_free_context(context)
_avformat.avformat_open_input.restype = ctypes.c_int
_avformat.avformat_open_input.argtypes = [
    AVFormatContext_pp,
    ctypes.c_char_p,  # url
    ctypes.c_void_p,  # format
    ctypes.POINTER(ctypes.c_void_p)]  # options


def format_open_input(context, url):
    """Open *url* with ``avformat_open_input``.

    Args:
        context: ``AVFormatContext_p``; its address is passed to the C call.
        url: Path or URL to open, as ``str``, ``bytes`` or ``os.PathLike``.

    Returns:
        The integer status code returned by ``avformat_open_input``.
    """
    # Local import keeps this block self-contained.
    import os

    # The previous ``url.encode('ascii', 'ignore')`` silently dropped every
    # non-ASCII character, so such paths were opened under a different name
    # (or not at all). os.fsencode keeps the name intact and produces the same
    # bytes as before for pure-ASCII input.
    encoded_url = os.fsencode(url)
    # No explicit input format and no options dictionary are supplied.
    return _avformat.avformat_open_input(ctypes.byref(context), encoded_url, None, None)
# Binding: avformat_close_input(AVFormatContext_pp) -> None
_avformat.avformat_close_input.argtypes = (AVFormatContext_pp,)
_avformat.avformat_close_input.restype = None


def format_close_input(context):
    """Pass the address of *context* to ``avformat_close_input``.

    NOTE(review): presumably the C side resets the caller's pointer through
    that address; confirm against the FFmpeg documentation.
    """
    context_ref = ctypes.byref(context)
    _avformat.avformat_close_input(context_ref)
# Binding: avformat_find_stream_info(AVFormatContext_p, options) -> int
_avformat.avformat_find_stream_info.argtypes = (
    AVFormatContext_p,
    ctypes.POINTER(ctypes.c_void_p),  # options
)
_avformat.avformat_find_stream_info.restype = ctypes.c_int


def format_find_stream_info(context):
    """Run ``avformat_find_stream_info`` on *context* without options.

    Returns:
        The integer status code returned by the C call.
    """
    status = _avformat.avformat_find_stream_info(context, None)
    return status
# Binding: av_find_best_stream(...) -> int
_avformat.av_find_best_stream.argtypes = (
    AVFormatContext_p,
    ctypes.c_int,  # type
    ctypes.c_int,  # wanted stream
    ctypes.c_int,  # related stream
    AVCodec_pp,
    ctypes.c_int,  # flags
)
_avformat.av_find_best_stream.restype = ctypes.c_int


def format_find_best_stream(context, type):
    """Call ``av_find_best_stream`` for the media type *type*.

    The wanted and related stream arguments are both -1 and the flags are 0.

    Args:
        context: ``AVFormatContext_p`` to search.
        type: Media type integer (one of the ``AVMEDIA_TYPE_*`` constants).

    Returns:
        A ``(index, codec)`` tuple: the int returned by the C call and the
        ``AVCodec_p`` whose address was passed for the C side to fill in.
    """
    decoder = AVCodec_p()
    stream_index = _avformat.av_find_best_stream(
        context, type, -1, -1, ctypes.byref(decoder), 0)
    return stream_index, decoder
# av_packet_alloc is exported by libavcodec (declared in libavcodec/packet.h).
# Looking it up through the libavformat handle only worked because the
# dynamic loader also searches a library's dependencies, so bind it on the
# library that actually defines it.
_avcodec.av_packet_alloc.restype = AVPacket_p
_avcodec.av_packet_alloc.argtypes = None


def packet_alloc():
    """Call ``av_packet_alloc`` and return the resulting ``AVPacket_p``.

    NOTE(review): the pointer is presumably NULL (falsy) when allocation
    fails; callers should check it before use.
    """
    return _avcodec.av_packet_alloc()
# av_packet_free is exported by libavcodec (declared in libavcodec/packet.h).
# Looking it up through the libavformat handle only worked because the
# dynamic loader also searches a library's dependencies, so bind it on the
# library that actually defines it.
_avcodec.av_packet_free.restype = None
_avcodec.av_packet_free.argtypes = [AVPacket_pp]


def packet_free(packet):
    """Pass the address of *packet* (an ``AVPacket_p``) to ``av_packet_free``.

    NOTE(review): presumably the C side resets the caller's pointer through
    that address; confirm against the FFmpeg documentation.
    """
    _avcodec.av_packet_free(ctypes.byref(packet))
# Binding: av_read_frame(AVFormatContext_p, AVPacket_p) -> int
_avformat.av_read_frame.argtypes = (AVFormatContext_p, AVPacket_p)
_avformat.av_read_frame.restype = ctypes.c_int


def read_frame(context, packet):
    """Call ``av_read_frame`` with *context* and *packet*.

    Returns:
        The integer status code returned by the C call.
    """
    status = _avformat.av_read_frame(context, packet)
    return status
2025-10-03 15:10:49 +02:00
# Binding: avcodec_alloc_context3(AVCodec_p) -> AVCodecContext_p
_avcodec.avcodec_alloc_context3.argtypes = (AVCodec_p,)
_avcodec.avcodec_alloc_context3.restype = AVCodecContext_p


def codec_alloc_context(codec):
    """Call ``avcodec_alloc_context3`` for *codec*; returns ``AVCodecContext_p``."""
    context = _avcodec.avcodec_alloc_context3(codec)
    return context
# Binding: avcodec_free_context(AVCodecContext_pp) -> None
_avcodec.avcodec_free_context.argtypes = (AVCodecContext_pp,)
_avcodec.avcodec_free_context.restype = None


def codec_free_context(context):
    """Pass the address of *context* to ``avcodec_free_context``.

    NOTE(review): presumably the C side resets the caller's pointer through
    that address; confirm against the FFmpeg documentation.
    """
    context_ref = ctypes.byref(context)
    _avcodec.avcodec_free_context(context_ref)
2025-10-03 16:55:38 +02:00
# Binding: avcodec_parameters_to_context(AVCodecContext_p, AVCodecParameters_p) -> int
_avcodec.avcodec_parameters_to_context.argtypes = (
    AVCodecContext_p,
    AVCodecParameters_p,
)
_avcodec.avcodec_parameters_to_context.restype = ctypes.c_int


def codec_parameters_to_context(context, parameters):
    """Call ``avcodec_parameters_to_context`` with *context* and *parameters*.

    Returns:
        The integer status code returned by the C call.
    """
    status = _avcodec.avcodec_parameters_to_context(context, parameters)
    return status
2025-10-03 15:10:49 +02:00
# Binding: avcodec_open2(AVCodecContext_p, AVCodec_p, options) -> int
_avcodec.avcodec_open2.argtypes = (
    AVCodecContext_p,
    AVCodec_p,
    ctypes.POINTER(ctypes.c_void_p),  # options
)
_avcodec.avcodec_open2.restype = ctypes.c_int


def codec_open(context, codec):
    """Call ``avcodec_open2`` with *context* and *codec*, without options.

    Returns:
        The integer status code returned by the C call.
    """
    status = _avcodec.avcodec_open2(context, codec, None)
    return status
# Binding: avcodec_send_packet(AVCodecContext_p, AVPacket_p) -> int
_avcodec.avcodec_send_packet.argtypes = (AVCodecContext_p, AVPacket_p)
_avcodec.avcodec_send_packet.restype = ctypes.c_int


def codec_send_packet(context, packet):
    """Call ``avcodec_send_packet`` with *context* and *packet*.

    Returns:
        The integer status code returned by the C call.
    """
    status = _avcodec.avcodec_send_packet(context, packet)
    return status
# Binding: avcodec_receive_frame(AVCodecContext_p, AVFrame_p) -> int
_avcodec.avcodec_receive_frame.argtypes = (AVCodecContext_p, AVFrame_p)
_avcodec.avcodec_receive_frame.restype = ctypes.c_int


def codec_receive_frame(context, frame):
    """Call ``avcodec_receive_frame`` with *context* and *frame*.

    Returns:
        The integer status code returned by the C call.
    """
    status = _avcodec.avcodec_receive_frame(context, frame)
    return status