video_decode_plugin/ios/Classes/VideoDecoder.swift
import Foundation
import VideoToolbox
import AVFoundation

/// Hardware video decoder backed by VideoToolbox: decodes H.264/H.265 frames into CVPixelBuffers.
class VideoDecoder {
    enum CodecType: String {
        case h264 = "h264"
        case h265 = "h265"

        var codecType: CMVideoCodecType {
            switch self {
            case .h264: return kCMVideoCodecType_H264
            case .h265: return kCMVideoCodecType_HEVC
            }
        }
    }
    // ====== Decode session state ======
    /// The active VideoToolbox decompression session.
    private var decompressionSession: VTDecompressionSession?
    /// Format description built from the SPS/PPS parameter sets.
    private var formatDesc: CMVideoFormatDescription?
    /// Video width in pixels.
    private let width: Int
    /// Video height in pixels.
    private let height: Int
    /// Codec in use (H264/H265).
    private let codecType: CodecType
    /// Serial queue on which all decode work runs.
    private let decodeQueue = DispatchQueue(label: "video_decode_plugin.decode.queue")
    /// Whether the decompression session is ready to accept frames.
    private var isSessionReady = false
    /// Sequence number of the last I-frame (only reset in release(); not read in the decode path).
    private var lastIFrameSeq: Int?
    /// Sequence numbers of frames seen so far (only reset in release(); not read in the decode path).
    private var frameSeqSet = Set<Int>()
    /// Maximum tolerated frame delay in milliseconds (currently unused).
    private let maxAllowedDelayMs: Int = 350
    /// Base timestamp in ms for rebasing incoming timestamps (currently unused).
    private var timestampBaseMs: Int64?
    /// Relative timestamp of the first frame (currently unused).
    private var firstFrameRelativeTimestamp: Int64?
    // ====== Input/output buffers and render pacing ======
    /// Concurrent queue guarding the input buffer (writes use barriers).
    private let inputQueue = DispatchQueue(label: "video_decode_plugin.input.queue", attributes: .concurrent)
    private var inputBuffer: [(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data?, pps: Data?)] = []
    private let inputBufferSemaphore = DispatchSemaphore(value: 1) // unused; synchronization is handled by the barrier queue
    private let inputBufferMaxCount = 30
    /// Concurrent queue guarding the output buffer (writes use barriers).
    private let outputQueue = DispatchQueue(label: "video_decode_plugin.output.queue", attributes: .concurrent)
    private var outputBuffer: [(pixelBuffer: CVPixelBuffer, timestamp: Int64)] = []
    private let outputBufferSemaphore = DispatchSemaphore(value: 1) // unused; synchronization is handled by the barrier queue
    private let outputBufferMaxCount = 20
    /// Thread that paces decoded frames out to Flutter.
    private var renderThread: Thread?
    /// Whether the render thread loop should keep running.
    private var renderThreadRunning = false
    /// Set once the first frame has been delivered to Flutter.
    private var hasNotifiedFlutter = false
    /// Current paced render frame rate, updated from the smoothed estimate.
    private var renderFps: Int = 15
    /// EMA-smoothed frame rate.
    private var smoothedFps: Double = 15.0
    /// EMA smoothing factor.
    private let alpha: Double = 0.2
    /// Lower bound on the render frame rate.
    private let minFps: Double = 8.0
    /// Upper bound on the render frame rate.
    private let maxFps: Double = 30.0
    /// Maximum per-adjustment change in fps.
    private let maxStep: Double = 2.0
    /// Wall-clock times (ms) of recently rendered frames, used to measure throughput.
    private var renderedTimestamps: [Int64] = []
    /// Maximum number of timestamps kept in the measurement window.
    private let renderedTimestampsMaxCount = 20
    /// Total frames rendered so far.
    private var renderedFrameCount = 0
    /// Re-estimate the frame rate every N rendered frames.
    private let fpsAdjustInterval = 10
    /// Callback delivering each decoded CVPixelBuffer and its timestamp (ms); invoked on the main thread.
    var onFrameDecoded: ((CVPixelBuffer, Int64) -> Void)? = { _, _ in }
    /// Creates a decoder for the given dimensions and codec, and starts the render thread.
    init(width: Int, height: Int, codecType: String) {
        self.width = width
        self.height = height
        self.codecType = CodecType(rawValue: codecType.lowercased()) ?? .h264
        startRenderThread()
    }
    // ====== Input buffer ======
    /// Appends a frame to the input buffer, dropping the oldest frame when full.
    private func enqueueInput(_ item: (Data, Int, Int64, Int, Int?, Data?, Data?)) {
        inputQueue.async(flags: .barrier) {
            if self.inputBuffer.count >= self.inputBufferMaxCount {
                self.inputBuffer.removeFirst()
            }
            self.inputBuffer.append(item)
        }
    }

    /// Removes and returns the oldest buffered frame, if any.
    private func dequeueInput() -> (Data, Int, Int64, Int, Int?, Data?, Data?)? {
        var item: (Data, Int, Int64, Int, Int?, Data?, Data?)?
        // Barrier: this block mutates the buffer, so it must not run concurrently with other accesses.
        inputQueue.sync(flags: .barrier) {
            if !self.inputBuffer.isEmpty {
                item = self.inputBuffer.removeFirst()
            }
        }
        return item
    }
    // ====== Output buffer ======
    /// Appends a decoded frame to the output buffer, dropping the oldest when full.
    private func enqueueOutput(_ item: (CVPixelBuffer, Int64)) {
        outputQueue.async(flags: .barrier) {
            if self.outputBuffer.count >= self.outputBufferMaxCount {
                self.outputBuffer.removeFirst()
            }
            self.outputBuffer.append(item)
        }
    }

    /// Removes and returns the oldest decoded frame, if any.
    private func dequeueOutput() -> (CVPixelBuffer, Int64)? {
        var item: (CVPixelBuffer, Int64)?
        // Barrier: this block mutates the buffer, so it must not run concurrently with other accesses.
        outputQueue.sync(flags: .barrier) {
            if !self.outputBuffer.isEmpty {
                item = self.outputBuffer.removeFirst()
            }
        }
        return item
    }
    // ====== Render thread ======
    /// Starts the thread that pops decoded frames, delivers them to Flutter, and
    /// periodically re-tunes the render frame rate via the EMA estimator.
    private func startRenderThread() {
        renderThreadRunning = true
        renderThread = Thread { [weak self] in
            guard let self = self else { return }
            while self.renderThreadRunning {
                let frameIntervalMs = Int(1000.0 / self.smoothedFps)
                let loopStart = Date().timeIntervalSince1970 * 1000.0
                if let (pixelBuffer, timestamp) = self.dequeueOutput() {
                    // Deliver the frame to Flutter on the main thread.
                    DispatchQueue.main.async {
                        self.onFrameDecoded?(pixelBuffer, timestamp)
                    }
                    // Notify Flutter once when the first frame is rendered.
                    if !self.hasNotifiedFlutter {
                        self.hasNotifiedFlutter = true
                        // onFrameRendered notification would be sent here.
                    }
                    // Record the render time for throughput measurement.
                    self.renderedTimestamps.append(Int64(Date().timeIntervalSince1970 * 1000))
                    if self.renderedTimestamps.count > self.renderedTimestampsMaxCount {
                        self.renderedTimestamps.removeFirst()
                    }
                    self.renderedFrameCount += 1
                    if self.renderedFrameCount % self.fpsAdjustInterval == 0 {
                        let measuredFps = self.calculateDecodeFps()
                        let newFps = self.updateSmoothedFps(measuredFps)
                        self.renderFps = newFps
                    }
                }
                // Sleep off whatever remains of this frame's interval.
                let loopCost = Int(Date().timeIntervalSince1970 * 1000.0 - loopStart)
                let sleepMs = frameIntervalMs - loopCost
                if sleepMs > 0 {
                    Thread.sleep(forTimeInterval: Double(sleepMs) / 1000.0)
                }
            }
        }
        renderThread?.start()
    }

    /// Stops the render thread.
    private func stopRenderThread() {
        renderThreadRunning = false
        renderThread?.cancel()
        renderThread = nil
    }
    // ====== EMA frame-rate estimation ======
    /// Measures the actual render rate over the last N recorded timestamps.
    private func calculateDecodeFps() -> Double {
        guard renderedTimestamps.count >= 2 else { return smoothedFps }
        let first = renderedTimestamps.first!
        let last = renderedTimestamps.last!
        let frameCount = renderedTimestamps.count - 1
        let durationMs = max(last - first, 1)
        return Double(frameCount) * 1000.0 / Double(durationMs)
    }

    /// Blends the measured fps into the smoothed estimate (EMA), clamping both the
    /// per-step change and the final value to the configured bounds.
    private func updateSmoothedFps(_ measuredFps: Double) -> Int {
        let safeFps = min(max(measuredFps, minFps), maxFps)
        let targetFps = alpha * safeFps + (1 - alpha) * smoothedFps
        let delta = targetFps - smoothedFps
        let step = min(max(delta, -maxStep), maxStep)
        smoothedFps = min(max(smoothedFps + step, minFps), maxFps)
        return Int(smoothedFps)
    }
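
    // Worked example (illustrative numbers, not from the original source): with
    // alpha = 0.2 and smoothedFps = 15, a measured 30 fps gives a target of
    // 0.2 * 30 + 0.8 * 15 = 18. The delta of +3 exceeds maxStep, so the step is
    // clamped to +2 and smoothedFps becomes 17; larger swings are thus spread
    // across several adjustment intervals instead of landing at once.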
    /// (Re)creates the decompression session from the given SPS/PPS, typically on the first I-frame.
    private func setupSession(sps: Data?, pps: Data?) -> Bool {
        // Tear down any existing session first.
        if let session = decompressionSession {
            VTDecompressionSessionInvalidate(session)
            decompressionSession = nil
        }
        formatDesc = nil
        isSessionReady = false
        guard let sps = sps, let pps = pps else {
            print("[VideoDecoder] Missing SPS/PPS; cannot initialize the decode session")
            return false
        }
        // Sanity-check the NALU types (H.264: SPS = 7, PPS = 8).
        let spsType: UInt8 = sps.count > 0 ? (sps[0] & 0x1F) : 0
        let ppsType: UInt8 = pps.count > 0 ? (pps[0] & 0x1F) : 0
        if sps.count < 3 || spsType != 7 {
            print("[VideoDecoder][Error] Invalid SPS: len=\(sps.count), type=\(spsType)")
            return false
        }
        if pps.count < 3 || ppsType != 8 {
            print("[VideoDecoder][Error] Invalid PPS: len=\(pps.count), type=\(ppsType)")
            return false
        }
        var success = false
        sps.withUnsafeBytes { spsPtr in
            pps.withUnsafeBytes { ppsPtr in
                let parameterSetPointers: [UnsafePointer<UInt8>] = [
                    spsPtr.baseAddress!.assumingMemoryBound(to: UInt8.self),
                    ppsPtr.baseAddress!.assumingMemoryBound(to: UInt8.self)
                ]
                let parameterSetSizes: [Int] = [sps.count, pps.count]
                // Note: this always builds an H.264 format description; HEVC would additionally
                // need a VPS and CMVideoFormatDescriptionCreateFromHEVCParameterSets.
                let status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
                    allocator: kCFAllocatorDefault,
                    parameterSetCount: 2,
                    parameterSetPointers: parameterSetPointers,
                    parameterSetSizes: parameterSetSizes,
                    nalUnitHeaderLength: 4,
                    formatDescriptionOut: &formatDesc
                )
                if status != noErr {
                    print("[VideoDecoder] Failed to create format description: \(status)")
                    success = false
                } else {
                    success = true
                }
            }
        }
        if !success { return false }
        var callback = VTDecompressionOutputCallbackRecord(
            decompressionOutputCallback: { (decompressionOutputRefCon, _, status, _, imageBuffer, pts, _) in
                let decoder = Unmanaged<VideoDecoder>.fromOpaque(decompressionOutputRefCon!).takeUnretainedValue()
                if status == noErr, let pixelBuffer = imageBuffer {
                    // Hand the decoded frame to the render thread via the output buffer.
                    decoder.enqueueOutput((pixelBuffer, Int64(pts.seconds * 1000)))
                } else {
                    print("[VideoDecoder] Decode callback failed, status=\(status)")
                }
            },
            decompressionOutputRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque())
        )
        let attrs: [NSString: Any] = [
            kCVPixelBufferPixelFormatTypeKey: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
            kCVPixelBufferWidthKey: width,
            kCVPixelBufferHeightKey: height,
            kCVPixelBufferOpenGLESCompatibilityKey: true
        ]
        let status2 = VTDecompressionSessionCreate(
            allocator: kCFAllocatorDefault,
            formatDescription: formatDesc!,
            decoderSpecification: nil,
            imageBufferAttributes: attrs as CFDictionary,
            outputCallback: &callback,
            decompressionSessionOut: &decompressionSession
        )
        if status2 != noErr {
            print("[VideoDecoder] Failed to create decompression session: \(status2)")
            return false
        }
        isSessionReady = true
        print("[VideoDecoder] Decompression session initialized")
        return true
    }
    /// Buffers an encoded frame and asynchronously decodes the next buffered frame.
    func decodeFrame(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data? = nil, pps: Data? = nil) {
        enqueueInput((frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps))
        // All decode work runs on the serial decode queue.
        decodeQueue.async { [weak self] in
            guard let self = self else { return }
            guard let (frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps) = self.dequeueInput() else { return }
            if !self.isSessionReady, let sps = sps, let pps = pps {
                guard self.setupSession(sps: sps, pps: pps) else { return }
            }
            guard let session = self.decompressionSession else { return }
            guard frameData.count > 4 else { return }
            // Convert Annex-B to AVCC: overwrite the 4-byte start code with the
            // big-endian NALU length (assumes a single NALU per frame).
            var avccData = frameData
            let naluLen = UInt32(frameData.count - 4).bigEndian
            avccData.replaceSubrange(0..<4, with: withUnsafeBytes(of: naluLen) { Data($0) })
            // Copy the bytes into a CoreMedia-owned block buffer so they remain
            // valid for the lifetime of the sample buffer.
            var blockBuffer: CMBlockBuffer?
            var status = CMBlockBufferCreateWithMemoryBlock(
                allocator: kCFAllocatorDefault,
                memoryBlock: nil,
                blockLength: avccData.count,
                blockAllocator: kCFAllocatorDefault,
                customBlockSource: nil,
                offsetToData: 0,
                dataLength: avccData.count,
                flags: 0,
                blockBufferOut: &blockBuffer
            )
            if status != kCMBlockBufferNoErr { return }
            status = avccData.withUnsafeBytes { ptr in
                CMBlockBufferReplaceDataBytes(with: ptr.baseAddress!, blockBuffer: blockBuffer!, offsetIntoDestination: 0, dataLength: avccData.count)
            }
            if status != kCMBlockBufferNoErr { return }
            var sampleBuffer: CMSampleBuffer?
            var timing = CMSampleTimingInfo(duration: .invalid, presentationTimeStamp: CMTime(value: timestamp, timescale: 1000), decodeTimeStamp: .invalid)
            let status2 = CMSampleBufferCreate(
                allocator: kCFAllocatorDefault,
                dataBuffer: blockBuffer,
                dataReady: true,
                makeDataReadyCallback: nil,
                refcon: nil,
                formatDescription: self.formatDesc,
                sampleCount: 1,
                sampleTimingEntryCount: 1,
                sampleTimingArray: &timing,
                sampleSizeEntryCount: 1,
                sampleSizeArray: [avccData.count],
                sampleBufferOut: &sampleBuffer
            )
            if status2 != noErr { return }
            let decodeFlags: VTDecodeFrameFlags = []
            var infoFlags = VTDecodeInfoFlags()
            let status3 = VTDecompressionSessionDecodeFrame(
                session,
                sampleBuffer: sampleBuffer!,
                flags: decodeFlags,
                frameRefcon: nil,
                infoFlagsOut: &infoFlags
            )
            if status3 != noErr {
                print("[VideoDecoder] Decode failed: \(status3)")
            }
        }
    }
    /// Releases the decoder: stops the render thread, tears down the session, and clears all buffers.
    func release() {
        stopRenderThread()
        decodeQueue.sync {
            if let session = decompressionSession {
                VTDecompressionSessionInvalidate(session)
            }
            decompressionSession = nil
            formatDesc = nil
            isSessionReady = false
            frameSeqSet.removeAll()
            lastIFrameSeq = nil
        }
        inputQueue.async(flags: .barrier) { self.inputBuffer.removeAll() }
        outputQueue.async(flags: .barrier) { self.outputBuffer.removeAll() }
        print("[VideoDecoder] Decoder released")
    }
}
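
// MARK: - Usage sketch
// A minimal illustration of how this decoder might be driven, assuming a single
// Annex-B NALU per frame. `annexBFrame`, `spsData`, and `ppsData` are hypothetical
// placeholders; real input would arrive from the plugin's platform channel, and
// the frameType/frameSeq conventions below are assumptions, not part of this file.
/*
let decoder = VideoDecoder(width: 1280, height: 720, codecType: "h264")
decoder.onFrameDecoded = { pixelBuffer, timestampMs in
    // Hand the CVPixelBuffer to a Flutter texture (or an AVSampleBufferDisplayLayer).
}
// First frame: an I-frame with its SPS/PPS so the session can be set up.
decoder.decodeFrame(
    frameData: annexBFrame,   // NALU prefixed with a 4-byte Annex-B start code
    frameType: 1,             // assumed convention: 1 = I-frame
    timestamp: 0,
    frameSeq: 0,
    refIFrameSeq: nil,
    sps: spsData,             // raw SPS NALU payload (no start code)
    pps: ppsData
)
// ...subsequent frames omit sps/pps...
decoder.release()
*/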