// video_decode_plugin/ios/Classes/VideoDecoder.swift
import Foundation
import VideoToolbox
import AVFoundation
/// Hardware video decoder: wraps VideoToolbox to decode H264/H265 frames into CVPixelBuffers.
class VideoDecoder {
    /// Supported codecs, mapped to their CoreMedia codec-type constants.
    enum CodecType: String {
        case h264 = "h264"
        case h265 = "h265"

        /// The CoreMedia codec constant corresponding to this codec.
        var codecType: CMVideoCodecType {
            switch self {
            case .h264: return kCMVideoCodecType_H264
            case .h265: return kCMVideoCodecType_HEVC
            }
        }
    }
// MARK: - Decoder state

/// The active VideoToolbox decompression session (nil until SPS/PPS arrive).
private var decompressionSession: VTDecompressionSession?
/// Format description built from the stream's parameter sets.
private var formatDesc: CMVideoFormatDescription?
/// Expected output frame width in pixels.
private let width: Int
/// Expected output frame height in pixels.
private let height: Int
/// Stream codec (H264/H265).
/// NOTE(review): only H264 session setup is implemented in setupSession — confirm HEVC need.
private let codecType: CodecType
/// Serial queue on which frames are submitted to the decoder.
private let decodeQueue = DispatchQueue(label: "video_decode_plugin.decode.queue")
/// Whether the decompression session has been successfully created.
private var isSessionReady = false
/// Sequence number of the most recent I-frame.
private var lastIFrameSeq: Int?
/// Seen frame sequence numbers (only cleared in release(); never populated here).
private var frameSeqSet = Set<Int>()
/// Frames older than this (vs. wall clock) are dropped. Mirrors the Android implementation.
private let maxAllowedDelayMs: Int64 = 750
/// Wall-clock time (ms) captured when the first frame arrived.
private var timestampBaseMs: Int64?
/// Relative timestamp of the first frame; maps stream time onto wall clock.
private var firstFrameRelativeTimestamp: Int64?

// MARK: - Input/output buffering

/// Concurrent queue guarding `inputBuffer` (writes and mutating reads use barriers).
private let inputQueue = DispatchQueue(label: "video_decode_plugin.input.queue", attributes: .concurrent)
private var inputBuffer: [(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data?, pps: Data?)] = []
// NOTE(review): this semaphore is never used; access is synchronized via inputQueue instead.
private let inputBufferSemaphore = DispatchSemaphore(value: 1)
/// Input buffer capacity. Mirrors the Android implementation.
private let inputBufferMaxCount = 100
/// Concurrent queue guarding `outputBuffer`.
private let outputQueue = DispatchQueue(label: "video_decode_plugin.output.queue", attributes: .concurrent)
private var outputBuffer: [(pixelBuffer: CVPixelBuffer, timestamp: Int64)] = []
// NOTE(review): this semaphore is never used; access is synchronized via outputQueue instead.
private let outputBufferSemaphore = DispatchSemaphore(value: 1)
/// Output buffer capacity. Mirrors the Android implementation.
private let outputBufferMaxCount = 100
/// Timer that drains `outputBuffer` at the render rate.
private var renderTimer: DispatchSourceTimer?
/// Whether the render timer is (logically) running.
private var renderThreadRunning = false
/// Set after the first frame is delivered (flag is written but not otherwise read here).
private var hasNotifiedFlutter = false
/// Target render frame rate. Mirrors the Android implementation.
private var renderFps: Int = 20
/// Render interval in ms, derived from `renderFps` in init.
private var renderIntervalMs: Int64 = 0
/// Jitter tolerance when deciding whether it is time to render.
private let renderJitterMs: Int64 = 2
/// Wall-clock time (ms) of the last rendered frame.
private var lastRenderTimeMs: Int64 = 0
/// Becomes true once the output buffer reaches its low-water mark.
private var renderStarted = false

// MARK: - Frame reordering

/// P-frames waiting for their reference I-frame, keyed by frame sequence number.
private var reorderBuffer: [Int: (frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data?, pps: Data?)] = [:]
/// Sequence numbers of I-frames received so far.
private var receivedIFrames = Set<Int>()
/// Protects `reorderBuffer` and `receivedIFrames`.
private let reorderLock = NSLock()
/// Maximum number of buffered out-of-order P-frames.
private let maxReorderBufferSize = 100

/// Callback invoked on the main queue with each decoded CVPixelBuffer and its timestamp.
var onFrameDecoded: ((CVPixelBuffer, Int64) -> Void)? = { _, _ in }
/// Creates a decoder and starts the render timer immediately.
/// - Parameters:
///   - width: Expected frame width in pixels.
///   - height: Expected frame height in pixels.
///   - codecType: "h264" or "h265" (case-insensitive); anything else falls back to h264.
init(width: Int, height: Int, codecType: String) {
    self.width = width
    self.height = height
    self.codecType = CodecType(rawValue: codecType.lowercased()) ?? .h264
    // Derive the per-frame render interval from the configured frame rate.
    self.renderIntervalMs = Int64(1000.0 / Double(renderFps))
    startRenderTimer()
    print("[VideoDecoder] 初始化解码器: width=\(width), height=\(height)")
}
// MARK: - Input buffer

/// Appends a frame to the input buffer, dropping the oldest entry when full.
/// The write goes through a barrier so it is exclusive on the concurrent queue.
private func enqueueInput(_ item: (Data, Int, Int64, Int, Int?, Data?, Data?)) {
    inputQueue.async(flags: .barrier) {
        if self.inputBuffer.count >= self.inputBufferMaxCount {
            self.inputBuffer.removeFirst() // drop the oldest frame
            print("[VideoDecoder][警告] 输入缓冲区满,丢弃最旧帧")
        }
        self.inputBuffer.append(item)
    }
}
/// Removes and returns the oldest buffered input frame, or nil when empty.
private func dequeueInput() -> (Data, Int, Int64, Int, Int?, Data?, Data?)? {
    var item: (Data, Int, Int64, Int, Int?, Data?, Data?)?
    // FIX: removeFirst() mutates the buffer, so this must be a barrier block.
    // The original plain `sync` on a concurrent queue could run concurrently
    // with another dequeue and race on the array mutation.
    inputQueue.sync(flags: .barrier) {
        if !self.inputBuffer.isEmpty {
            item = self.inputBuffer.removeFirst()
        }
    }
    return item
}
// MARK: - Output buffer

/// Appends a decoded frame to the output buffer, dropping the oldest when full.
/// Called from VideoToolbox's decode callback thread; the barrier write makes
/// the mutation exclusive on the concurrent queue.
private func enqueueOutput(_ item: (CVPixelBuffer, Int64)) {
    outputQueue.async(flags: .barrier) {
        if self.outputBuffer.count >= self.outputBufferMaxCount {
            self.outputBuffer.removeFirst() // drop the oldest frame
            print("[VideoDecoder][警告] 输出缓冲区满,丢弃最旧帧")
        }
        self.outputBuffer.append(item)
    }
}
/// Removes and returns the oldest decoded frame, or nil when the buffer is empty.
private func dequeueOutput() -> (CVPixelBuffer, Int64)? {
    var item: (CVPixelBuffer, Int64)?
    // FIX: removeFirst() mutates the buffer, so this must be a barrier block.
    // The original plain `sync` on a concurrent queue could race with itself.
    outputQueue.sync(flags: .barrier) {
        if !self.outputBuffer.isEmpty {
            item = self.outputBuffer.removeFirst()
        }
    }
    return item
}
// MARK: - Frame reordering

/// Decides whether a frame can be decoded now.
/// Returns true when the frame was enqueued for decoding; false when it was
/// dropped (arrived too late) or parked in `reorderBuffer` awaiting its I-frame.
/// All reorder state is mutated under `reorderLock`.
private func handleFrameReordering(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data?, pps: Data?) -> Bool {
reorderLock.lock()
defer { reorderLock.unlock() }
// 1. Drop frames already older than the allowed delay vs. wall clock.
let now = Int64(Date().timeIntervalSince1970 * 1000)
let base = timestampBaseMs ?? 0
let firstRel = firstFrameRelativeTimestamp ?? 0
let absTimestamp = base + (timestamp - firstRel)
if absTimestamp < now - maxAllowedDelayMs {
// NOTE(review): a dropped I-frame is NOT recorded in receivedIFrames, so
// P-frames referencing it will sit in reorderBuffer until overflow cleanup
// evicts them — confirm this stall is acceptable.
print("[VideoDecoder][警告] 丢弃延迟帧: type=\(frameType), seq=\(frameSeq), delay=\(now - absTimestamp)ms")
return false
}
// 2. frameType == 0 is an I-frame; everything else is treated as a P-frame.
if frameType == 0 { // I-frame
receivedIFrames.insert(frameSeq)
lastIFrameSeq = frameSeq
// Enqueue the I-frame itself for decoding.
enqueueInput((frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps))
// Release buffered P-frames that reference this I-frame, in sequence order.
let readyPFrames = reorderBuffer.values
.filter { $0.refIFrameSeq == frameSeq }
.sorted { $0.frameSeq < $1.frameSeq }
for pFrame in readyPFrames {
enqueueInput(pFrame)
reorderBuffer.removeValue(forKey: pFrame.frameSeq)
}
if !readyPFrames.isEmpty {
print("[VideoDecoder] I帧\(frameSeq)释放\(readyPFrames.count)个P帧")
}
// Overflow protection: evict the lowest (oldest) sequence numbers first.
if reorderBuffer.count > maxReorderBufferSize {
let toRemove = reorderBuffer.keys.sorted().prefix(reorderBuffer.count - maxReorderBufferSize)
for seq in toRemove {
reorderBuffer.removeValue(forKey: seq)
}
print("[VideoDecoder][警告] 重排序缓冲区溢出,清理\(toRemove.count)个P帧")
}
return true
} else { // P-frame
// A P-frame is decodable only after its reference I-frame has been seen.
if let refIFrameSeq = refIFrameSeq, receivedIFrames.contains(refIFrameSeq) {
// Reference I-frame already received: decode immediately.
enqueueInput((frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps))
return true
} else {
// Reference I-frame not yet seen: park this P-frame until it arrives.
reorderBuffer[frameSeq] = (frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps)
print("[VideoDecoder] P帧\(frameSeq)缓存等待I帧\(refIFrameSeq ?? -1)")
// Overflow protection: evict the lowest (oldest) sequence numbers first.
if reorderBuffer.count > maxReorderBufferSize {
let toRemove = reorderBuffer.keys.sorted().prefix(reorderBuffer.count - maxReorderBufferSize)
for seq in toRemove {
reorderBuffer.removeValue(forKey: seq)
}
print("[VideoDecoder][警告] 重排序缓冲区溢出,清理\(toRemove.count)个P帧")
}
return false
}
}
}
// MARK: - Render timer

/// Starts a timer that drains the output buffer at ~renderFps and delivers
/// frames to `onFrameDecoded` on the main queue. Rendering begins only after
/// the output buffer reaches a 15% low-water mark; late frames are dropped.
private func startRenderTimer() {
    renderThreadRunning = true
    // Tick at half the render interval; the jitter check below decides
    // whether enough time has actually passed to render.
    let timer = DispatchSource.makeTimerSource(queue: DispatchQueue.global())
    timer.schedule(deadline: .now(), repeating: .milliseconds(Int(renderIntervalMs / 2)))
    timer.setEventHandler { [weak self] in
        guard let self = self else { return }
        let now = Int64(Date().timeIntervalSince1970 * 1000)
        // Not yet time for the next frame (within jitter tolerance).
        let timeSinceLastRender = now - self.lastRenderTimeMs
        if timeSinceLastRender < self.renderIntervalMs - self.renderJitterMs {
            return
        }
        // Before the first render, wait for the low-water mark to absorb jitter.
        if !self.renderStarted {
            var outputCount = 0
            self.outputQueue.sync { outputCount = self.outputBuffer.count }
            if outputCount >= Int(Double(self.outputBufferMaxCount) * 0.15) {
                self.renderStarted = true
                print("[VideoDecoder] 渲染启动outputBuffer已达低水位: \(outputCount)")
            } else {
                return
            }
        }
        if let (pixelBuffer, timestamp) = self.dequeueOutput() {
            // Drop the frame if it is already too old to be worth showing.
            let now = Int64(Date().timeIntervalSince1970 * 1000)
            let base = self.timestampBaseMs ?? 0
            let firstRel = self.firstFrameRelativeTimestamp ?? 0
            let absTimestamp = base + (timestamp - firstRel)
            if absTimestamp < now - self.maxAllowedDelayMs {
                print("[VideoDecoder][警告] 丢弃延迟渲染帧: delay=\(now - absTimestamp)ms")
                return
            }
            // Deliver on the main queue; the callback typically drives UI.
            DispatchQueue.main.async {
                self.onFrameDecoded?(pixelBuffer, timestamp)
            }
            self.lastRenderTimeMs = now
            if !self.hasNotifiedFlutter {
                self.hasNotifiedFlutter = true
            }
        }
    }
    timer.resume()
    renderTimer = timer
}
/// Stops and releases the render timer.
private func stopRenderTimer() {
    renderThreadRunning = false
    renderTimer?.cancel()
    renderTimer = nil
}
/// (Re)creates the decompression session from the SPS/PPS carried alongside an
/// I-frame. Any previously existing session is invalidated first.
/// - Returns: true when the session is ready for decoding.
///
/// NOTE(review): this always uses the H264 parameter-set API and H264 NAL-type
/// checks (SPS == 7, PPS == 8). `.h265` streams also require a VPS and the
/// HEVC parameter-set API, so they cannot initialize here — confirm whether
/// HEVC support is actually needed.
private func setupSession(sps: Data?, pps: Data?) -> Bool {
    // Tear down any previous session before building a new one.
    if let session = decompressionSession {
        VTDecompressionSessionInvalidate(session)
        decompressionSession = nil
    }
    formatDesc = nil
    isSessionReady = false
    guard let sps = sps, let pps = pps else {
        print("[VideoDecoder][错误] 缺少SPS/PPS无法初始化解码会话")
        return false
    }
    // Sanity-check the H264 NAL unit types before handing them to CoreMedia.
    let spsType: UInt8 = sps.count > 0 ? (sps[0] & 0x1F) : 0
    let ppsType: UInt8 = pps.count > 0 ? (pps[0] & 0x1F) : 0
    if sps.count < 3 || spsType != 7 {
        print("[VideoDecoder][错误] SPS内容异常len=\(sps.count), type=\(spsType)")
        return false
    }
    if pps.count < 3 || ppsType != 8 {
        print("[VideoDecoder][错误] PPS内容异常len=\(pps.count), type=\(ppsType)")
        return false
    }
    var success = false
    sps.withUnsafeBytes { spsPtr in
        pps.withUnsafeBytes { ppsPtr in
            // The raw pointers are only valid inside these closures, so the
            // format description must be created here.
            let parameterSetPointers: [UnsafePointer<UInt8>] = [
                spsPtr.baseAddress!.assumingMemoryBound(to: UInt8.self),
                ppsPtr.baseAddress!.assumingMemoryBound(to: UInt8.self)
            ]
            let parameterSetSizes: [Int] = [sps.count, pps.count]
            let status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
                allocator: kCFAllocatorDefault,
                parameterSetCount: 2,
                parameterSetPointers: parameterSetPointers,
                parameterSetSizes: parameterSetSizes,
                nalUnitHeaderLength: 4, // AVCC 4-byte length prefix (see decodeFrame)
                formatDescriptionOut: &formatDesc
            )
            if status != noErr {
                print("[VideoDecoder][错误] 创建FormatDescription失败: \(status)")
                success = false
            } else {
                success = true
            }
        }
    }
    if !success { return false }
    var callback = VTDecompressionOutputCallbackRecord(
        decompressionOutputCallback: { (decompressionOutputRefCon, _, status, _, imageBuffer, pts, _) in
            // Invoked on VideoToolbox's internal thread; enqueueOutput is
            // thread-safe (barrier write on outputQueue).
            let decoder = Unmanaged<VideoDecoder>.fromOpaque(decompressionOutputRefCon!).takeUnretainedValue()
            if status == noErr, let pixelBuffer = imageBuffer {
                decoder.enqueueOutput((pixelBuffer, Int64(pts.seconds * 1000)))
            } else {
                print("[VideoDecoder][错误] 解码回调失败: \(status)")
            }
        },
        // passUnretained: the session must be invalidated before self deallocates
        // (done in release()).
        decompressionOutputRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque())
    )
    let attrs: [NSString: Any] = [
        kCVPixelBufferPixelFormatTypeKey: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
        kCVPixelBufferWidthKey: width,
        kCVPixelBufferHeightKey: height,
        kCVPixelBufferOpenGLESCompatibilityKey: true,
        kCVPixelBufferIOSurfacePropertiesKey: [:]
    ]
    let status2 = VTDecompressionSessionCreate(
        allocator: kCFAllocatorDefault,
        formatDescription: formatDesc!,
        decoderSpecification: nil,
        imageBufferAttributes: attrs as CFDictionary,
        outputCallback: &callback,
        decompressionSessionOut: &decompressionSession
    )
    if status2 != noErr {
        print("[VideoDecoder][错误] 创建解码会话失败: \(status2)")
        return false
    }
    isSessionReady = true
    print("[VideoDecoder] 解码会话初始化成功")
    return true
}
/// Entry point: submits one encoded frame for decoding.
/// - Parameters:
///   - frameData: One AnnexB NAL unit (with or without start code).
///   - frameType: 0 for I-frames; anything else is treated as a P-frame.
///   - timestamp: Frame timestamp in ms, relative to the stream start.
///   - frameSeq: Monotonic frame sequence number.
///   - refIFrameSeq: Sequence number of the I-frame a P-frame depends on.
///   - sps: H264 SPS NAL (expected on I-frames until the session is ready).
///   - pps: H264 PPS NAL (expected on I-frames until the session is ready).
func decodeFrame(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data? = nil, pps: Data? = nil) {
    // 1. Establish the wall-clock <-> stream-time mapping from the first frame.
    if timestampBaseMs == nil {
        timestampBaseMs = Int64(Date().timeIntervalSince1970 * 1000)
        firstFrameRelativeTimestamp = timestamp
        print("[VideoDecoder] 设置时间戳基准: base=\(timestampBaseMs!), firstRel=\(firstFrameRelativeTimestamp!)")
    }
    // 2. Drop late frames / park P-frames that arrived before their I-frame.
    if !handleFrameReordering(frameData: frameData, frameType: frameType, timestamp: timestamp, frameSeq: frameSeq, refIFrameSeq: refIFrameSeq, sps: sps, pps: pps) {
        return
    }
    // 3. Drain one frame from the input buffer on the serial decode queue.
    decodeQueue.async { [weak self] in
        guard let self = self else { return }
        guard let (frameData, _, timestamp, _, _, sps, pps) = self.dequeueInput() else { return }
        // Lazily build the session once SPS/PPS are available.
        if !self.isSessionReady, let sps = sps, let pps = pps {
            guard self.setupSession(sps: sps, pps: pps) else { return }
        }
        guard let session = self.decompressionSession else { return }
        guard frameData.count > 3 else { return }
        // Strip a leading AnnexB start code (00 00 00 01 or 00 00 01) if present.
        // NOTE(review): only one leading start code is handled; a frame carrying
        // multiple NAL units would keep its inner start codes — confirm upstream
        // sends exactly one NALU per frame.
        var naluData: Data
        if frameData.count >= 4 && frameData[0] == 0x00 && frameData[1] == 0x00 && frameData[2] == 0x00 && frameData[3] == 0x01 {
            naluData = frameData.subdata(in: 4..<frameData.count)
        } else if frameData.count >= 3 && frameData[0] == 0x00 && frameData[1] == 0x00 && frameData[2] == 0x01 {
            naluData = frameData.subdata(in: 3..<frameData.count)
        } else {
            print("[VideoDecoder][警告] 未找到起始码")
            naluData = frameData
        }
        // Re-wrap as AVCC: 4-byte big-endian length prefix + NALU payload,
        // matching nalUnitHeaderLength: 4 used when creating the format description.
        let naluLength = UInt32(naluData.count).bigEndian
        var avccData = Data(capacity: naluData.count + 4)
        withUnsafeBytes(of: naluLength) { ptr in
            avccData.append(ptr.baseAddress!.assumingMemoryBound(to: UInt8.self), count: 4)
        }
        avccData.append(naluData)
        var blockBuffer: CMBlockBuffer?
        let status = CMBlockBufferCreateWithMemoryBlock(
            allocator: kCFAllocatorDefault,
            memoryBlock: nil, // let CoreMedia allocate the backing store
            blockLength: avccData.count,
            blockAllocator: nil,
            customBlockSource: nil,
            offsetToData: 0,
            dataLength: avccData.count,
            flags: kCMBlockBufferAssureMemoryNowFlag,
            blockBufferOut: &blockBuffer
        )
        if status != kCMBlockBufferNoErr {
            print("[VideoDecoder][错误] 创建BlockBuffer失败: \(status)")
            return
        }
        // Copy the AVCC bytes into the freshly allocated BlockBuffer.
        if let blockBuffer = blockBuffer {
            let status2 = avccData.withUnsafeBytes { ptr in
                CMBlockBufferReplaceDataBytes(
                    with: ptr.baseAddress!,
                    blockBuffer: blockBuffer,
                    offsetIntoDestination: 0,
                    dataLength: avccData.count
                )
            }
            if status2 != kCMBlockBufferNoErr {
                print("[VideoDecoder][错误] 复制数据到BlockBuffer失败: \(status2)")
                return
            }
        }
        var sampleBuffer: CMSampleBuffer?
        var timing = CMSampleTimingInfo(
            duration: .invalid,
            presentationTimeStamp: CMTime(value: timestamp, timescale: 1000),
            decodeTimeStamp: .invalid
        )
        let status2 = CMSampleBufferCreate(
            allocator: kCFAllocatorDefault,
            dataBuffer: blockBuffer,
            dataReady: true,
            makeDataReadyCallback: nil,
            refcon: nil,
            formatDescription: self.formatDesc,
            sampleCount: 1,
            sampleTimingEntryCount: 1,
            sampleTimingArray: &timing,
            sampleSizeEntryCount: 1,
            sampleSizeArray: [avccData.count],
            sampleBufferOut: &sampleBuffer
        )
        if status2 != noErr {
            print("[VideoDecoder][错误] 创建SampleBuffer失败: \(status2)")
            return
        }
        let decodeFlags: VTDecodeFrameFlags = [._EnableAsynchronousDecompression]
        var infoFlags = VTDecodeInfoFlags()
        let status3 = VTDecompressionSessionDecodeFrame(
            session,
            sampleBuffer: sampleBuffer!,
            flags: decodeFlags,
            frameRefcon: nil,
            infoFlagsOut: &infoFlags
        )
        if status3 != noErr {
            print("[VideoDecoder][错误] 解码失败: \(status3)")
            if status3 == -6661 {
                // FIX: -6661 is kCVReturnInvalidArgument (CoreVideo). The original
                // log labeled it kVTInvalidSessionErr, which is actually -12903.
                print(" - 错误类型: kCVReturnInvalidArgument (参数无效)")
                print(" - 会话状态: \(self.isSessionReady ? "就绪" : "未就绪")")
                print(" - formatDesc: \(self.formatDesc != nil ? "有效" : "无效")")
            }
        }
    }
}
/// Releases the decompression session and clears all decoder state and buffers.
/// NOTE(review): must not be called from the decode queue itself — the
/// `decodeQueue.sync` below would deadlock in that case.
func release() {
    stopRenderTimer()
    decodeQueue.sync {
        if let session = decompressionSession {
            VTDecompressionSessionInvalidate(session)
        }
        decompressionSession = nil
        formatDesc = nil
        isSessionReady = false
        frameSeqSet.removeAll()
        lastIFrameSeq = nil
        // Clear reorder state under its lock.
        reorderLock.lock()
        reorderBuffer.removeAll()
        receivedIFrames.removeAll()
        reorderLock.unlock()
    }
    // Buffer clears are barrier writes on their own queues.
    inputQueue.async(flags: .barrier) { self.inputBuffer.removeAll() }
    outputQueue.async(flags: .barrier) { self.outputBuffer.removeAll() }
    print("[VideoDecoder] 解码器已释放")
}
}