// video_decode_plugin/ios/Classes/VideoDecoder.swift
//
// VideoToolbox-based video frame decoder for the iOS side of video_decode_plugin.

import Foundation
import VideoToolbox
import AVFoundation
/// VideoToolboxH264/H265CVPixelBuffer
/// Decodes raw H.264/H.265 Annex-B frames into `CVPixelBuffer`s via VideoToolbox.
///
/// Pipeline (designed to mirror the Android implementation of this plugin):
///   1. `decodeFrame(...)` receives one frame with metadata (type, seq, ref I-frame, SPS/PPS).
///   2. `handleFrameReordering` holds P-frames until their reference I-frame has arrived.
///   3. A serial decode queue converts Annex-B → AVCC and feeds `VTDecompressionSession`.
///   4. Decoded pixel buffers collect in an output buffer that a fixed-rate timer drains,
///      delivering frames to `onFrameDecoded` on the main thread.
///
/// NOTE(review): although `CodecType` carries an h265 case, `setupSession` always calls
/// `CMVideoFormatDescriptionCreateFromH264ParameterSets` and validates NAL types with the
/// H.264 rules (`& 0x1F`, SPS=7/PPS=8), so HEVC input is effectively unsupported as written.
class VideoDecoder {
    /// Codec selection; raw values match the strings sent from the Dart side.
    enum CodecType: String {
        case h264 = "h264"
        case h265 = "h265"
        /// CoreMedia codec constant for this case.
        /// NOTE(review): never consulted by `setupSession` — see the class-level note.
        var codecType: CMVideoCodecType {
            switch self {
            case .h264: return kCMVideoCodecType_H264
            case .h265: return kCMVideoCodecType_HEVC
            }
        }
    }

    // ====== Decode-session state ======
    /// Active VideoToolbox session; nil until the first I-frame with SPS/PPS arrives.
    private var decompressionSession: VTDecompressionSession?
    /// Format description built from the current SPS/PPS.
    private var formatDesc: CMVideoFormatDescription?
    /// Expected frame width in pixels (also used as a pixel-buffer attribute hint).
    private let width: Int
    /// Expected frame height in pixels.
    private let height: Int
    /// Codec chosen at init; unknown strings fall back to `.h264`.
    private let codecType: CodecType
    /// Serial queue on which all decode work runs.
    private let decodeQueue = DispatchQueue(label: "video_decode_plugin.decode.queue")
    /// True once a decompression session has been created successfully.
    private var isSessionReady = false
    /// Sequence number of the most recently received I-frame.
    private var lastIFrameSeq: Int?
    /// NOTE(review): never written anywhere in this file — appears to be dead state.
    private var frameSeqSet = Set<Int>()
    /// Maximum tolerated frame delay in ms (kept equal to the Android side).
    /// Only referenced by the commented-out late-frame-drop logic below.
    private let maxAllowedDelayMs: Int64 = 750 // kept in sync with Android
    /// Wall-clock time (ms since epoch) captured when the first frame arrived.
    private var timestampBaseMs: Int64?
    /// Relative timestamp of the first frame; maps relative stream time to wall-clock time.
    private var firstFrameRelativeTimestamp: Int64?

    // ====== Input buffering ======
    /// Concurrent queue guarding `inputBuffer` (reads via `sync`, writes via `.barrier`).
    private let inputQueue = DispatchQueue(label: "video_decode_plugin.input.queue", attributes: .concurrent)
    /// Frames accepted for decode, in decode order (I-frame first, then its P-frames).
    private var inputBuffer: [(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data?, pps: Data?)] = []
    /// NOTE(review): never waited on or signalled — dead synchronization primitive.
    private let inputBufferSemaphore = DispatchSemaphore(value: 1)
    /// Input-buffer capacity; matches the Android buffer size.
    private let inputBufferMaxCount = 100
    /// Concurrent queue guarding `outputBuffer` (reads via `sync`, writes via `.barrier`).
    private let outputQueue = DispatchQueue(label: "video_decode_plugin.output.queue", attributes: .concurrent)
    /// Decoded frames awaiting render, paired with their (relative) timestamps.
    private var outputBuffer: [(pixelBuffer: CVPixelBuffer, timestamp: Int64)] = []
    /// NOTE(review): never waited on or signalled — dead synchronization primitive.
    private let outputBufferSemaphore = DispatchSemaphore(value: 1)
    /// Output-buffer capacity; matches the Android buffer size.
    private let outputBufferMaxCount = 100

    // ====== Rendering ======
    /// Timer that drains the output buffer at the render cadence.
    private var renderTimer: DispatchSourceTimer?
    /// Whether the render timer should be considered running (set by start/stopRenderTimer).
    private var renderThreadRunning = false
    /// Flipped after the first frame is delivered; only toggled, never read elsewhere here.
    private var hasNotifiedFlutter = false
    /// Target render rate in frames per second (matches Android).
    private var renderFps: Int = 20
    /// Interval between rendered frames in ms; derived from `renderFps` in `init`.
    private var renderIntervalMs: Int64 = 0
    /// Scheduling jitter tolerated when deciding whether enough time has passed to render.
    private let renderJitterMs: Int64 = 2
    /// Wall-clock time (ms) of the last rendered frame.
    private var lastRenderTimeMs: Int64 = 0
    /// Minimum number of buffered frames before rendering starts (pre-buffer low-water mark).
    /// Trades ~`renderLowWaterMark / renderFps` seconds of latency for smoother playback.
    private let renderLowWaterMark = 15

    // ====== Frame reordering ======
    /// P-frames parked until their reference I-frame arrives, keyed by frame sequence number.
    private var reorderBuffer: [Int: (frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data?, pps: Data?)] = [:]
    /// Sequence numbers of every I-frame seen so far.
    /// NOTE(review): grows without bound for long sessions — consider pruning.
    private var receivedIFrames = Set<Int>()
    /// Protects `reorderBuffer` and `receivedIFrames`.
    private let reorderLock = NSLock()
    /// Cap on parked P-frames; lowest sequence numbers are evicted beyond this.
    private let maxReorderBufferSize = 100
    /// Invoked on the main thread with each rendered `CVPixelBuffer` and its relative timestamp.
    var onFrameDecoded: ((CVPixelBuffer, Int64) -> Void)? = { _, _ in }

    /// Creates a decoder and immediately starts the render timer.
    /// - Parameters:
    ///   - width: expected frame width in pixels.
    ///   - height: expected frame height in pixels.
    ///   - codecType: "h264" or "h265" (case-insensitive); unknown values fall back to h264.
    init(width: Int, height: Int, codecType: String) {
        self.width = width
        self.height = height
        self.codecType = CodecType(rawValue: codecType.lowercased()) ?? .h264
        self.renderIntervalMs = Int64(1000.0 / Double(renderFps))
        startRenderTimer()
        print("[VideoDecoder] 初始化解码器: width=\(width), height=\(height)")
    }

    // ====== Input buffer operations ======
    /// Appends a frame tuple to the input buffer, dropping the oldest entry when full.
    /// Thread-safe via a barrier write on `inputQueue`.
    private func enqueueInput(_ item: (Data, Int, Int64, Int, Int?, Data?, Data?)) {
        inputQueue.async(flags: .barrier) {
            if self.inputBuffer.count >= self.inputBufferMaxCount {
                self.inputBuffer.removeFirst() // drop oldest to bound memory
                print("[VideoDecoder][警告] 输入缓冲区满,丢弃最旧帧")
            }
            self.inputBuffer.append(item)
        }
    }

    /// Removes and returns the oldest queued frame, or nil when the buffer is empty.
    /// NOTE(review): uses a plain `sync` (not barrier) yet mutates the buffer — relies on
    /// `sync` blocking concurrent readers; confirm this is the intended protection.
    private func dequeueInput() -> (Data, Int, Int64, Int, Int?, Data?, Data?)? {
        var item: (Data, Int, Int64, Int, Int?, Data?, Data?)?
        inputQueue.sync {
            if !self.inputBuffer.isEmpty {
                item = self.inputBuffer.removeFirst()
            }
        }
        return item
    }

    // ====== Output buffer operations ======
    /// Appends a decoded frame to the output buffer, dropping the oldest entry when full.
    private func enqueueOutput(_ item: (CVPixelBuffer, Int64)) {
        outputQueue.async(flags: .barrier) {
            if self.outputBuffer.count >= self.outputBufferMaxCount {
                self.outputBuffer.removeFirst() // drop oldest to bound memory
                print("[VideoDecoder][警告] 输出缓冲区满,丢弃最旧帧")
            }
            self.outputBuffer.append(item)
        }
    }

    /// Removes and returns the oldest decoded frame, or nil when the buffer is empty.
    private func dequeueOutput() -> (CVPixelBuffer, Int64)? {
        var item: (CVPixelBuffer, Int64)?
        outputQueue.sync {
            if !self.outputBuffer.isEmpty {
                item = self.outputBuffer.removeFirst()
            }
        }
        return item
    }

    // ====== Frame reordering ======
    /// Routes a frame into the decode pipeline in dependency order.
    ///
    /// I-frames (frameType == 0) are enqueued immediately, and any parked P-frames that
    /// reference them are released in sequence order. A P-frame whose reference I-frame has
    /// not arrived yet is parked in `reorderBuffer` instead of being enqueued.
    /// - Returns: true if the frame was enqueued for decoding; false if it was parked.
    private func handleFrameReordering(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data?, pps: Data?) -> Bool {
        reorderLock.lock()
        defer { reorderLock.unlock() }
        // 1. Compute the frame's absolute wall-clock timestamp.
        //    NOTE(review): `now`/`absTimestamp` feed only the disabled drop logic below,
        //    so these locals are currently unused.
        let now = Int64(Date().timeIntervalSince1970 * 1000)
        let base = timestampBaseMs ?? 0
        let firstRel = firstFrameRelativeTimestamp ?? 0
        let absTimestamp = base + (timestamp - firstRel)
        // Disabled: drop frames that arrive too late (kept for parity with Android).
        // if absTimestamp < now - maxAllowedDelayMs {
        //     print("[VideoDecoder] dropping late frame: type=\(frameType), seq=\(frameSeq), delay=\(now - absTimestamp)ms")
        //     return false
        // }
        // 2. Dispatch by frame type.
        if frameType == 0 { // I-frame
            receivedIFrames.insert(frameSeq)
            lastIFrameSeq = frameSeq
            // Enqueue the I-frame itself first.
            enqueueInput((frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps))
            // Release parked P-frames that reference this I-frame, in sequence order.
            let readyPFrames = reorderBuffer.values
                .filter { $0.refIFrameSeq == frameSeq }
                .sorted { $0.frameSeq < $1.frameSeq }
            for pFrame in readyPFrames {
                enqueueInput(pFrame)
                reorderBuffer.removeValue(forKey: pFrame.frameSeq)
            }
            if !readyPFrames.isEmpty {
                print("[VideoDecoder] I帧\(frameSeq)释放\(readyPFrames.count)个P帧")
            }
            // Evict the lowest-numbered parked frames if the buffer overflowed.
            if reorderBuffer.count > maxReorderBufferSize {
                let toRemove = reorderBuffer.keys.sorted().prefix(reorderBuffer.count - maxReorderBufferSize)
                for seq in toRemove {
                    reorderBuffer.removeValue(forKey: seq)
                }
                print("[VideoDecoder][警告] 重排序缓冲区溢出,清理\(toRemove.count)个P帧")
            }
            return true
        } else { // P-frame
            // Decode only if the reference I-frame has already arrived.
            if let refIFrameSeq = refIFrameSeq, receivedIFrames.contains(refIFrameSeq) {
                enqueueInput((frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps))
                return true
            } else {
                // Reference I-frame missing: park this P-frame until it shows up.
                reorderBuffer[frameSeq] = (frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps)
                print("[VideoDecoder] P帧\(frameSeq)缓存等待I帧\(refIFrameSeq ?? -1)")
                // Same overflow eviction as the I-frame path.
                if reorderBuffer.count > maxReorderBufferSize {
                    let toRemove = reorderBuffer.keys.sorted().prefix(reorderBuffer.count - maxReorderBufferSize)
                    for seq in toRemove {
                        reorderBuffer.removeValue(forKey: seq)
                    }
                    print("[VideoDecoder][警告] 重排序缓冲区溢出,清理\(toRemove.count)个P帧")
                }
                return false
            }
        }
    }

    // ====== Rendering ======
    /// Starts the paced render loop that delivers frames to Flutter.
    ///
    /// The timer fires at half the render interval and self-throttles: it skips a tick when
    /// too little time has elapsed since the last render (minus `renderJitterMs`), and it
    /// waits until at least `renderLowWaterMark` frames are buffered before rendering.
    private func startRenderTimer() {
        renderThreadRunning = true
        let timer = DispatchSource.makeTimerSource(queue: DispatchQueue.global())
        timer.schedule(deadline: .now(), repeating: .milliseconds(Int(renderIntervalMs / 2)))
        timer.setEventHandler { [weak self] in
            guard let self = self else { return }
            let now = Int64(Date().timeIntervalSince1970 * 1000)
            // Pace: render no faster than renderIntervalMs (with a little jitter slack).
            let timeSinceLastRender = now - self.lastRenderTimeMs
            if timeSinceLastRender < self.renderIntervalMs - self.renderJitterMs {
                return
            }
            // Pre-buffer: hold off until enough frames have accumulated.
            var outputCount = 0
            self.outputQueue.sync { outputCount = self.outputBuffer.count }
            if outputCount < self.renderLowWaterMark {
                // Not enough buffered yet — skip this tick.
                return
            }
            if let (pixelBuffer, timestamp) = self.dequeueOutput() {
                // NOTE(review): this inner `now` shadows the outer one; `absTimestamp`
                // feeds only the disabled late-drop check below and is otherwise unused.
                let now = Int64(Date().timeIntervalSince1970 * 1000)
                let base = timestampBaseMs ?? 0
                let firstRel = firstFrameRelativeTimestamp ?? 0
                let absTimestamp = base + (timestamp - firstRel)
                // Disabled: drop frames that are already too old to display.
                // if absTimestamp < now - self.maxAllowedDelayMs {
                //     print("[VideoDecoder] dropping late rendered frame: delay=\(now - absTimestamp)ms")
                //     return
                // }
                DispatchQueue.main.async {
                    self.onFrameDecoded?(pixelBuffer, timestamp)
                }
                self.lastRenderTimeMs = now
                if !self.hasNotifiedFlutter {
                    self.hasNotifiedFlutter = true
                }
            }
        }
        timer.resume()
        renderTimer = timer
    }

    /// Cancels the render timer and marks the render loop stopped.
    private func stopRenderTimer() {
        renderThreadRunning = false
        renderTimer?.cancel()
        renderTimer = nil
    }

    /// (Re)creates the decompression session from the SPS/PPS carried by an I-frame.
    ///
    /// Tears down any existing session first. Validates the parameter sets with H.264
    /// NAL-type rules (SPS type 7, PPS type 8), builds an H.264 format description, then
    /// creates a `VTDecompressionSession` emitting NV12 (420f) pixel buffers.
    /// - Returns: true on success; false if SPS/PPS are missing/invalid or creation fails.
    private func setupSession(sps: Data?, pps: Data?) -> Bool {
        // Invalidate and discard any previous session before rebuilding.
        if let session = decompressionSession {
            VTDecompressionSessionInvalidate(session)
            decompressionSession = nil
        }
        formatDesc = nil
        isSessionReady = false
        guard let sps = sps, let pps = pps else {
            print("[VideoDecoder][错误] 缺少SPS/PPS无法初始化解码会话")
            return false
        }
        // Sanity-check the NAL unit types (H.264: low 5 bits of the first byte).
        let spsType: UInt8 = sps.count > 0 ? (sps[0] & 0x1F) : 0
        let ppsType: UInt8 = pps.count > 0 ? (pps[0] & 0x1F) : 0
        if sps.count < 3 || spsType != 7 {
            print("[VideoDecoder][错误] SPS内容异常len=\(sps.count), type=\(spsType)")
            return false
        }
        if pps.count < 3 || ppsType != 8 {
            print("[VideoDecoder][错误] PPS内容异常len=\(pps.count), type=\(ppsType)")
            return false
        }
        var success = false
        sps.withUnsafeBytes { spsPtr in
            pps.withUnsafeBytes { ppsPtr in
                // Pointers stay valid only inside these closures, which is why the
                // format description is created here.
                let parameterSetPointers: [UnsafePointer<UInt8>] = [
                    spsPtr.baseAddress!.assumingMemoryBound(to: UInt8.self),
                    ppsPtr.baseAddress!.assumingMemoryBound(to: UInt8.self)
                ]
                let parameterSetSizes: [Int] = [sps.count, pps.count]
                let status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
                    allocator: kCFAllocatorDefault,
                    parameterSetCount: 2,
                    parameterSetPointers: parameterSetPointers,
                    parameterSetSizes: parameterSetSizes,
                    nalUnitHeaderLength: 4,
                    formatDescriptionOut: &formatDesc
                )
                if status != noErr {
                    print("[VideoDecoder][错误] 创建FormatDescription失败: \(status)")
                    success = false
                } else {
                    success = true
                }
            }
        }
        if !success { return false }
        // C-style output callback: enqueue each decoded buffer for paced rendering.
        // NOTE(review): the refCon is passUnretained — if this decoder deallocates while
        // async decodes are still in flight, the callback dereferences a dangling pointer.
        // `release()` must run (and the session drain) before the decoder goes away.
        var callback = VTDecompressionOutputCallbackRecord(
            decompressionOutputCallback: { (decompressionOutputRefCon, _, status, _, imageBuffer, pts, _) in
                let decoder = Unmanaged<VideoDecoder>.fromOpaque(decompressionOutputRefCon!).takeUnretainedValue()
                if status == noErr, let pixelBuffer = imageBuffer {
                    // Hand off to the output buffer; rendering happens on the timer.
                    decoder.enqueueOutput((pixelBuffer, Int64(pts.seconds * 1000)))
                } else {
                    print("[VideoDecoder][错误] 解码回调失败: \(status)")
                }
            },
            decompressionOutputRefCon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque())
        )
        // Request NV12 full-range output backed by IOSurface for GPU-friendly display.
        let attrs: [NSString: Any] = [
            kCVPixelBufferPixelFormatTypeKey: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
            kCVPixelBufferWidthKey: width,
            kCVPixelBufferHeightKey: height,
            kCVPixelBufferOpenGLESCompatibilityKey: true,
            kCVPixelBufferIOSurfacePropertiesKey: [:]
        ]
        let status2 = VTDecompressionSessionCreate(
            allocator: kCFAllocatorDefault,
            formatDescription: formatDesc!,
            decoderSpecification: nil,
            imageBufferAttributes: attrs as CFDictionary,
            outputCallback: &callback,
            decompressionSessionOut: &decompressionSession
        )
        if status2 != noErr {
            print("[VideoDecoder][错误] 创建解码会话失败: \(status2)")
            return false
        }
        isSessionReady = true
        print("[VideoDecoder] 解码会话初始化成功")
        return true
    }

    /// Entry point: accepts one Annex-B frame and drives the full decode pipeline.
    ///
    /// - Parameters:
    ///   - frameData: one NAL unit, optionally prefixed with a 3- or 4-byte start code.
    ///   - frameType: 0 for I-frame, anything else is treated as a P-frame.
    ///   - timestamp: frame timestamp in ms, relative to the stream start.
    ///   - frameSeq: monotonically increasing frame sequence number.
    ///   - refIFrameSeq: sequence number of the I-frame this frame depends on (nil for I-frames).
    ///   - sps/pps: parameter sets; required on the first frame to build the session.
    func decodeFrame(frameData: Data, frameType: Int, timestamp: Int64, frameSeq: Int, refIFrameSeq: Int?, sps: Data? = nil, pps: Data? = nil) {
        // 1. Establish the timestamp baseline on the very first frame.
        //    NOTE(review): written without synchronization while read under `reorderLock`
        //    elsewhere — confirm all callers arrive on a single thread.
        if timestampBaseMs == nil {
            timestampBaseMs = Int64(Date().timeIntervalSince1970 * 1000)
            firstFrameRelativeTimestamp = timestamp
            print("[VideoDecoder] 设置时间戳基准: base=\(timestampBaseMs!), firstRel=\(firstFrameRelativeTimestamp!)")
        }
        // 2. Reorder. Returns false when the frame was parked waiting for its I-frame.
        if !handleFrameReordering(frameData: frameData, frameType: frameType, timestamp: timestamp, frameSeq: frameSeq, refIFrameSeq: refIFrameSeq, sps: sps, pps: pps) {
            return
        }
        // 3. Drain one item from the input buffer and decode it on the serial queue.
        //    (The dequeued tuple shadows this call's parameters on purpose: the frame
        //    decoded here may be an earlier frame released by the reorder step.)
        decodeQueue.async { [weak self] in
            guard let self = self else { return }
            guard let (frameData, frameType, timestamp, frameSeq, refIFrameSeq, sps, pps) = self.dequeueInput() else { return }
            if !self.isSessionReady, let sps = sps, let pps = pps {
                guard self.setupSession(sps: sps, pps: pps) else { return }
            }
            guard let session = self.decompressionSession else { return }
            guard frameData.count > 3 else { return }
            // Strip the Annex-B start code (00 00 00 01 or 00 00 01) if present.
            // NOTE(review): `startCodeSize` is assigned but never read — dead local.
            var startCodeSize = 0
            var naluData: Data
            if frameData.count >= 4 && frameData[0] == 0x00 && frameData[1] == 0x00 && frameData[2] == 0x00 && frameData[3] == 0x01 {
                startCodeSize = 4
                naluData = frameData.subdata(in: 4..<frameData.count)
            } else if frameData.count >= 3 && frameData[0] == 0x00 && frameData[1] == 0x00 && frameData[2] == 0x01 {
                startCodeSize = 3
                naluData = frameData.subdata(in: 3..<frameData.count)
            } else {
                print("[VideoDecoder][警告] 未找到起始码")
                naluData = frameData
            }
            // Re-wrap as AVCC: 4-byte big-endian NALU length prefix, as declared by
            // nalUnitHeaderLength=4 in the format description.
            let naluLength = UInt32(naluData.count).bigEndian
            var avccData = Data(capacity: naluData.count + 4)
            withUnsafeBytes(of: naluLength) { ptr in
                avccData.append(ptr.baseAddress!.assumingMemoryBound(to: UInt8.self), count: 4)
            }
            avccData.append(naluData)
            var blockBuffer: CMBlockBuffer?
            // Allocate an owned memory block (memoryBlock: nil) sized for the AVCC data.
            let status = CMBlockBufferCreateWithMemoryBlock(
                allocator: kCFAllocatorDefault,
                memoryBlock: nil,
                blockLength: avccData.count,
                blockAllocator: nil,
                customBlockSource: nil,
                offsetToData: 0,
                dataLength: avccData.count,
                flags: kCMBlockBufferAssureMemoryNowFlag,
                blockBufferOut: &blockBuffer
            )
            if status != kCMBlockBufferNoErr {
                print("[VideoDecoder][错误] 创建BlockBuffer失败: \(status)")
                return
            }
            // Copy the AVCC payload into the freshly allocated block buffer.
            if let blockBuffer = blockBuffer {
                let status2 = avccData.withUnsafeBytes { ptr in
                    CMBlockBufferReplaceDataBytes(
                        with: ptr.baseAddress!,
                        blockBuffer: blockBuffer,
                        offsetIntoDestination: 0,
                        dataLength: avccData.count
                    )
                }
                if status2 != kCMBlockBufferNoErr {
                    print("[VideoDecoder][错误] 复制数据到BlockBuffer失败: \(status2)")
                    return
                }
            }
            var sampleBuffer: CMSampleBuffer?
            // PTS carries the relative timestamp at millisecond resolution.
            var timing = CMSampleTimingInfo(
                duration: .invalid,
                presentationTimeStamp: CMTime(value: timestamp, timescale: 1000),
                decodeTimeStamp: .invalid
            )
            let status2 = CMSampleBufferCreate(
                allocator: kCFAllocatorDefault,
                dataBuffer: blockBuffer,
                dataReady: true,
                makeDataReadyCallback: nil,
                refcon: nil,
                formatDescription: self.formatDesc,
                sampleCount: 1,
                sampleTimingEntryCount: 1,
                sampleTimingArray: &timing,
                sampleSizeEntryCount: 1,
                sampleSizeArray: [avccData.count],
                sampleBufferOut: &sampleBuffer
            )
            if status2 != noErr {
                print("[VideoDecoder][错误] 创建SampleBuffer失败: \(status2)")
                return
            }
            // Asynchronous decode; results arrive on the session's output callback.
            let decodeFlags: VTDecodeFrameFlags = [._EnableAsynchronousDecompression]
            var infoFlags = VTDecodeInfoFlags()
            let status3 = VTDecompressionSessionDecodeFrame(
                session,
                sampleBuffer: sampleBuffer!,
                flags: decodeFlags,
                frameRefcon: nil,
                infoFlagsOut: &infoFlags
            )
            if status3 != noErr {
                print("[VideoDecoder][错误] 解码失败: \(status3)")
                if status3 == -6661 {
                    print(" - 错误类型: kVTInvalidSessionErr (解码会话无效)")
                    print(" - 会话状态: \(self.isSessionReady ? "就绪" : "未就绪")")
                    print(" - formatDesc: \(self.formatDesc != nil ? "有效" : "无效")")
                }
            }
        }
    }

    /// Tears down the decoder: stops rendering, invalidates the session, and clears all
    /// buffers and reorder state. Safe to call once when the stream ends.
    /// NOTE(review): there is no `deinit` calling this — callers must invoke it explicitly
    /// or the render timer keeps firing.
    func release() {
        stopRenderTimer()
        decodeQueue.sync {
            if let session = decompressionSession {
                VTDecompressionSessionInvalidate(session)
            }
            decompressionSession = nil
            formatDesc = nil
            isSessionReady = false
            frameSeqSet.removeAll()
            lastIFrameSeq = nil
            // Clear reorder state under its lock.
            reorderLock.lock()
            reorderBuffer.removeAll()
            receivedIFrames.removeAll()
            reorderLock.unlock()
        }
        inputQueue.async(flags: .barrier) { self.inputBuffer.removeAll() }
        outputQueue.async(flags: .barrier) { self.outputBuffer.removeAll() }
        print("[VideoDecoder] 解码器已释放")
    }
}