Learning macOS Development (Part 25: VideoToolbox H264 Encode)
The reference articles and the WWDC session video already explain this clearly, so I simply reorganized the code into two parts (they are wired together as sketched after the list):
- Camera
- Encoder
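The two parts talk to each other through Combine: Camera publishes every captured CMSampleBuffer, and the encoder subscribes to that stream. A minimal sketch of how a caller might wire them up (this call site is my own illustration, not part of the zip):

```swift
// Hypothetical call site: open the camera, then start encoding its frames.
let encoder = VTH264Encoder()
try Camera.shared.open()  // starts the AVCaptureSession and begins publishing samples
encoder.start()           // subscribes to Camera.shared.$sample and encodes each frame

// ... later ...
encoder.stop()            // flushes pending frames and tears down the compression session
Camera.shared.close()
```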
Camera
```swift
import AVFoundation
import VideoToolbox

class VideoInput: NSObject, ObservableObject {
  @Published var sample: CMSampleBuffer?
  @Published var image: CVPixelBuffer?
}

class Camera: VideoInput {
  static let shared = Camera()
  let session = AVCaptureSession()
  private let queue = DispatchQueue(label: "cn.nonocast.camera")

  override init() {
    super.init()
  }

  func open() throws {
    // AppError is defined elsewhere in the project
    guard let device = chooseCaptureDevice() else {
      throw AppError.CameraNotFound
    }
    guard let videoInput = try? AVCaptureDeviceInput(device: device), session.canAddInput(videoInput) else {
      throw AppError.CameraOpenError
    }
    session.addInput(videoInput)

    let videoOutput = AVCaptureVideoDataOutput()
    videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange]
    videoOutput.setSampleBufferDelegate(self, queue: queue)
    guard session.canAddOutput(videoOutput) else {
      throw AppError.CameraOpenError
    }
    session.addOutput(videoOutput)
    session.startRunning()
  }

  func close() {
    session.stopRunning()
  }
}

extension Camera: AVCaptureVideoDataOutputSampleBufferDelegate {
  func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    // note: this fires on the capture queue; hop to the main queue before driving UI from these properties
    self.sample = sampleBuffer
    self.image = sampleBuffer.imageBuffer
  }
}

extension Camera {
  private func chooseCaptureDevice() -> AVCaptureDevice? {
    /*
    under 10.15:
    let devices = AVCaptureDevice.devices(for: AVMediaType.video)
    return devices[1]
    */
    let discoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.externalUnknown], mediaType: .video, position: .unspecified)
    print("found \(discoverySession.devices.count) device(s)")
    let devices = discoverySession.devices
    guard !devices.isEmpty else { fatalError("found device FAILED") }

    // log all devices
    for each in discoverySession.devices {
      print("- \(each.localizedName)")
    }

    // choose the best
    /*
    If obs-virtual-camera reports an error, strip the app's signature and re-sign it ad hoc:
    https://obsproject.com/wiki/MacOS-Virtual-Camera-Compatibility-Guide
    sudo codesign --remove-signature CameraApp.app
    sudo codesign --sign - Camera.app
    */
    let device = devices.first(where: { device in device.position == .unspecified })
    if let p = device {
      print(p.localizedName)
    }
    return device
  }
}
```
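One prerequisite the listing above leaves implicit: camera permission. On modern macOS the app needs an NSCameraUsageDescription entry in Info.plist (and, if sandboxed, the camera entitlement), or startRunning() will never deliver frames. Access can also be requested explicitly up front, a minimal sketch:

```swift
import AVFoundation

// Ask for camera permission before calling Camera.shared.open().
// Requires NSCameraUsageDescription in Info.plist.
AVCaptureDevice.requestAccess(for: .video) { granted in
  print("camera access granted: \(granted)")
}
```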
Encoder
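VTH264Encoder subscribes to the camera's published sample buffers, lazily creates a VTCompressionSession sized to the first frame it receives, and appends the encoded stream to ~/Desktop/clip.h264. VideoToolbox emits samples in AVCC form (each NAL unit prefixed with a 4-byte big-endian length), so the output callback rewrites them to Annex-B (a 00 00 00 01 start code before SPS, PPS, and every NAL unit) to produce a raw .h264 file.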
```swift
import Combine
import Foundation
import VideoToolbox

class Encoder {
  func start() {
  }

  func stop() {
  }

  // debug helper: dump basic info about a compressed sample buffer
  func printSampleInfo(_ sampleBuffer: CMSampleBuffer?) {
    guard let sampleBuffer = sampleBuffer else { return }

    let desc = CMSampleBufferGetFormatDescription(sampleBuffer)
    let extensions = CMFormatDescriptionGetExtensions(desc!)
    print("extensions: \(extensions!)")

    let sampleCount = CMSampleBufferGetNumSamples(sampleBuffer)
    print("sample count: \(sampleCount)")

    let dataBuffer = CMSampleBufferGetDataBuffer(sampleBuffer)!
    var length: Int = 0
    var dataPointer: UnsafeMutablePointer<Int8>?
    CMBlockBufferGetDataPointer(dataBuffer, atOffset: 0, lengthAtOffsetOut: nil, totalLengthOut: &length, dataPointerOut: &dataPointer)
    print("length: \(length), dataPointer: \(dataPointer!)")
  }
}
class VTH264Encoder: Encoder {
  private var frameSink: AnyCancellable?
  private var session: VTCompressionSession?
  private let queue = DispatchQueue(label: "cn.nonocast.VTH264Encoder")
  // Annex-B start code, written before SPS, PPS, and every NAL unit
  private let NALUHeader: [UInt8] = [0x00, 0x00, 0x00, 0x01]
  private var recordFileHandler: FileHandle?

  override init() {
    super.init()
    print("VTH264Encoder init")
  }

  override func start() {
    super.start()
    openRecordFile()
    frameSink = Camera.shared.$sample
      .receive(on: queue)
      .sink { sample in
        self.onSampleBuffer(sample)
      }
  }

  override func stop() {
    super.stop()
    frameSink?.cancel()
    frameSink = nil
    if let session = session {
      VTCompressionSessionCompleteFrames(session, untilPresentationTimeStamp: CMTime.invalid)
      VTCompressionSessionInvalidate(session)
      self.session = nil
    }
    closeRecordFile()
  }

  // write SPS and PPS, each prefixed with the Annex-B start code
  func handle(sps: NSData, pps: NSData) {
    guard let rec = recordFileHandler else { return }
    let headerData: NSData = NSData(bytes: NALUHeader, length: NALUHeader.count)
    rec.write(headerData as Data)
    rec.write(sps as Data)
    rec.write(headerData as Data)
    rec.write(pps as Data)
  }

  // write one NAL unit, prefixed with the Annex-B start code
  func encode(data: NSData, isKeyFrame: Bool) {
    guard let rec = recordFileHandler else { return }
    let headerData: NSData = NSData(bytes: NALUHeader, length: NALUHeader.count)
    rec.write(headerData as Data)
    rec.write(data as Data)
  }

  func onSampleBuffer(_ sample: CMSampleBuffer?) {
    guard let sample = sample, let buffer = sample.imageBuffer else { return }
    // lazily create the session once the first frame tells us the dimensions
    if session == nil {
      self.createCompressionSession(by: buffer)
    }
    guard let session = self.session else { return }
    // lock(_:_:) is a small CVPixelBuffer helper from the project (sketched below)
    buffer.lock(.readwrite) {
      let presentationTimestamp = CMSampleBufferGetOutputPresentationTimeStamp(sample)
      let duration = CMSampleBufferGetOutputDuration(sample)
      VTCompressionSessionEncodeFrame(session,
                                      imageBuffer: buffer,
                                      presentationTimeStamp: presentationTimestamp,
                                      duration: duration,
                                      frameProperties: nil,
                                      sourceFrameRefcon: nil,
                                      infoFlagsOut: nil)
    }
  }

  func createCompressionSession(by buffer: CVPixelBuffer) {
    print(buffer.pixelFormatName())
    let width = CVPixelBufferGetWidth(buffer)
    let height = CVPixelBufferGetHeight(buffer)
    // e.g. 1280x960
    print("width: \(width), height: \(height)")
    VTCompressionSessionCreate(allocator: kCFAllocatorDefault,
                               width: Int32(width),
                               height: Int32(height),
                               codecType: kCMVideoCodecType_H264,
                               encoderSpecification: nil,
                               imageBufferAttributes: nil,
                               compressedDataAllocator: nil,
                               outputCallback: compressionOutputCallback,
                               refcon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()),
                               compressionSessionOut: &self.session)
    guard let session = self.session else { return }

    // set profile to Main
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_ProfileLevel, value: kVTProfileLevel_H264_Main_AutoLevel)
    // capture from camera, so it's real time
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_RealTime, value: true as CFTypeRef)
    // keyframe interval: one IDR every 10 frames
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_MaxKeyFrameInterval, value: 10 as CFTypeRef)
    // average bitrate and hard data-rate limit
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_AverageBitRate, value: width * height * 2 * 32 as CFTypeRef)
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_DataRateLimits, value: [width * height * 2 * 4, 1] as CFArray)
    VTCompressionSessionPrepareToEncodeFrames(session)
  }
}
extension VTH264Encoder {
  func openRecordFile() {
    let home = FileManager.default.homeDirectoryForCurrentUser
    let clip = home.appendingPathComponent("Desktop/clip.h264")
    try? FileManager.default.removeItem(at: clip)
    if FileManager.default.createFile(atPath: clip.path, contents: nil, attributes: nil) {
      recordFileHandler = try? FileHandle(forWritingTo: clip)
    }
  }

  func closeRecordFile() {
    do { try recordFileHandler?.close() } catch {}
  }
}
func compressionOutputCallback(outputCallbackRefCon: UnsafeMutableRawPointer?,
                               sourceFrameRefCon: UnsafeMutableRawPointer?,
                               status: OSStatus,
                               infoFlags: VTEncodeInfoFlags,
                               sampleBuffer: CMSampleBuffer?) {
  print("\(Thread.current): compressionOutputCallback")
  guard status == noErr else { print("error: \(status)"); return }
  // infoFlags is an OptionSet and usually also carries .asynchronous, so test with contains
  if infoFlags.contains(.frameDropped) { print("frame dropped"); return }
  guard let sampleBuffer = sampleBuffer else { print("sampleBuffer is nil"); return }
  guard CMSampleBufferDataIsReady(sampleBuffer) else { print("sampleBuffer data is not ready"); return }

  let encoder: VTH264Encoder = Unmanaged.fromOpaque(outputCallbackRefCon!).takeUnretainedValue()
  // encoder.printSampleInfo(sampleBuffer)

  if let attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, createIfNecessary: true) {
    let rawDic: UnsafeRawPointer = CFArrayGetValueAtIndex(attachments, 0)
    let dic: CFDictionary = Unmanaged.fromOpaque(rawDic).takeUnretainedValue()
    // if kCMSampleAttachmentKey_NotSync is absent, this sample is an IDR (key) frame
    let keyFrame = !CFDictionaryContainsKey(dic, Unmanaged.passUnretained(kCMSampleAttachmentKey_NotSync).toOpaque())
    if keyFrame {
      // pull the SPS (parameter set 0) out of the format description
      let format = CMSampleBufferGetFormatDescription(sampleBuffer)
      var spsSize: Int = 0
      var spsCount: Int = 0
      var nalHeaderLength: Int32 = 0
      var sps: UnsafePointer<UInt8>?
      if CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format!,
                                                            parameterSetIndex: 0,
                                                            parameterSetPointerOut: &sps,
                                                            parameterSetSizeOut: &spsSize,
                                                            parameterSetCountOut: &spsCount,
                                                            nalUnitHeaderLengthOut: &nalHeaderLength) == noErr {
        // pull the PPS (parameter set 1)
        var ppsSize: Int = 0
        var ppsCount: Int = 0
        var pps: UnsafePointer<UInt8>?
        if CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format!,
                                                              parameterSetIndex: 1,
                                                              parameterSetPointerOut: &pps,
                                                              parameterSetSizeOut: &ppsSize,
                                                              parameterSetCountOut: &ppsCount,
                                                              nalUnitHeaderLengthOut: &nalHeaderLength) == noErr {
          let spsData: NSData = NSData(bytes: sps, length: spsSize)
          let ppsData: NSData = NSData(bytes: pps, length: ppsSize)
          encoder.handle(sps: spsData, pps: ppsData)
        }
      }
    } // end of handle sps/pps

    // handle frame data: walk the AVCC block buffer NAL unit by NAL unit
    guard let dataBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { return }
    var lengthAtOffset: Int = 0
    var totalLength: Int = 0
    var dataPointer: UnsafeMutablePointer<Int8>?
    if CMBlockBufferGetDataPointer(dataBuffer, atOffset: 0, lengthAtOffsetOut: &lengthAtOffset, totalLengthOut: &totalLength, dataPointerOut: &dataPointer) == noErr {
      var bufferOffset: Int = 0
      let AVCCHeaderLength = 4
      while bufferOffset < (totalLength - AVCCHeaderLength) {
        // the first four bytes hold the NAL unit length
        var NALUnitLength: UInt32 = 0
        memcpy(&NALUnitLength, dataPointer?.advanced(by: bufferOffset), AVCCHeaderLength)
        // convert the big-endian length to host byte order (little-endian on Apple platforms)
        NALUnitLength = CFSwapInt32BigToHost(NALUnitLength)
        let data: NSData = NSData(bytes: dataPointer?.advanced(by: bufferOffset + AVCCHeaderLength), length: Int(NALUnitLength))
        encoder.encode(data: data, isKeyFrame: keyFrame)
        // move forward to the next NAL unit
        bufferOffset += AVCCHeaderLength + Int(NALUnitLength)
      }
    }
  }
}
```
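Two small CVPixelBuffer helpers, lock(_:_:) and pixelFormatName(), are used above but defined elsewhere in the project. A minimal sketch of what they could look like (my reconstruction, assuming only the names seen at the call sites):

```swift
import CoreVideo

// Hypothetical lock-mode enum matching the `.readwrite` seen at the call site.
enum PixelBufferLockMode {
  case readonly, readwrite

  var flags: CVPixelBufferLockFlags {
    switch self {
    case .readonly: return .readOnly
    case .readwrite: return []  // empty flags = read/write lock
    }
  }
}

extension CVPixelBuffer {
  // lock the base address around `body`, then unlock
  func lock(_ mode: PixelBufferLockMode, _ body: () -> Void) {
    CVPixelBufferLockBaseAddress(self, mode.flags)
    body()
    CVPixelBufferUnlockBaseAddress(self, mode.flags)
  }

  // render the FourCC pixel format as a readable string, e.g. "420v"
  func pixelFormatName() -> String {
    let type = CVPixelBufferGetPixelFormatType(self)
    let bytes = [24, 16, 8, 0].map { UInt8((type >> $0) & 0xFF) }
    return String(bytes: bytes, encoding: .ascii) ?? "unknown"
  }
}
```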
The full source code is here: VideoToolboxApp.zip
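To check the result: clip.h264 is a raw Annex-B elementary stream, not a container, so QuickTime Player won't open it directly. Assuming ffmpeg is installed, it can be previewed with `ffplay ~/Desktop/clip.h264` or wrapped losslessly into a container with `ffmpeg -i clip.h264 -c copy clip.mp4`.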
Further Reading
- Direct Access to Video Encoding and Decoding (WWDC14, Apple Developer)
- tomisacat/VideoToolboxCompression: Capture and compress video into H.264 with AVFoundation/VideoToolbox written in Swift (GitHub)
- Hardware-encoding H.264 with VideoToolbox (使用VideoToolbox硬编码H.264, 简书)
- Hardware-encoding H.264 with VideoToolbox (使用VideoToolbox硬编码H.264, 92IT)
- The VideoToolbox framework on Apple platforms (Apple 平台下的 VideoToolBox 框架, Enki's Notes)
- VideoToolbox H264 hardware encoding (VideoToolBox H264 硬编码, 掘金)
- Hardware-encoding H.264 with VideoToolbox (使用VideoToolbox硬编码H.264, CSDN, 分贝丶)
- Understanding VideoToolbox hardware encoding (了解VideoToolBox 硬编码, 简书)
- VideoToolbox usage notes (VideoToolbox使用说明, 简书, GeorgeMR)
- Important! Key flows of H264 hardware decoding on Android and iOS (Android与IOS端h264硬解关键流程梳理, 简书)
- Bitstream formats: Annex-B, AVCC (H.264) and HVCC (H.265), and extradata explained (码流格式: Annex-B, AVCC(H.264)与HVCC(H.265), extradata详解, CSDN, yue_huang)