
Learning macOS Development (Part 25: VideoToolbox H264 Encode)


The articles I referenced and the WWDC session already explain this very clearly; here I have just reorganized the code into two parts (a minimal wiring sketch follows the list):

  • Camera
  • Encoder
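
Both classes below also reference an AppError type that is not included in the post; here is a minimal sketch of the assumed definition, plus how the two pieces are wired together:

enum AppError: Error {
  case CameraNotFound
  case CameraOpenError
}

// open the camera, then start encoding its frames
let encoder = VTH264Encoder()
do {
  try Camera.shared.open()
  encoder.start()
} catch {
  print("failed to open camera: \(error)")
}

// ... and tear down in reverse order when done
encoder.stop()
Camera.shared.close()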

Camera

import AVFoundation
import VideoToolbox

class VideoInput : NSObject, ObservableObject {
  @Published var sample: CMSampleBuffer?  // the full sample, including timing info
  @Published var image: CVPixelBuffer?    // just the pixel buffer, handy for preview
}

class Camera : VideoInput {
  static let shared = Camera()
  
  let session = AVCaptureSession()
  private let queue = DispatchQueue(label: "cn.nonocast.camera")
  
  override init() {
    super.init()
  }
  
  func open() throws {
    guard let device = chooseCaptureDevice() else {
      throw AppError.CameraNotFound
    }
    
    guard let videoInput = try? AVCaptureDeviceInput(device: device), session.canAddInput(videoInput) else {
      throw AppError.CameraOpenError
    }
    
    session.addInput(videoInput)
    
    let videoOutput = AVCaptureVideoDataOutput()
    // request NV12 (bi-planar 4:2:0, video range), which the H264 encoder consumes directly
    videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange]
    videoOutput.setSampleBufferDelegate(self, queue: queue)
    guard session.canAddOutput(videoOutput) else {
      throw AppError.CameraOpenError
    }
    
    session.addOutput(videoOutput)
    session.startRunning()
  }
  
  func close() {
    session.stopRunning()
  }
}

extension Camera : AVCaptureVideoDataOutputSampleBufferDelegate {
  func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    // published on the capture queue; subscribers hop to their own queue via receive(on:)
    self.sample = sampleBuffer
    self.image = sampleBuffer.imageBuffer
  }
}
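
As a side note, the published pixel buffer makes a live preview easy: subscribe on the main queue and convert each frame to a CGImage. A sketch using VTCreateCGImageFromCVPixelBuffer (part of VideoToolbox); what you do with the resulting image is up to your UI layer:

import Combine
import VideoToolbox

let preview = Camera.shared.$image
  .receive(on: DispatchQueue.main)
  .compactMap { buffer -> CGImage? in
    guard let buffer = buffer else { return nil }
    var cgImage: CGImage?
    _ = VTCreateCGImageFromCVPixelBuffer(buffer, options: nil, imageOut: &cgImage)
    return cgImage
  }
  .sink { cgImage in
    // hand cgImage to an NSImageView / SwiftUI state here
    print("preview frame: \(cgImage.width)x\(cgImage.height)")
  }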

extension Camera {
  private func chooseCaptureDevice() -> AVCaptureDevice? {
    /*
     under 10.15
     let devices = AVCaptureDevice.devices(for: AVMediaType.video)
     return devices[1]
     */
    let discoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.externalUnknown], mediaType: .video, position: .unspecified)
    print("found \(discoverySession.devices.count) device(s)")
    
    let devices = discoverySession.devices
    // return nil here so the caller can throw AppError.CameraNotFound
    guard !devices.isEmpty else { return nil }

    // log all devices
    for each in devices {
      print("- \(each.localizedName)")
    }
    
    // choose the best
    /*
     If obs-virtual-camera reports a codesign error, remove the app's signature
     (see https://obsproject.com/wiki/MacOS-Virtual-Camera-Compatibility-Guide):
     sudo codesign --remove-signature CameraApp.app
     sudo codesign --sign - Camera.app
     */
    // external/virtual cameras report .unspecified (rawValue 0) for position
    let device = devices.first(where: { $0.position == .unspecified })

    if let device = device {
      print(device.localizedName)
    }
    return device
  }
}
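
On recent macOS the app also needs camera permission before open() will deliver frames: NSCameraUsageDescription in Info.plist, plus the com.apple.security.device.camera entitlement if the app is sandboxed. A minimal sketch of checking authorization first (API available since macOS 10.14):

import AVFoundation

switch AVCaptureDevice.authorizationStatus(for: .video) {
case .authorized:
  try? Camera.shared.open()
case .notDetermined:
  AVCaptureDevice.requestAccess(for: .video) { granted in
    if granted { try? Camera.shared.open() }
  }
default:
  print("camera access denied")
}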

Encoder

import Combine
import Foundation
import VideoToolbox

class Encoder {
  func start() {

  }
  
  func stop() {

  }
  
  // debug helper: dumps format/NALU info for a *compressed* sample
  // (raw camera samples carry an imageBuffer rather than a dataBuffer,
  // so the guards below simply bail out for them)
  func printSampleInfo(_ sampleBuffer: CMSampleBuffer?) {
    guard let sampleBuffer = sampleBuffer,
          let desc = CMSampleBufferGetFormatDescription(sampleBuffer) else { return }

    if let extensions = CMFormatDescriptionGetExtensions(desc) {
      print("extensions: \(extensions)")
    }

    let sampleCount = CMSampleBufferGetNumSamples(sampleBuffer)
    print("sample count: \(sampleCount)")

    guard let dataBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { return }
    var length: Int = 0
    var dataPointer: UnsafeMutablePointer<Int8>?
    CMBlockBufferGetDataPointer(dataBuffer, atOffset: 0, lengthAtOffsetOut: nil, totalLengthOut: &length, dataPointerOut: &dataPointer)
    print("length: \(length), dataPointer: \(String(describing: dataPointer))")
  }
}

class VTH264Encoder : Encoder {
  private var frameSink: AnyCancellable?
  
  private var session: VTCompressionSession?
  private let queue = DispatchQueue(label: "cn.nonocast.VTH264Encoder")
  private let NALUHeader: [UInt8] = [0x00, 0x00, 0x00, 0x01]
  private var recordFileHandler: FileHandle?
  
  override init() {
    super.init()
    print("VTH264Encoder init")
  }
  
  override func start() {
    super.start()
    
    openRecordFile()
    
    frameSink = Camera.shared.$sample
      .receive(on: queue)
      .sink { [weak self] sample in
        // [weak self] avoids a retain cycle, since the sink lives as long as the encoder
        self?.onSampleBuffer(sample)
      }
  }
  
  override func stop() {
    super.stop()
    frameSink?.cancel()
    frameSink = nil
    
    if let session = session {
      VTCompressionSessionCompleteFrames(session, untilPresentationTimeStamp: CMTime.invalid)
      VTCompressionSessionInvalidate(session)
      self.session = nil
    }
    
    closeRecordFile()
  }
  
  // write SPS and PPS, each prefixed with an Annex B start code (00 00 00 01)
  func handle(sps: NSData, pps: NSData) {
    guard let rec = recordFileHandler else { return }

    let headerData = NSData(bytes: NALUHeader, length: NALUHeader.count)
    rec.write(headerData as Data)
    rec.write(sps as Data)
    rec.write(headerData as Data)
    rec.write(pps as Data)
  }

  // write a single NAL unit, prefixed with an Annex B start code
  func encode(data: NSData, isKeyFrame: Bool) {
    guard let rec = recordFileHandler else { return }
    let headerData = NSData(bytes: NALUHeader, length: NALUHeader.count)
    rec.write(headerData as Data)
    rec.write(data as Data)
  }
  
  func onSampleBuffer(_ sample: CMSampleBuffer?) {
    guard let sample = sample, let buffer = sample.imageBuffer else { return }
    if session == nil {
      self.createCompressionSession(by: buffer)
    }

    guard let session = self.session else { return }

    // `lock(_:)` is a helper extension from the project (not shown here),
    // presumably wrapping CVPixelBufferLockBaseAddress/-UnlockBaseAddress
    buffer.lock(.readwrite) {
      let presentationTimestamp = CMSampleBufferGetOutputPresentationTimeStamp(sample)
      let duration = CMSampleBufferGetOutputDuration(sample)
      VTCompressionSessionEncodeFrame(session,
                                      imageBuffer: buffer,
                                      presentationTimeStamp: presentationTimestamp,
                                      duration: duration,
                                      frameProperties: nil,
                                      sourceFrameRefcon: nil,
                                      infoFlagsOut: nil)
    }
  }
  
  func createCompressionSession(by buffer: CVPixelBuffer) {
    // `pixelFormatName()` is another helper extension from the project (not shown)
    print(buffer.pixelFormatName())

    let width = CVPixelBufferGetWidth(buffer)
    let height = CVPixelBufferGetHeight(buffer)

    // e.g. 1280x960
    print("width: \(width), height: \(height)")

    VTCompressionSessionCreate(allocator: kCFAllocatorDefault,
                               width: Int32(width),
                               height: Int32(height),
                               codecType: kCMVideoCodecType_H264,
                               encoderSpecification: nil,
                               imageBufferAttributes: nil,
                               compressedDataAllocator: nil,
                               outputCallback: compressionOutputCallback,
                               refcon: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque()),
                               compressionSessionOut: &self.session)
    
    guard let session = self.session else { return }

    // set profile to Main
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_ProfileLevel, value: kVTProfileLevel_H264_Main_AutoLevel)
    // capturing from a camera, so encode in real time
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_RealTime, value: true as CFTypeRef)
    // keyframe interval: at most 10 frames between IDR frames
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_MaxKeyFrameInterval, value: 10 as CFTypeRef)
    // target bitrate (bits per second) and a hard cap ([bytes, seconds] pairs)
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_AverageBitRate, value: width * height * 2 * 32 as CFTypeRef)
    VTSessionSetProperty(session, key: kVTCompressionPropertyKey_DataRateLimits, value: [width * height * 2 * 4, 1] as CFArray)

    VTCompressionSessionPrepareToEncodeFrames(session)
  }
}
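
A quick sanity check on the two rate properties above, for the 1280x960 stream: AverageBitRate = 1280 × 960 × 2 × 32 = 78,643,200 bits/s (≈78.6 Mbit/s), and DataRateLimits caps the stream at 1280 × 960 × 2 × 4 = 9,830,400 bytes per 1-second window, which is the same ≈78.6 Mbit/s expressed in bytes. Encoders may silently ignore unsupported values, so it can be worth reading a property back after setting it; a sketch:

// read back a property to verify it took effect
func dumpProfileLevel(of session: VTCompressionSession) {
  var value: CFTypeRef?
  let status = VTSessionCopyProperty(session,
                                     key: kVTCompressionPropertyKey_ProfileLevel,
                                     allocator: kCFAllocatorDefault,
                                     valueOut: &value)
  if status == noErr {
    print("profile level: \(String(describing: value))")
  }
}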

extension VTH264Encoder {
  func openRecordFile() {
    // note: writing straight to ~/Desktop requires the App Sandbox to be
    // disabled (or an appropriate file-access entitlement)
    let home = FileManager.default.homeDirectoryForCurrentUser
    let clip = home.appendingPathComponent("Desktop/clip.h264")
    try? FileManager.default.removeItem(at: clip)
    if FileManager.default.createFile(atPath: clip.path, contents: nil, attributes: nil) {
      recordFileHandler = try? FileHandle(forWritingTo: clip)
    }
  }
  
  func closeRecordFile() {
    do { try recordFileHandler?.close() } catch { }
  }
}
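
Note that clip.h264 is a raw Annex B elementary stream, not a playable container. With ffmpeg installed it can be previewed or remuxed directly, for example:

ffplay -f h264 ~/Desktop/clip.h264
ffmpeg -f h264 -i ~/Desktop/clip.h264 -c copy ~/Desktop/clip.mp4

(ffmpeg assumes 25 fps for raw H264 input; pass -framerate to override.)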

func compressionOutputCallback(outputCallbackRefCon: UnsafeMutableRawPointer?,
                               sourceFrameRefCon: UnsafeMutableRawPointer?,
                               status: OSStatus,
                               infoFlags: VTEncodeInfoFlags,
                               sampleBuffer: CMSampleBuffer?) -> Swift.Void {
  print("\(Thread.current): compressionOutputCallback")
  
  guard status == noErr else { print("error: \(status)"); return }
  if infoFlags.contains(.frameDropped) { print("frame dropped"); return }
  guard let sampleBuffer = sampleBuffer else { print("sampleBuffer is nil"); return }
  guard CMSampleBufferDataIsReady(sampleBuffer) else { print("sampleBuffer data is not ready"); return }
  
  let encoder: VTH264Encoder = Unmanaged.fromOpaque(outputCallbackRefCon!).takeUnretainedValue()
  //  encoder.printSampleInfo(sampleBuffer)
  
  if let attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, createIfNecessary: true) {
    //    print("attachments: \(attachments)")
    
    let rawDic: UnsafeRawPointer = CFArrayGetValueAtIndex(attachments, 0)
    let dic: CFDictionary = Unmanaged.fromOpaque(rawDic).takeUnretainedValue()
    
    // if kCMSampleAttachmentKey_NotSync is absent, this is a sync (IDR) frame
    let keyFrame = !CFDictionaryContainsKey(dic, Unmanaged.passUnretained(kCMSampleAttachmentKey_NotSync).toOpaque())
    if keyFrame {
      //      print("IDR frame")
      
      // sps
      let format = CMSampleBufferGetFormatDescription(sampleBuffer)
      var spsSize: Int = 0
      var spsCount: Int = 0
      var nalHeaderLength: Int32 = 0
      var sps: UnsafePointer<UInt8>?
      if CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format!,
                                                            parameterSetIndex: 0,
                                                            parameterSetPointerOut: &sps,
                                                            parameterSetSizeOut: &spsSize,
                                                            parameterSetCountOut: &spsCount,
                                                            nalUnitHeaderLengthOut: &nalHeaderLength) == noErr {
        //        print("sps: \(String(describing: sps)), spsSize: \(spsSize), spsCount: \(spsCount), NAL header length: \(nalHeaderLength)")
        
        // pps
        var ppsSize: Int = 0
        var ppsCount: Int = 0
        var pps: UnsafePointer<UInt8>?
        
        if CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format!,
                                                              parameterSetIndex: 1,
                                                              parameterSetPointerOut: &pps,
                                                              parameterSetSizeOut: &ppsSize,
                                                              parameterSetCountOut: &ppsCount,
                                                              nalUnitHeaderLengthOut: &nalHeaderLength) == noErr {
          //          print("pps: \(String(describing: pps)), ppsSize: \(ppsSize), ppsCount: \(ppsCount), NAL header length: \(nalHeaderLength)")
          
          let spsData: NSData = NSData(bytes: sps, length: spsSize)
          let ppsData: NSData = NSData(bytes: pps, length: ppsSize)
          
          encoder.handle(sps: spsData, pps: ppsData)
        }
      }
    } // end of handle sps/pps
    
    // handle frame data
    guard let dataBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { return }
    
    var lengthAtOffset: Int = 0
    var totalLength: Int = 0
    var dataPointer: UnsafeMutablePointer<Int8>?
    if CMBlockBufferGetDataPointer(dataBuffer, atOffset: 0, lengthAtOffsetOut: &lengthAtOffset, totalLengthOut: &totalLength, dataPointerOut: &dataPointer) == noErr {
      var bufferOffset: Int = 0
      let AVCCHeaderLength = 4
      
      while bufferOffset < (totalLength - AVCCHeaderLength) {
        var NALUnitLength: UInt32 = 0
        // the first four bytes hold the NAL unit's length (AVCC layout)
        memcpy(&NALUnitLength, dataPointer?.advanced(by: bufferOffset), AVCCHeaderLength)

        // AVCC lengths are big-endian; convert to host byte order
        // (little-endian on both Intel and Apple silicon Macs)
        NALUnitLength = CFSwapInt32BigToHost(NALUnitLength)

        let data: NSData = NSData(bytes: dataPointer?.advanced(by: bufferOffset + AVCCHeaderLength), length: Int(NALUnitLength))

        encoder.encode(data: data, isKeyFrame: keyFrame)

        // move forward to the next NAL unit
        bufferOffset += AVCCHeaderLength
        bufferOffset += Int(NALUnitLength)
      }
      }
    }
  }
}
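
To spell out what the loop at the end does: VideoToolbox returns samples in AVCC layout, where every NAL unit is prefixed by a 4-byte big-endian length, while a raw .h264 file expects Annex B layout, where every NAL unit is prefixed by the start code 00 00 00 01. Schematically, with hypothetical sizes:

AVCC (encoder output): [00 00 00 1A][26-byte NALU][00 00 04 00][1024-byte NALU]...
Annex B (on disk):     [00 00 00 01][26-byte NALU][00 00 00 01][1024-byte NALU]...

The loop reads each length field, byte-swaps it to host order, and writes the payload behind a start code via encoder.encode(data:isKeyFrame:).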

The source code is here: VideoToolboxApp.zip

