Learning macOS Development (Part 21: Vision - Faces and Face Landmarks)
Vision's face and body detection requests:
- class VNDetectFaceRectanglesRequest: A request that finds faces within an image.
- class VNDetectHumanRectanglesRequest: A request that finds rectangular regions that contain people in an image.
- class VNDetectFaceLandmarksRequest: An image analysis request that finds facial features like eyes and mouth in an image.
- class VNGeneratePersonSegmentationRequest: An object that produces a matte image for a person that it finds in the input image.
Note: VNGeneratePersonSegmentationRequest requires iOS 15 / macOS 12 or later. The basic calling pattern shared by all of these requests is sketched below.
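All four requests follow the same Vision flow: build a request, hand the image to a request handler, then read request.results. Here is a minimal sketch of that pattern using VNImageRequestHandler, which suits a single still image (the full programs below use VNSequenceRequestHandler instead); the function name runFaceCount is illustrative, not from the original post:

import Vision

// Minimal sketch: run a face-rectangles request on a CGImage and count results.
func runFaceCount(on cgImage: CGImage) {
    let request = VNDetectFaceRectanglesRequest()
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    do {
        try handler.perform([request])
        // results is populated synchronously once perform returns
        print("faces: \(request.results?.count ?? 0)")
    } catch {
        print("Vision request failed: \(error)")
    }
}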
Marking faces in an image
import Cocoa
import CoreGraphics
import Vision

var input: CGImage?
let requestHandler = VNSequenceRequestHandler()

func detect() {
    let faceRequest = VNDetectFaceRectanglesRequest { request, _ in
        guard let results = request.results else { return }
        print("face count: \(results.count)")
        drawBoundingBox(input, observations: results)
    }
    // Use the newest revision of the face detector (requires macOS 12).
    faceRequest.revision = VNDetectFaceRectanglesRequestRevision3
    if let image = NSImage(byReferencingFile: "./assets/face-samples/megaface.png") {
        input = image.cgImage(forProposedRect: nil, context: nil, hints: nil)
        try? requestHandler.perform([faceRequest], on: input!)
    } else {
        print("load face image FAILED")
    }
}
func drawBoundingBox(_ input: CGImage?, observations: [VNObservation]) {
    print("### drawBoundingBox \(observations.count)")
    guard let source = input else { return }
    let size = CGSize(width: source.width, height: source.height)
    let image = NSImage(size: size, flipped: false, drawingHandler: { _ -> Bool in
        guard let ctx = NSGraphicsContext.current?.cgContext else { return false }
        ctx.draw(source, in: CGRect(x: 0, y: 0, width: size.width, height: size.height))
        for observation in observations {
            if let face = observation as? VNFaceObservation {
                // boundingBox is normalized to [0, 1] with a bottom-left origin,
                // which matches this non-flipped context, so scaling is enough.
                let bb = face.boundingBox
                let r = CGRect(x: bb.minX * size.width, y: bb.minY * size.height,
                               width: bb.width * size.width, height: bb.height * size.height)
                ctx.setLineWidth(2)
                ctx.setStrokeColor(NSColor(calibratedRed: 1, green: 1, blue: 0, alpha: 0.9).cgColor)
                ctx.setFillColor(NSColor(calibratedRed: 1, green: 1, blue: 0, alpha: 0.1).cgColor)
                ctx.fill(r)
                ctx.stroke(r)
            }
        }
        return true
    })
    // Save the annotated image as a PNG via a TIFF -> bitmap-rep round trip.
    if let data = image.tiffRepresentation {
        let rep = NSBitmapImageRep(data: data)
        let pngData = rep?.representation(using: .png, properties: [:])
        do { try pngData?.write(to: URL(fileURLWithPath: "detected.png")) } catch { print(error) }
    }
}
detect()
Makefile
APP=hello
SRC=src/app.swift
BIN=build/

run: build
	$(BIN)$(APP)

build:
	mkdir -p $(BIN)
	swiftc -o $(BIN)$(APP) $(SRC)

clean:
	rm -rf $(BIN)

.PHONY: clean run build
I grabbed a few random images off the web and the results were very good; a single photo with 50 faces is plenty for my purposes. Drawing the boxes took me half a day; luckily I had studied CGContext before, otherwise I wouldn't have managed it.
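This part's title also mentions face landmarks, though the code above only draws rectangles, so here is a minimal sketch of VNDetectFaceLandmarksRequest under the same setup. detectLandmarks and the chosen regions are illustrative; the point is that each landmark region's points are normalized, and pointsInImage(imageSize:) converts them to image coordinates:

import Vision

func detectLandmarks(on cgImage: CGImage) {
    let request = VNDetectFaceLandmarksRequest()
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    try? handler.perform([request])
    let size = CGSize(width: cgImage.width, height: cgImage.height)
    for face in request.results ?? [] {
        // Each region (eyes, nose, lips, ...) is a VNFaceLandmarkRegion2D;
        // pointsInImage(imageSize:) maps its normalized points into image space.
        if let leftEye = face.landmarks?.leftEye {
            print("left eye: \(leftEye.pointsInImage(imageSize: size))")
        }
        if let outerLips = face.landmarks?.outerLips {
            print("outer lips: \(outerLips.pointsInImage(imageSize: size))")
        }
    }
}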
Person segmentation
import Cocoa
import CoreGraphics
import CoreImage.CIFilterBuiltins
import Vision

var input: CGImage?
let requestHandler = VNSequenceRequestHandler()

func detect() {
    let segmentationRequest = VNGeneratePersonSegmentationRequest()
    segmentationRequest.qualityLevel = .balanced
    // One-component 8-bit output: a grayscale matte where bright pixels mark the person.
    segmentationRequest.outputPixelFormat = kCVPixelFormatType_OneComponent8
    if let image = NSImage(byReferencingFile: "./assets/face-samples/tonghua.jpg"), image.isValid {
        print(image.size)
        input = image.cgImage(forProposedRect: nil, context: nil, hints: nil)
        try? requestHandler.perform([segmentationRequest], on: input!)
        guard let maskPixelBuffer = segmentationRequest.results?.first?.pixelBuffer else { return }
        blend(original: input, mask: maskPixelBuffer)
    } else {
        print("load face image FAILED")
    }
}
func blend(original cgImage: CGImage?, mask maskBuffer: CVPixelBuffer) {
    print("# blend")
    let originalImage = CIImage(cgImage: cgImage!)
    var maskImage = CIImage(cvPixelBuffer: maskBuffer)
    // The mask comes back at 512x384 or 384x512 regardless of the input size.
    print(maskImage.extent.size)
    // Saving the raw mask image, for inspection:
    // let maskImageRep = NSBitmapImageRep(ciImage: maskImage)
    // print(maskImageRep.size)
    // if let data = maskImageRep.representation(using: .png, properties: [:]) {
    //     do { try data.write(to: URL(fileURLWithPath: "mask.png")) } catch { print(error) }
    // }
    // Stretch the mask to the same size as the original image.
    let scaleX = originalImage.extent.width / maskImage.extent.width
    let scaleY = originalImage.extent.height / maskImage.extent.height
    maskImage = maskImage.transformed(by: .init(scaleX: scaleX, y: scaleY))
    print(maskImage.extent.size)
    guard let backgroundNSImage = NSImage(byReferencingFile: "./assets/baker.jpg") else { return }
    guard let backgroundCGImage = backgroundNSImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else { return }
    let backgroundImage = CIImage(cgImage: backgroundCGImage)
    // blendWithRedMask keeps inputImage where the mask is bright and shows
    // backgroundImage everywhere else.
    let blendFilter = CIFilter.blendWithRedMask()
    blendFilter.inputImage = originalImage
    blendFilter.backgroundImage = backgroundImage
    blendFilter.maskImage = maskImage
    if let result = blendFilter.outputImage {
        let resultImageRep = NSBitmapImageRep(ciImage: result)
        if let data = resultImageRep.representation(using: .png, properties: [:]) {
            do { try data.write(to: URL(fileURLWithPath: "body2.png")) } catch { print(error) }
        }
    }
}
detect()
- "Applying Matte Effects to People in Images and Video"中同时使用了facePoseRequest和segmentationRequest,其实这两个是无关的,他是用facePoseRequest根据人脸的pose来生成一个background CIImage用来做blend,segmentationRequest可以独立使用
- 不管输入图片尺寸是多少,mask的尺寸经测试都会归一化到512x384或384x512
- blend如果不设置,则出来就是透明图片,设置就是叠加
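Following the last note, here is a minimal sketch of the transparent-background variant, where blendFilter.backgroundImage is simply left unset; saveCutout and the output filename are illustrative, and the PNG-saving path reuses the NSBitmapImageRep approach from blend() above:

import Cocoa
import CoreImage.CIFilterBuiltins

// Minimal sketch: cut the person out onto a transparent background.
func saveCutout(original: CIImage, mask: CIImage) {
    let filter = CIFilter.blendWithRedMask()
    filter.inputImage = original
    filter.maskImage = mask   // backgroundImage stays nil -> transparent PNG
    guard let output = filter.outputImage else { return }
    let rep = NSBitmapImageRep(ciImage: output)
    if let data = rep.representation(using: .png, properties: [:]) {
        try? data.write(to: URL(fileURLWithPath: "cutout.png"))
    }
}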
Further reading
- ML & Vision - Videos - Apple Developer
- Detect people, faces, and poses using Vision - WWDC21 - Videos - Apple Developer
- Build Image and Video Style Transfer models in Create ML - WWDC20 - Videos - Apple Developer
- Understanding Images in Vision Framework - WWDC19 - Videos - Apple Developer
- Vision | Apple Developer Documentation
- Applying Matte Effects to People in Images and Video | Apple Developer Documentation
- Tracking the User’s Face in Real Time | Apple Developer Documentation
- Converting between CIImage, CGImage, and UIImage in Swift - iTaacy's blog