ios – CoreML Resnet50 model has poor accuracy on a physical device but works well in Xcode CoreML preview



I am making an object recognition app using the Resnet50 model provided by Apple. I am integrating it with ARKit so that whenever the model detects an object, I display the identifier of the object using an SCNText node. Here is my code:

import CoreML
import Vision
import UIKit
import SceneKit
import ARKit

class ViewController: UIViewController, ARSCNViewDelegate {

    @IBOutlet var sceneView: ARSCNView!
    private var hitTestResult: ARHitTestResult?
    private var squeezeNet = Resnet50()
    
    override func viewDidLoad() {
        super.viewDidLoad()
        
        // Set the view's delegate
        sceneView.delegate = self
        
        // Show statistics such as fps and timing information
        sceneView.showsStatistics = true
        
        // Create a new scene
        let scene = SCNScene()
        let boxGeometry = SCNBox(width: 0.2, height: 0.2, length: 0.2, chamferRadius: 0.0)
        boxGeometry.materials.first?.diffuse.contents = UIColor.red
        let boxNode = SCNNode(geometry: boxGeometry)
        boxNode.position = SCNVector3(x: 0.0, y: 0.0, z: -0.5)
        boxNode.scale = SCNVector3(x: 0.2, y: 0.2, z: 0.2)
        scene.rootNode.addChildNode(boxNode)
        
        // Set the scene to the view
        sceneView.scene = scene
        sceneView.addGestureRecognizer(UITapGestureRecognizer(target: self, action: #selector(tapped)))
    }
    
    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        
        // Create a session configuration
        let configuration = ARWorldTrackingConfiguration()

        // Run the view's session
        sceneView.session.run(configuration)
    }
    
    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        
        // Pause the view's session
        sceneView.session.pause()
    }
    
    
    
    @objc func tapped(recognizer: UITapGestureRecognizer){
        let location = self.sceneView.center
        guard let currentFrame = sceneView.session.currentFrame else { return }
        let hitResult = sceneView.hitTest(location, types: .featurePoint)
        if !hitResult.isEmpty {
            self.hitTestResult = hitResult.first
        }
        self.recognizeImageRequest(buffer: currentFrame.capturedImage)
    }
    
    
    func displayPrediction(text: String){
        if let hitTestResult {
            let node = createText(text: text)
            node.position = SCNVector3(x: hitTestResult.worldTransform.columns.3.x,
                                       y: hitTestResult.worldTransform.columns.3.y,
                                       z: hitTestResult.worldTransform.columns.3.z)
            self.sceneView.scene.rootNode.addChildNode(node)
        }
    }
    
    private func createText(text: String) -> SCNNode {
        
        let parentNode = SCNNode()
        
        let sphere = SCNSphere(radius: 0.01)
        
        let sphereMaterial = SCNMaterial()
        sphereMaterial.diffuse.contents = UIColor.orange
        sphere.firstMaterial = sphereMaterial
        let sphereNode = SCNNode(geometry: sphere)
        
        let textGeometry = SCNText(string: text, extrusionDepth: 0)
        
        textGeometry.alignmentMode = CATextLayerAlignmentMode.center.rawValue
        textGeometry.firstMaterial?.diffuse.contents = UIColor.orange
        textGeometry.firstMaterial?.specular.contents = UIColor.white
        textGeometry.firstMaterial?.isDoubleSided = true
        
        let font = UIFont(name: "Futura", size: 0.10)
        textGeometry.font = font
        
        let textNode = SCNNode(geometry: textGeometry)
        textNode.scale = SCNVector3Make(0.2, 0.2, 0.2)
        
        parentNode.addChildNode(sphereNode)
        parentNode.addChildNode(textNode)
        textNode.position = SCNVector3(x: textNode.position.x, y: textNode.position.y - 0.15, z: textNode.position.z)
        return parentNode
    }
    


}


extension ViewController {
    func recognizeImageRequest(buffer: CVPixelBuffer){
        do {
            let visionModel = try VNCoreMLModel(for: squeezeNet.model)
            let request = VNCoreMLRequest(model: visionModel) { request, error in
                if let error {
                    print(error)
                }
                if let results = request.results as? [VNClassificationObservation] {
                    // Observations come back sorted by confidence, so the first one is the top prediction.
                    print(results.first!.identifier)
                    print(results.first!.confidence)
                    self.displayPrediction(text: results.first!.identifier.components(separatedBy: ",").first!)
                }
            }
            
            request.imageCropAndScaleOption = .centerCrop
            
            let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: buffer, orientation: .upMirrored, options: [:])
            DispatchQueue.global().async {
                do {
                    try imageRequestHandler.perform([request])
                }
                catch {
                    print(error)
                }
            }
        }
        catch {
            print(error)
        }
    }
}

When I run my app on an iPhone 7 with iOS 15.7.6, the accuracy is really bad. But when I test the model using the Xcode CoreML model preview by dragging and dropping some photos, it works well. Am I doing something wrong in capturing the current image from the frame and passing it to the VNImageRequestHandler?
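
In case it helps, this is a stripped-down sketch of how I understand the capture-and-classify step could look if the orientation were derived from the device orientation instead of the hard-coded .upMirrored. The portrait → .right mapping assumes the camera image arrives in landscape, and classify(_:with:) is only an illustrative helper, not the code I actually run:

import ARKit
import UIKit
import Vision

// Sketch only: map the device orientation to an EXIF orientation for Vision.
// Assumes ARKit's capturedImage is delivered in the sensor's landscape orientation,
// so a portrait device maps to .right.
extension CGImagePropertyOrientation {
    init(deviceOrientation: UIDeviceOrientation) {
        switch deviceOrientation {
        case .portraitUpsideDown: self = .left
        case .landscapeLeft:      self = .up
        case .landscapeRight:     self = .down
        default:                  self = .right   // portrait, faceUp/faceDown, unknown
        }
    }
}

// Illustrative helper: classify a single captured frame.
func classify(_ buffer: CVPixelBuffer, with model: VNCoreMLModel) {
    let request = VNCoreMLRequest(model: model) { request, _ in
        guard let best = (request.results as? [VNClassificationObservation])?.first else { return }
        print(best.identifier, best.confidence)
    }
    request.imageCropAndScaleOption = .centerCrop

    let orientation = CGImagePropertyOrientation(deviceOrientation: UIDevice.current.orientation)
    let handler = VNImageRequestHandler(cvPixelBuffer: buffer, orientation: orientation, options: [:])
    DispatchQueue.global(qos: .userInitiated).async {
        do { try handler.perform([request]) } catch { print(error) }
    }
}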
