coremltools Used coremltools to make mlmodel updatable with MSE loss but the model gives negative MSE loss when training

Used coremltools to make mlmodel updatable with MSE loss but the model gives negative MSE loss when training

Open baicenxiao opened this issue 1 year ago • 0 comments

❓Question

I converted a toy PyTorch regression model to a Core ML mlmodel using coremltools and set it to be updatable with mean_squared_error_loss. But when testing the training, context.metrics[.lossValue] in MLUpdateProgressHandlers can be negative, which is impossible for an MSE loss. Furthermore, context.metrics[.lossValue] is very different from the loss I compute myself, as shown in the screenshot below. I was wondering whether I am extracting the training loss in the wrong way, or whether I am using set_mean_squared_error_loss in the wrong way. Does context.metrics[.lossValue] really give the MSE if I use the coremltools function set_mean_squared_error_loss to set the loss? Any suggestion is appreciated.

I am using coremltools==7.0, torch==1.13.1, xcode==15.0.1

Here is my full code to convert the PyTorch model to an updatable Core ML model:

import torch
import torch.optim as optim
import torch.nn as nn
import coremltools as ct

# Define a simple neural network with two layers
class SimpleRegressionModel(nn.Module):
    """Small fully connected regressor: 2 inputs -> 5 hidden units (ReLU) -> 1 output."""

    def __init__(self):
        super().__init__()
        # Hidden layer first, output layer second; creation order is kept so
        # seeded parameter initialisation is unchanged.
        self.layer1 = nn.Linear(in_features=2, out_features=5)
        self.layer2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Apply layer1, a ReLU, then layer2, and return the result."""
        x = torch.relu(self.layer1(x))
        x = self.layer2(x)
        return x

# Create the model and put it in evaluation mode before tracing, as the
# coremltools PyTorch conversion guide recommends (a no-op for this network,
# but it keeps the trace correct if dropout/batch-norm layers are added later).
model = SimpleRegressionModel()
model.eval()

# Create a sample input tensor
sample_input = torch.rand(1, 2)  # Adjust the shape according to your model's input

# Trace the model with a sample input
traced_model = torch.jit.trace(model, sample_input)

# Convert the traced model to Core ML format.
# The input is named explicitly because the Swift training code feeds the
# model through the feature key "x"; the shape is taken from the traced sample
# so the two cannot drift apart.
# (The previously defined `output_features` list was never passed to
# `ct.convert`, so it has been removed; the output keeps its generated name.)
input_features = [ct.TensorType(name="x", shape=sample_input.shape)]
mlmodel = ct.convert(
    traced_model,
    inputs=input_features,
    convert_to="neuralnetwork"
)

mlmodel.save("regression.mlmodel")


import coremltools
from coremltools.models.neural_network import NeuralNetworkBuilder, SgdParams, AdamParams
from coremltools.models import datatypes

# Load the model specification written by the conversion step and wrap it in a
# builder. The builder is constructed from `spec`, and `spec` is what gets
# saved at the end of this script.
spec = coremltools.utils.load_spec('regression.mlmodel')
builder = NeuralNetworkBuilder(spec=spec)

# Print the model's output features so the generated output name can be read off.
builder.inspect_output_features() # Name: linear_1
          
# Make layers updatable (both linear layers of the network, by layer name)
builder.make_updatable(['linear_0', 'linear_1'])

# Manually add a mean squared error loss layer on the model output 'linear_1',
# declared as a 1-element array.
# NOTE(review): the Swift training code supplies the target under the key
# "linear_1_true" - confirm this matches the training input the builder adds.
feature = ('linear_1', datatypes.Array(1))
builder.set_mean_squared_error_loss(name='lossLayer', input_feature=feature)


# Define the optimizer (SGD in this example)
# sgd_params = SgdParams(lr=0.001, batch=16)  # Adjust learning rate and batch size as needed
# builder.set_sgd_optimizer(sgd_params)

# define the optimizer (Adam in this example); mini-batch size is 16
adam_params = AdamParams(lr=0.01, beta1=0.9, beta2=0.999, eps=1e-8, batch=16)
builder.set_adam_optimizer(adam_params)

# Set the number of epochs
builder.set_epochs(100)

# Optionally, set descriptions for your training inputs
# NOTE(review): this assumes index 0 is the model input and index 1 is the
# loss target - verify the order, e.g. by printing spec.description.trainingInput.
spec.description.trainingInput[0].shortDescription = 'Input data'
spec.description.trainingInput[1].shortDescription = 'Target output data'

# Save the updated model
updated_model = coremltools.models.MLModel(spec)
updated_model.save('updatable_regression30.mlmodel')

Here is the full code I use to try to update the saved updatable_regression30.mlmodel:

import CoreML

import GameKit

/// Builds a reproducible synthetic regression data set.
///
/// Each sample is a `[1, 2]` float32 input whose entries are integers drawn from
/// a seeded `GKRandomDistribution` over 0...1000 and divided by 1000, paired with
/// a `[1]` float32 target equal to `10 * (sum of the inputs) + 1`.
///
/// - Parameters:
///   - numSamples: Number of samples to attempt to create.
///   - seed: Seed for the linear congruential random source.
/// - Returns: Parallel arrays of inputs and targets. A sample whose arrays cannot
///   be allocated is reported with `print` and left out of both arrays.
func generateSampleData(numSamples: Int, seed: UInt64) -> ([MLMultiArray], [MLMultiArray]) {
    // A fixed seed makes the generated values repeatable for a given `seed`.
    let source = GKLinearCongruentialRandomSource(seed: seed)
    let distribution = GKRandomDistribution(randomSource: source, lowestValue: 0, highestValue: 1000)

    var features: [MLMultiArray] = []
    var targets: [MLMultiArray] = []

    for _ in 0..<numSamples {
        do {
            let sampleInput = try MLMultiArray(shape: [1, 2], dataType: .float32)
            let sampleTarget = try MLMultiArray(shape: [1], dataType: .float32)

            // Draw one value per input column, in column order.
            let columnCount = sampleInput.shape[1].intValue
            let values = (0..<columnCount).map { _ in Float(distribution.nextInt()) / 1000.0 }

            for (column, value) in values.enumerated() {
                sampleInput[[0, column] as [NSNumber]] = NSNumber(value: value)
            }

            // Left-to-right Float accumulation starting from zero.
            let total = values.reduce(Float(0), +)
            sampleTarget[0] = NSNumber(value: 10.0 * total + 1.0)

            features.append(sampleInput)
            targets.append(sampleTarget)
        } catch {
            print("Error occurred while creating MLMultiArrays: \(error)")
        }
    }

    return (features, targets)
}

/// Computes the mean squared error of `model` over a data set, for comparison
/// with the loss reported in `context.metrics[.lossValue]`.
///
/// Only samples whose prediction succeeds contribute to the mean. Previously a
/// failed prediction was dropped silently while the sum was still divided by the
/// full sample count, which pulled the reported loss toward zero.
///
/// - Parameters:
///   - model: The model to evaluate.
///   - data: Parallel arrays of inputs and targets; element 0 of each target is
///     compared with element 0 of the prediction. Extra elements in the longer
///     array are ignored.
///   - inputName: Feature name under which each input is passed to the model.
///   - outputName: Feature name of the multi-array prediction to read.
/// - Returns: The mean of the squared errors over the evaluated samples, or
///   `.nan` when no sample could be evaluated (including empty `data`).
func computeLoss(model: MLModel,
                 data: ([MLMultiArray], [MLMultiArray]),
                 inputName: String = "x",
                 outputName: String = "linear_1") -> Double {
    let (inputs, targets) = data
    var squaredErrorSum: Double = 0
    var evaluatedCount = 0
    var skippedCount = 0
    var firstFailure: String?

    for (input, target) in zip(inputs, targets) {
        do {
            let provider = try MLDictionaryFeatureProvider(
                dictionary: [inputName: MLFeatureValue(multiArray: input)]
            )
            let prediction = try model.prediction(from: provider)

            guard let predicted = prediction.featureValue(for: outputName)?.multiArrayValue else {
                skippedCount += 1
                firstFailure = firstFailure ?? "prediction has no multi-array feature named \"\(outputName)\""
                continue
            }

            let residual = target[0].doubleValue - predicted[0].doubleValue
            squaredErrorSum += residual * residual
            evaluatedCount += 1
        } catch {
            skippedCount += 1
            firstFailure = firstFailure ?? "\(error)"
        }
    }

    // Report skipped samples once instead of hiding them.
    if skippedCount > 0 {
        print("computeLoss skipped \(skippedCount) sample(s); first failure: \(firstFailure ?? "unknown")")
    }

    // With nothing evaluated there is no meaningful mean; NaN makes that visible.
    guard evaluatedCount > 0 else { return .nan }
    return squaredErrorSum / Double(evaluatedCount)
}


/// Updates the bundled `updatable_regression30` model on synthetic data and
/// prints, at the end of every epoch, the framework-reported loss next to the
/// loss recomputed with `computeLoss` on the training and validation sets.
///
/// On successful completion the updated model is written to the user's
/// documents directory. All failures are reported with `print`; the function
/// returns early instead of trapping.
func trainModel() {
    // Load the updatable model
    guard let updatableModelURL = Bundle.main.url(forResource: "updatable_regression30", withExtension: "mlmodelc") else {
        print("Failed to load the updatable model")
        return
    }

    // Generate sample data (different seeds for training and validation).
    let (inputData, outputData) = generateSampleData(numSamples: 200, seed: 8)
    let validationData = generateSampleData(numSamples: 100, seed: 18)

    // Create an MLArrayBatchProvider from the sample data.
    // "x" is the model input; "linear_1_true" carries the training target.
    var featureProviders = [MLFeatureProvider]()
    featureProviders.reserveCapacity(inputData.count)
    for (input, output) in zip(inputData, outputData) {
        let dataPointFeatures: [String: MLFeatureValue] = [
            "x": MLFeatureValue(multiArray: input),
            "linear_1_true": MLFeatureValue(multiArray: output)
        ]
        do {
            featureProviders.append(try MLDictionaryFeatureProvider(dictionary: dataPointFeatures))
        } catch {
            // Previously dropped silently; report it so a shrunken training set is noticed.
            print("Skipping a training sample; could not build its feature provider: \(error)")
        }
    }
    let batchProvider = MLArrayBatchProvider(array: featureProviders)

    // Per-epoch histories collected by the progress handler.
    var lossValues: [Double] = []
    var computedTrainingLossValues: [Double] = []
    var validationLossValues: [Double] = []

    let progressHandlers = MLUpdateProgressHandlers(
        forEvents: [.trainingBegin, .epochEnd],
        progressHandler: { context in
            switch context.event {
            case .trainingBegin:
                print("Training began.")
            case .epochEnd:
                // Read the metric without force-casting so an unexpected
                // payload is reported rather than crashing the process.
                guard let loss = context.metrics[.lossValue] as? Double else {
                    print("Epoch ended without a Double loss value in context.metrics: \(context.metrics)")
                    return
                }
                lossValues.append(loss)

                let validationLoss = computeLoss(model: context.model, data: validationData)
                validationLossValues.append(validationLoss)

                let computedTrainLoss = computeLoss(model: context.model, data: (inputData, outputData))
                // Fixed: this history used to receive `validationLoss` by mistake.
                computedTrainingLossValues.append(computedTrainLoss)

                let epochDescription = context.metrics[.epochIndex].map { "\($0)" } ?? "?"
                print("Epoch \(epochDescription) ended. Training Loss: \(loss), Computed Training Loss: \(computedTrainLoss), Validation Loss: \(validationLoss)")
                // Raw metric value exactly as stored, kept for debugging.
                if let rawLoss = context.metrics[.lossValue] {
                    print("\(rawLoss)")
                }
            default:
                break
            }
        },
        completionHandler: { context in
            if let error = context.task.error {
                print("Update task failed with error: \(error)")
                return
            }

            let updatedModel = context.model
            do {
                let fileManager = FileManager.default
                let documentDirectory = try fileManager.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true)
                // NOTE(review): the file name looks like a leftover from another
                // project; kept unchanged so existing readers of this path still work.
                let fileURL = documentDirectory.appendingPathComponent("CatDog5.mlmodelc")
                try updatedModel.write(to: fileURL)
                print("Model updated and saved successfully to \(fileURL)")
            } catch {
                print("Failed to save the updated model: \(error)")
            }
        }
    )

    // Create an update task with progress handlers; report failure instead of `try!`.
    let updateTask: MLUpdateTask
    do {
        updateTask = try MLUpdateTask(forModelAt: updatableModelURL,
                                      trainingData: batchProvider,
                                      configuration: nil,
                                      progressHandlers: progressHandlers)
    } catch {
        print("Failed to create the update task: \(error)")
        return
    }

    // Start the update task
    updateTask.resume()
}

// Script entry point: run the model update defined by trainModel().
trainModel()

Nov 27 '23 06:11 baicenxiao

coremltools coremltools copied to clipboard

Used coremltools to make mlmodel updatable with MSE loss but the model gives negative MSE loss when training

❓Question

coremltools
coremltools copied to clipboard