openvino_backend
Using Intel OpenVINO models doesn't provide good results
Using this model from Intel: https://docs.openvino.ai/2024/omz_models_model_age_gender_recognition_retail_0013.html
I can't get good results, yet this model offers really good accuracy in the demo...
Instructions to replicate:
Download the model into the model repository (it is the FP32 version, obtained with the downloader tools from https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/README.md):
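For reference, a model repository layout along these lines should work with the OpenVINO backend. This is a minimal sketch: the tensor names and shapes are taken from the model description and the script below, so adjust them if your converted files differ:

models/
└── age_gender/
    ├── config.pbtxt
    └── 1/
        ├── model.xml
        └── model.bin

# config.pbtxt (sketch)
name: "age_gender"
backend: "openvino"
max_batch_size: 0
input [
  {
    name: "data"
    data_type: TYPE_FP32
    dims: [ 1, 3, 62, 62 ]
  }
]
output [
  {
    name: "prob"
    data_type: TYPE_FP32
    dims: [ 1, 2, 1, 1 ]
  },
  {
    name: "age_conv3"
    data_type: TYPE_FP32
    dims: [ 1, 1, 1, 1 ]
  }
]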
Start the Triton docker container:
docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /~/repo:/models nvcr.io/nvidia/tritonserver:24.07-py3 tritonserver --model-repository=/models
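Before running inference, server and model readiness can be checked over the HTTP endpoint mapped to port 8000 (standard Triton/KServe v2 routes; the model name is the one used in the script below):

curl -v localhost:8000/v2/health/ready
curl -v localhost:8000/v2/models/age_gender/ready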
Code to perform inference:
import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException
import cv2
import argparse

# Configuration
model_name = "age_gender"
model_version = "1"
server_url = "localhost:8001"
input_name = "data"
output_names = ["prob", "age_conv3"]
input_shape = (1, 3, 62, 62)  # Dimensions expected by the model

# Load an image and preprocess it
def preprocess_image(image_path, input_shape):
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image not found at path: {image_path}")
    image = cv2.resize(image, (input_shape[3], input_shape[2]))  # cv2.resize takes (width, height)
    image = image.astype(np.float32) / 255.0  # Normalize to 0-1
    image = np.transpose(image, (2, 0, 1))  # HWC to CHW
    image = np.expand_dims(image, axis=0)  # Add the batch dimension
    return image

# Initialize the Triton client
def initialize_triton_client(server_url):
    try:
        triton_client = grpcclient.InferenceServerClient(url=server_url, verbose=True)
    except Exception as e:
        print("Error creating the Triton client: " + str(e))
        exit(1)
    return triton_client

# Check that the server and the model are ready
def check_server_and_model(triton_client, model_name, model_version):
    if not triton_client.is_server_live():
        print("The Triton server is not live.")
        exit(1)
    if not triton_client.is_server_ready():
        print("The Triton server is not ready.")
        exit(1)
    if not triton_client.is_model_ready(model_name, model_version=model_version):
        print(f"Model {model_name} version {model_version} is not ready.")
        exit(1)

# Send the inference request and collect the results
def infer(triton_client, model_name, model_version, input_image):
    inputs = []
    outputs = []
    inputs.append(grpcclient.InferInput(input_name, input_shape, "FP32"))
    inputs[0].set_data_from_numpy(input_image)
    for output_name in output_names:
        outputs.append(grpcclient.InferRequestedOutput(output_name))
    try:
        results = triton_client.infer(model_name=model_name, model_version=model_version, inputs=inputs, outputs=outputs)
    except InferenceServerException as e:
        print("Inference error: " + str(e))
        exit(1)
    return results

# Interpret the results
def interpret_results(results):
    prob_output = results.as_numpy("prob")
    age_output = results.as_numpy("age_conv3")
    # prob_output: shape (1, 2, 1, 1) - Softmax output across 2 gender classes [0 - female, 1 - male]
    # age_output: shape (1, 1, 1, 1) - Estimated age divided by 100
    print("gender result=", prob_output)
    print("age results=", age_output)
    gender_prob = prob_output[0, 1, 0, 0]  # Probability of the "male" class
    age = age_output[0, 0, 0, 0] * 100  # Convert to years by multiplying by 100
    gender = "Male" if gender_prob > 0.5 else "Female"
    return gender, gender_prob, age

# Main function
def main(image_path):
    input_image = preprocess_image(image_path, input_shape)
    triton_client = initialize_triton_client(server_url)
    check_server_and_model(triton_client, model_name, model_version)
    results = infer(triton_client, model_name, model_version, input_image)
    gender, gender_prob, age = interpret_results(results)
    print(f"Predicted gender: {gender} (probability: {gender_prob})")
    print(f"Predicted age: {age}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Test inference against an age_gender classification model hosted on Triton Inference Server.")
    parser.add_argument("image_path", type=str, help="Path of the image to use for inference.")
    args = parser.parse_args()
    main(args.image_path)
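The script is then run with the path to an image (the script filename here is a placeholder; note the 62x62 input suggests the model expects a cropped face rather than a full frame):

python infer_age_gender.py /path/to/face_crop.jpg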
@siretru I think you shouldn't use the normalization. That model expects input data in the 0-255 range. Try dropping that division by 255 in preprocessing.
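Something like this for the preprocessing (an untested sketch; only the scaling line changes, everything else is your code as-is):

def preprocess_image(image_path, input_shape):
    image = cv2.imread(image_path)  # OpenCV loads BGR, which matches the model's expected color order
    if image is None:
        raise ValueError(f"Image not found at path: {image_path}")
    image = cv2.resize(image, (input_shape[3], input_shape[2]))  # cv2.resize takes (width, height)
    image = image.astype(np.float32)  # keep the 0-255 range, no division by 255
    image = np.transpose(image, (2, 0, 1))  # HWC to CHW
    image = np.expand_dims(image, axis=0)  # add the batch dimension
    return image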