
My Flutter sign language detector is not detecting anything

Open janrabang opened this issue 10 months ago • 0 comments

I am using the SSD MobileNet V2 FPNLite 320x320 pretrained model, and I trained it on my custom sign language dataset.
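To sanity-check the export, I also print every tensor's name, shape, and element type (a minimal sketch with tflite_flutter; `assets/model.tflite` is just my asset path). A `uint8` input would mean the model is quantized and wants raw 0-255 bytes instead of floats, and the output order matters for the output map further down:

```dart
import 'package:tflite_flutter/tflite_flutter.dart';

Future<void> inspectModel() async {
  final interpreter = await Interpreter.fromAsset('assets/model.tflite');

  // The input tensor shows the expected shape AND element type. float32
  // means normalized floats; uint8 means a quantized model that expects
  // raw 0-255 bytes, in which case float input data would be wrong.
  final input = interpreter.getInputTensor(0);
  print('input: ${input.name} ${input.shape} ${input.type}');

  // TF2-exported SSD models do not always emit (scores, boxes, count,
  // classes) in that order, so print all four before building output maps.
  final outputs = interpreter.getOutputTensors();
  for (var i = 0; i < outputs.length; i++) {
    print('output $i: ${outputs[i].name} ${outputs[i].shape} ${outputs[i].type}');
  }

  interpreter.close();
}
```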

Here is my object_detection_service.dart:

```dart
import 'package:camera/camera.dart';
import 'package:tflite_flutter/tflite_flutter.dart';
import 'package:image/image.dart' as img;
import 'dart:developer' as dev;

class InferenceData {
  final Interpreter interpreter;
  final List<List<List<List<double>>>> inputData;

  InferenceData(this.interpreter, this.inputData);
}

class InferenceResult {
  final List<double> scores;
  final List<List<double>> boxes;
  final List<double> classes;

  InferenceResult(this.scores, this.boxes, this.classes);
}

class ObjectDetectionService {
  static const int inputSize = 320;
  static const double confidenceThreshold = 0.1;

  static void _log(String message) {
    dev.log('[ObjectDetection] $message');
  }

  static Future<List<List<List<List<double>>>>> preprocessImageIsolate(
      CameraImage image) async {
    try {
      final img.Image rgbImage;
      if (image.format.group == ImageFormatGroup.yuv420) {
        rgbImage = _convertYUV420(image);
      } else if (image.format.group == ImageFormatGroup.bgra8888) {
        rgbImage = _convertBGRA8888(image);
      } else {
        throw Exception('Unsupported image format: ${image.format.group}');
      }

      final resized = img.copyResize(
        rgbImage,
        width: inputSize,
        height: inputSize,
        interpolation: img.Interpolation.linear,
      );

      // Build a [1, 320, 320, 3] float input tensor.
      final input = List.generate(
        1,
        (index) => List.generate(
          inputSize,
          (y) => List.generate(
            inputSize,
            (x) => List.generate(
              3,
              (c) {
                final pixel = resized.getPixel(x, y);
                // Normalize to [0, 1] instead of [-1, 1]
                return c == 0
                    ? pixel.r / 255.0
                    : c == 1
                        ? pixel.g / 255.0
                        : pixel.b / 255.0;
              },
            ),
          ),
        ),
      );

      _log('Image preprocessed');
      return input;
    } catch (e, stack) {
      _log('Preprocessing error: $e\n$stack');
      throw Exception('Preprocessing failed: $e');
    }
  }

  static img.Image _convertYUV420(CameraImage image) {
    final width = image.width;
    final height = image.height;
    final yPlane = image.planes[0].bytes;
    final uPlane = image.planes[1].bytes;
    final vPlane = image.planes[2].bytes;

    final yRowStride = image.planes[0].bytesPerRow;
    final uvRowStride = image.planes[1].bytesPerRow;
    final uvPixelStride = image.planes[1].bytesPerPixel!;

    final output = img.Image(width: width, height: height);

    for (int y = 0; y < height; y++) {
      for (int x = 0; x < width; x++) {
        final int yIndex = y * yRowStride + x;
        final int uvIndex = (y ~/ 2) * uvRowStride + (x ~/ 2) * uvPixelStride;

        final yValue = yPlane[yIndex];
        final uValue = uPlane[uvIndex];
        final vValue = vPlane[uvIndex];

        // Using standard YUV to RGB conversion
        final int r = (yValue + (1.370705 * (vValue - 128))).toInt().clamp(0, 255);
        final int g = (yValue - (0.698001 * (vValue - 128)) - (0.337633 * (uValue - 128))).toInt().clamp(0, 255);
        final int b = (yValue + (1.732446 * (uValue - 128))).toInt().clamp(0, 255);

        output.setPixelRgb(x, y, r, g, b);
      }
    }

    return output;
  }

  static img.Image _convertBGRA8888(CameraImage image) {
    return img.Image.fromBytes(
      width: image.width,
      height: image.height,
      bytes: image.planes[0].bytes.buffer,
      order: img.ChannelOrder.bgra,
    );
  }

  static Future<InferenceResult?> runInferenceIsolate(InferenceData data) async {
    try {
      // Output buffers, filled with 0.0 so they are List<double> to match the
      // model's float32 outputs (List.filled(100, 0) would create a List<int>).
      final outputBoxes = List<List<double>>.generate(
        100,
        (_) => List.filled(4, 0.0),
      );
      final outputClasses = List.filled(100, 0.0);
      final outputScores = List.filled(100, 0.0);
      final outputCount = [1.0];

      // NOTE: this index -> tensor mapping is an assumption; TF2-exported SSD
      // models do not always order their outputs the same way, so it should
      // be verified against interpreter.getOutputTensors().
      final outputs = {
        0: outputScores,
        1: outputBoxes,
        2: outputCount,
        3: outputClasses,
      };

      data.interpreter.runForMultipleInputs([data.inputData], outputs);

      // Debug logging
      _log('Scores: ${outputScores.take(5).toList()}');
      _log('Classes: ${outputClasses.take(5).toList()}');
      _log('First box: ${outputBoxes[0]}');

      return InferenceResult(
        outputScores,
        outputBoxes,
        outputClasses,
      );
    } catch (e, stack) {
      _log('Inference error: $e\n$stack');
      return null;
    }
  }
}
```
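To rule out the camera pipeline, one test is to run the model once on a bundled photo instead of the live stream. A sketch of that (assuming `assets/test_sign.jpg` is a hypothetical test image of one of my signs, and the same output order as above; float models exported from the TF2 Object Detection API are usually trained on [-1, 1] inputs via x / 127.5 - 1, so this uses that scaling for comparison with the [0, 1] version):

```dart
import 'package:flutter/services.dart' show rootBundle;
import 'package:image/image.dart' as img;
import 'package:tflite_flutter/tflite_flutter.dart';

Future<void> smokeTest() async {
  final interpreter = await Interpreter.fromAsset('assets/model.tflite');

  // A known-good photo of one of the signs, bundled as an asset.
  final bytes = await rootBundle.load('assets/test_sign.jpg');
  final photo = img.decodeImage(
      bytes.buffer.asUint8List(bytes.offsetInBytes, bytes.lengthInBytes))!;
  final resized = img.copyResize(photo, width: 320, height: 320);

  // [1, 320, 320, 3] input scaled to [-1, 1] instead of [0, 1].
  final input = [
    List.generate(
      320,
      (y) => List.generate(320, (x) {
        final p = resized.getPixel(x, y);
        return [p.r / 127.5 - 1.0, p.g / 127.5 - 1.0, p.b / 127.5 - 1.0];
      }),
    ),
  ];

  // Same output map as runInferenceIsolate; if getOutputTensors() reports a
  // leading batch dimension ([1, 100, ...]), wrap each buffer in one more list.
  final scores = List.filled(100, 0.0);
  final boxes = List.generate(100, (_) => List.filled(4, 0.0));
  final count = [0.0];
  final classes = List.filled(100, 0.0);

  interpreter.runForMultipleInputs([input], {
    0: scores,
    1: boxes,
    2: count,
    3: classes,
  });

  print('best score: ${scores.reduce((a, b) => a > b ? a : b)}');
  interpreter.close();
}
```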

And my scan_controller.dart:

```dart
import 'dart:developer' as dev;
import 'package:camera/camera.dart';
import 'package:get/get.dart';
import 'package:tflite_flutter/tflite_flutter.dart';
import 'package:flutter/services.dart';
import 'package:flutter/foundation.dart';
import 'camera_service.dart';
import 'object_detection_service.dart';

class ScanController extends GetxController {
  final List<CameraDescription> cameras;
  late CameraService cameraService;
  late ObjectDetectionService detectionService;
  Interpreter? interpreter;
  List<String> labels = [];

  RxBool isCameraInitialized = false.obs;
  RxString errorMessage = ''.obs;
  RxString label = ''.obs;
  RxDouble x = 0.0.obs;
  RxDouble y = 0.0.obs;
  RxDouble w = 0.0.obs;
  RxDouble h = 0.0.obs;
  RxBool isProcessing = false.obs;
  RxBool isTabActive = true.obs;

  static const String modelPath = 'assets/model.tflite';
  static const String labelsPath = 'assets/labels.txt';
  static const Duration _processingInterval = Duration(milliseconds: 100);
  DateTime _lastProcessingTime = DateTime.now();

  ScanController({required this.cameras}) {
    cameraService = CameraService(cameras: cameras);
    detectionService = ObjectDetectionService();
  }

  void _log(String message) {
    dev.log('[ScanController] $message');
  }

  @override
  void onInit() {
    super.onInit();
    _initialize();
  }

  @override
  void onClose() {
    disposeResources();
    super.onClose();
  }

  Future<void> _initialize() async {
    try {
      _log('Starting initialization');

      final options = InterpreterOptions()..threads = 4;

      interpreter = await Interpreter.fromAsset(
        modelPath,
        options: options,
      );

      // Log interpreter details
      final inputTensor = interpreter!.getInputTensor(0);
      final outputTensor = interpreter!.getOutputTensor(0);
      _log('Input tensor shape: ${inputTensor.shape}');
      _log('Output tensor shape: ${outputTensor.shape}');

      await loadLabels();
      await initializeCamera();
      _log('Initialization complete');
    } catch (e, stack) {
      errorMessage.value = 'Initialization error: $e';
      _log('Initialization error: $e\n$stack');
    }
  }

  Future<void> loadLabels() async {
    try {
      final labelData = await rootBundle.loadString(labelsPath);
      labels = labelData
          .split('\n')
          .where((label) => label.trim().isNotEmpty)
          .toList();
      _log('Labels loaded: ${labels.length}');
      _log('First 5 labels: ${labels.take(5).toList()}');
    } catch (e) {
      _log('Error loading labels: $e');
      rethrow;
    }
  }

  Future<void> initializeCamera() async {
    try {
      await cameraService.initialize();
      if (isTabActive.value) {
        await startCamera();
      }
      isCameraInitialized.value = true;
    } catch (e) {
      errorMessage.value = e.toString();
      _log('Camera initialization error: $e');
      rethrow;
    }
  }

  Future<void> startCamera() async {
    // NOTE: initializeCamera() calls this *before* it sets
    // isCameraInitialized to true, so this guard returns early there and the
    // image stream may never actually start.
    if (!isCameraInitialized.value) return;
    await cameraService.startImageStream(_processCameraImage);
  }

  Future<void> stopCamera() async {
    await cameraService.stopImageStream();
  }

  Future<void> disposeResources() async {
    try {
      await cameraService.dispose();
      interpreter?.close();
      isProcessing.value = false;
      isCameraInitialized.value = false;
    } catch (e) {
      _log('Error during resource disposal: $e');
    }
  }

  Future<void> _processCameraImage(CameraImage image) async {
    if (isProcessing.value) return;

    final now = DateTime.now();
    if (now.difference(_lastProcessingTime) < _processingInterval) return;

    _lastProcessingTime = now;
    isProcessing.value = true;

    try {
      if (interpreter == null) {
        _log('Interpreter not ready');
        return;
      }

      final inputData = await compute(
        ObjectDetectionService.preprocessImageIsolate,
        image,
      );
      _log('Image preprocessed');

      // NOTE: I am not sure a live Interpreter can be sent through compute()
      // like this; see the Interpreter.fromAddress sketch after this code block.
      final outputs = await compute(
        ObjectDetectionService.runInferenceIsolate,
        InferenceData(interpreter!, inputData),
      );
      _log('Inference run completed');

      if (outputs != null) {
        _processDetections(
          outputs.scores,
          outputs.boxes,
          outputs.classes,
        );
      }
    } catch (e, stack) {
      _log('Processing error: $e\n$stack');
    } finally {
      isProcessing.value = false;
    }
  }

  void _processDetections(
    List<double> scores,
    List<List<double>> boxes,
    List<double> classes,
  ) {
    try {
      double maxScore = 0;
      int maxIndex = -1;

      _log('Processing detections:');
      _log('Scores: ${scores.take(5)}');
      _log('Classes: ${classes.take(5)}');

      for (var i = 0; i < scores.length; i++) {
        if (scores[i] > maxScore &&
            scores[i] > ObjectDetectionService.confidenceThreshold) {
          maxScore = scores[i];
          maxIndex = i;
        }
      }

      if (maxIndex != -1) {
        final box = boxes[maxIndex];
        // NOTE: depending on how labels.txt was generated, the model's class
        // index can be off by one (some label files keep a background row).
        final classIndex = classes[maxIndex].toInt();
        if (classIndex < labels.length) {
          label.value =
              '${labels[classIndex]} ${(maxScore * 100).toStringAsFixed(0)}%';
          // SSD boxes are [ymin, xmin, ymax, xmax], normalized to [0, 1].
          y.value = box[0];
          x.value = box[1];
          h.value = box[2] - box[0];
          w.value = box[3] - box[1];
          _log('Detection: ${label.value} at ($x, $y) with size ($w, $h)');
        }
      } else {
        label.value = '';
      }
    } catch (e, stack) {
      _log('Detection processing error: $e\n$stack');
    }
  }
}
```
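One thing I am not sure about is whether a live `Interpreter` can be sent through `compute()` at all; the tflite_flutter isolate examples pass only `interpreter.address` (an int) into the isolate and rebuild the wrapper there with `Interpreter.fromAddress`. A sketch of that variant (the `interpreterAddress` field name is mine):

```dart
import 'package:flutter/foundation.dart';
import 'package:tflite_flutter/tflite_flutter.dart';

// Carry the native address across the isolate boundary instead of the
// Interpreter object itself; an int is always safe to send.
class InferenceData {
  final int interpreterAddress;
  final List<List<List<List<double>>>> inputData;

  InferenceData(this.interpreterAddress, this.inputData);
}

// Isolate entry point: wrap the same native interpreter (no model reload)
// and run inference as before.
Future<List<double>> inferenceIsolate(InferenceData data) async {
  final interpreter = Interpreter.fromAddress(data.interpreterAddress);

  final scores = List.filled(100, 0.0);
  final boxes = List.generate(100, (_) => List.filled(4, 0.0));
  final count = [0.0];
  final classes = List.filled(100, 0.0);

  interpreter.runForMultipleInputs([data.inputData], {
    0: scores,
    1: boxes,
    2: count,
    3: classes,
  });

  return scores; // boxes/classes would come back the same way
}

// Caller side, replacing the current compute(...) call:
//   final scores = await compute(
//     inferenceIsolate,
//     InferenceData(interpreter!.address, inputData),
//   );
```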

I have been working on this for weeks now and I don't know what else to do. I am also new to programming.

janrabang · Feb 13 '25 03:02