SSD Object Detection parsing is broken. parseSSDMobileNet returns erroneous and/or invalid results
I successfully load my model and start running it on frames from the camera controller. I see results come in when my object (water test strips) appears in the camera's field of view. However, the parsed values from the output tensors are all inaccurate, which leads to incorrect bounding boxes and class detections.
For example, in parseSSDMobileNet, here are some of the outputs for the locations, classes, scores, and num_detections float arrays. (Why are classes and num_detections float arrays in the first place?) These outputs come from:
// Output tensor indices as assumed by the plugin:
float* output_locations = TfLiteInterpreterGetOutputTensor(interpreter, 0)->data.f; // bounding boxes
float* output_classes = TfLiteInterpreterGetOutputTensor(interpreter, 1)->data.f;   // class indices
float* output_scores = TfLiteInterpreterGetOutputTensor(interpreter, 2)->data.f;    // confidence scores
float* num_detections = TfLiteInterpreterGetOutputTensor(interpreter, 3)->data.f;   // detection count
Results, when I log the first 8 entries of each float pointer:
for (int d = 0; d < 8; d++) {
  NSLog(@"output_classes loc %d %f", d, output_classes[d]);
}
I get the following:
output_classes   loc 0-7: 0.590460, 0.468586, 0.628208, 0.514471, 0.355844, 0.468039, 0.392715, 0.510365
output_locations loc 0-7: 0.707110, 0.689438, 0.580224, 0.514742, 0.494435, 0.494395, 0.344939, 0.124733
num_detections   loc 0-7: 5.000000, 2.000000, 3.000000, 1.000000, 4.000000, 0.000000, 0.000000, 1.000000
output_scores    loc 0-7: 10.000000, -0.015256, 0.140742, 0.009112, 0.334685, 0.185375, 0.033109, -0.257905
So these are obviously incorrect. For instance, why the negative scores? Why is the first score regularly either 10 or 0? Why do the locations yield negative widths and heights? num_detections looks interesting, but I can't work out what it is supposed to represent.
My model works perfectly in Python, following the Jupyter notebooks in this tutorial: https://github.com/nicknochnack/TFODCourse
But when I bring in the transfer-trained model after exporting it to TensorFlow Lite, I notice it yields these kinds of results.
Here's my training config:
model {
  ssd {
    num_classes: 8
    image_resizer {
      fixed_shape_resizer {
        height: 320
        width: 320
      }
    }
    feature_extractor {
      type: "ssd_mobilenet_v2_fpn_keras"
      depth_multiplier: 1.0
      min_depth: 16
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 4e-05
          }
        }
        initializer {
          random_normal_initializer {
            mean: 0.0
            stddev: 0.01
          }
        }
        activation: RELU_6
        batch_norm {
          decay: 0.997
          scale: true
          epsilon: 0.001
        }
      }
      use_depthwise: true
      override_base_feature_extractor_hyperparams: true
      fpn {
        min_level: 3
        max_level: 7
        additional_layer_depth: 128
      }
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 4e-05
            }
          }
          initializer {
            random_normal_initializer {
              mean: 0.0
              stddev: 0.01
            }
          }
          activation: RELU_6
          batch_norm {
            decay: 0.997
            scale: true
            epsilon: 0.001
          }
        }
        depth: 128
        num_layers_before_predictor: 4
        kernel_size: 3
        class_prediction_bias_init: -4.6
        share_prediction_tower: true
        use_depthwise: true
      }
    }
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        scales_per_octave: 2
      }
    }
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-08
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
        use_static_shapes: false
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid_focal {
          gamma: 2.0
          alpha: 0.25
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    encode_background_as_zeros: true
    normalize_loc_loss_by_codesize: true
    inplace_batchnorm_update: true
    freeze_batchnorm: false
  }
}
train_config {
  batch_size: 4
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  sync_replicas: true
  optimizer {
    momentum_optimizer {
      learning_rate {
        cosine_decay_learning_rate {
          learning_rate_base: 0.08
          total_steps: 50000
          warmup_learning_rate: 0.026666
          warmup_steps: 1000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  fine_tune_checkpoint: "Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0"
  num_steps: 50000
  startup_delay_steps: 0.0
  replicas_to_aggregate: 8
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
  fine_tune_checkpoint_type: "detection"
  fine_tune_checkpoint_version: V2
}
train_input_reader {
  label_map_path: "Tensorflow/workspace/annotations/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "Tensorflow/workspace/annotations/train.record"
  }
}
eval_config {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}
eval_input_reader {
  label_map_path: "Tensorflow/workspace/annotations/label_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "Tensorflow/workspace/annotations/test.record"
  }
}
My label file contains eight labels: ch, tc, fc, ph, ta, cya, ccv1, ccv2.
Is it possible that the outputs are not being parsed according to the TensorFlowLiteC library's specs?
Wow, well in writing this, I got a better look at the numbers, and switched out the tensor parsing as follows:
float* output_locations = TfLiteInterpreterGetOutputTensor(interpreter, 1)->data.f;
float* output_classes = TfLiteInterpreterGetOutputTensor(interpreter, 3)->data.f;
float* output_scores = TfLiteInterpreterGetOutputTensor(interpreter, 0)->data.f;
float* num_detections = TfLiteInterpreterGetOutputTensor(interpreter, 2)->data.f;
as opposed to:
float* output_locations = TfLiteInterpreterGetOutputTensor(interpreter, 0)->data.f;
float* output_classes = TfLiteInterpreterGetOutputTensor(interpreter, 1)->data.f;
float* output_scores = TfLiteInterpreterGetOutputTensor(interpreter, 2)->data.f;
float* num_detections = TfLiteInterpreterGetOutputTensor(interpreter, 3)->data.f;
I am getting great results now! I still need to figure out the scaling to present the bounding boxes; the scale seems to be off by a factor of about 2. I'll let you know what works.
I'm super glad I noticed this, as I was getting a bit hopeless. I also don't think the number of detections is parsed correctly yet; I'll let you know if I find the correct parsing for num_detections.
This worked on iOS, but it is not working on Android. I tried to make the same changes, but it gets hung up on the error others have complained about:
Unhandled Exception: PlatformException(Failed to run model, Cannot copy from a TensorFlowLite tensor (StatefulPartitionedCall:1) with shape [1, 10] to a Java object with shape [1, 1]., java.lang.IllegalArgumentException: Cannot copy from a TensorFlowLite tensor (StatefulPartitionedCall:1) with shape [1, 10] to a Java object with shape [1, 1].
I think it's complaining about the numDetections map; if I could figure out which output tensor holds the number of detections, things might work.
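For what it's worth, here is a minimal sketch of how the output layout could be inspected from the Android side with the org.tensorflow.lite Interpreter API, assuming the plugin's existing tfLite field; logging each output tensor's name and shape should make the index-to-meaning mapping explicit (the detection-count tensor should be the one with shape [1] or [1, 1]):
// Sketch: log every output tensor's name, shape, and type so the mapping from
// index to boxes / classes / scores / num_detections is observed, not assumed.
// Requires: import org.tensorflow.lite.Tensor;
for (int i = 0; i < tfLite.getOutputTensorCount(); i++) {
  Tensor t = tfLite.getOutputTensor(i);
  Log.v("TFLiteOutput", i + " name=" + t.name()
      + " shape=" + java.util.Arrays.toString(t.shape())
      + " type=" + t.dataType());
}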
Well, I got Android working through various hacks.
On Android I now take a photo on a periodic timer rather than streaming the image as I do on iOS; something seems to be wrong with Flutter's YUV 420 conversion.
So, for Android I do:
XFile tempFile = await photoController!.takePicture();
Uint8List imageBytes = File(tempFile.path).readAsBytesSync();
List<int> values = imageBytes.buffer.asUint8List();
imglib.Image? photo = imglib.decodeImage(values);
ProcessedImage p = ProcessedImage();
var recs = await Tflite.detectObjectOnImage(
path: tempFile.path,
threshold: 0.25,
model: "SSDMobileNet",
numResultsPerClass: 1,
asynch: false,
);
p.recognitions = recs?.map((e) => Recognition.fromData(e)).toList();
Then I modified TFlitePlugin.java to match what I did to the iOS TFlitePlugin.mm:
RunSSDMobileNet(HashMap args, ByteBuffer imgData, int numResultsPerClass, float threshold, Result result) {
  super(args, result);
  this.num = 10; //tfLite.getOutputTensor(2).shape()[0][0];
  this.numResultsPerClass = numResultsPerClass;
  this.threshold = threshold;
  this.outputLocations = new float[1][num][4];
  this.outputClasses = new float[1][num];
  this.outputScores = new float[1][num];
  this.inputArray = new Object[]{imgData};

  Log.v("T0", " " + tfLite.getOutputTensor(0).numBytes());
  Log.v("T1", " " + tfLite.getOutputTensor(1).numBytes());
  Log.v("T2", " " + tfLite.getOutputTensor(2).numBytes());
  Log.v("T3", " " + tfLite.getOutputTensor(3).numBytes());

  outputMap.put(1, outputLocations);
  outputMap.put(3, outputClasses);
  outputMap.put(0, outputScores);
  outputMap.put(2, numDetections);

  startTime = SystemClock.uptimeMillis();
}
For whatever reason, the size it expects for the tensor conversion is 10, so I just hardcode that, which is ugly and I hate it, but it works.
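If the exported model reports its output shapes reliably, the hardcoded 10 could presumably be derived from the tensor metadata instead. A rough sketch under that assumption, using the same tfLite field and the index mapping above (locations at index 1):
// Sketch: size the output buffers from the reported shapes rather than hardcoding 10.
int num = tfLite.getOutputTensor(1).shape()[1]; // locations tensor, e.g. shape [1, 10, 4] -> 10
this.outputLocations = new float[1][num][4];
this.outputClasses = new float[1][num];
this.outputScores = new float[1][num];
this.numDetections = new float[1];              // assuming numDetections is declared as float[]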
I also have to rotate the image after the bitmap is read, as shown below. I tried to do the rotation in Flutter and send the result to detectObjectOnBinary, but detectObjectOnBinary does not seem to accept Flutter's byte fetching:
ByteBuffer feedInputTensorImage(String path, float mean, float std) throws IOException {
  InputStream inputStream = new FileInputStream(path.replace("file://", ""));
  Bitmap bitmapRaw = BitmapFactory.decodeStream(inputStream);
  // Rotate the decoded bitmap 90 degrees before feeding it to the model.
  Matrix matrix = new Matrix();
  matrix.postRotate(90);
  bitmapRaw = Bitmap.createBitmap(bitmapRaw, 0, 0, bitmapRaw.getWidth(), bitmapRaw.getHeight(), matrix, true);
  return feedInputTensor(bitmapRaw, mean, std);
}
Now object detection works for my model on Android as it does on iOS. I will have to look into other libraries if I find this not working across all devices, as it seems like this lib is getting a bit rusty.
Derp, wish I'd checked this comment before spending a few days solving the same problem. Here's what worked for me: https://github.com/shaqian/flutter_tflite/pull/235/files
I have tried this fix, but I get:
error: cannot find symbol
String thisTensorName = tfLiteObjectRecognition.getOutputTensor(outputMapLocationIterator).name();
^
symbol: variable tfLiteObjectRecognition
location: class TflitePlugin.RunSSDMobileNet
I have also tried using tfLite.getOutputTensor(outputMapLocationIterator).name(), which builds fine but returns no output.
My problem is that I always get x: 0.0 in my results, no matter where the object is.
{rect: {w: 0.9994966983795166, x: 0.0, h: 0.9955635070800781, y: 0.0027381181716918945}, confidenceInClass: 0.9135933518409729, detectedClass: 1 Krogec}
is the latest example. This tags my whole image.
Did I miss something?
Code for tflite
_loadTfModelAndLabels(String modelPath, String labelsPath) async {
  String? res = await Tflite.loadModel(
    model: modelPath,
    labels: labelsPath,
    numThreads: 1,
    isAsset: true,
    useGpuDelegate: false,
  );
}

_releaseTfResources() async {
  await Tflite.close();
}

_runRecognitionOnImage(String path) async {
  final recognitions = await Tflite.detectObjectOnImage(
    path: path,
    model: 'SSDMobileNet',
    imageMean: 0.0,
    imageStd: 255.0,
    numResultsPerClass: 1,
    threshold: 0.5,
    asynch: true,
  );
  return recognitions;
}
Image processing
Future<String?> _takePicture() async {
  if (_controller.value.isTakingPicture) {
    // * A capture is already pending, do nothing.
    return null;
  }
  try {
    XFile imageFile = await _controller.takePicture();
    img.Image? capturedImage = img.decodeImage(await imageFile.readAsBytes());
    img.Image orientedImage = img.bakeOrientation(capturedImage!);
    await File(imageFile.path).writeAsBytes(img.encodePng(orientedImage));
    return imageFile.path;
  } catch (e) {
    debugPrint(e.toString());
  }
  return null;
}
EDIT: Fixed by changing image mean and std:
_runRecognitionOnImage(String path) async {
  final recognitions = await Tflite.detectObjectOnImage(
    path: path,
    model: 'SSDMobileNet',
    imageMean: 255,
    imageStd: 255,
    numResultsPerClass: 1,
    threshold: 0.5,
    asynch: true,
  );
  return recognitions;
}