
Commit

tensorflow-lite: use new yolov9s model with separate outputs to fix quantization accuracy loss
koush committed Dec 28, 2024
1 parent 5f7ecc0 commit e33a793
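
Context for the change: with a single fused detection output, every value in the tensor shares one int8 scale and zero point, so components with small numeric ranges (class confidences) get far fewer usable quantization levels than components with large ranges (box values in pixels). Exporting the head as separate output tensors gives each one its own quantization parameters, and the box decoding (DFL, anchors, strides, sigmoid) moves into float numpy in yolo_separate_outputs.py. Below is a minimal numpy sketch of the resolution problem; the quantize/dequantize helpers and the value ranges are illustrative assumptions, not taken from this commit or from the model:

import numpy as np

def quantize_int8(x):
    # one scale/zero point per tensor, as TFLite full-integer quantization uses for activations
    scale = (x.max() - x.min()) / 255.0
    zero_point = round(-128 - x.min() / scale)
    q = np.clip(np.round(x / scale) + zero_point, -128, 127).astype(np.int8)
    return q, scale, zero_point

def dequantize(q, scale, zero_point):
    return (q.astype(np.float32) - zero_point) * scale

scores = np.random.uniform(0, 1, 1000).astype(np.float32)    # class confidences
boxes = np.random.uniform(0, 320, 1000).astype(np.float32)   # pixel-space box values

# fused: scores share the box-dominated ~1.25-per-step scale -> worst-case error around 0.6
q, s, zp = quantize_int8(np.concatenate([boxes, scores]))
print(np.abs(dequantize(q, s, zp)[1000:] - scores).max())

# separate: scores get their own ~1/255 scale -> worst-case error around 0.002
q, s, zp = quantize_int8(scores)
print(np.abs(dequantize(q, s, zp) - scores).max())
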
Showing 4 changed files with 111 additions and 34 deletions.
4 changes: 2 additions & 2 deletions plugins/tensorflow-lite/package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion plugins/tensorflow-lite/package.json
@@ -58,5 +58,5 @@
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.1.72"
"version": "0.1.73"
}
70 changes: 39 additions & 31 deletions plugins/tensorflow-lite/src/tflite/__init__.py
@@ -19,19 +19,20 @@
pass
import asyncio
import concurrent.futures
import queue
import re
from typing import Any, Tuple

import scrypted_sdk
import tflite_runtime.interpreter as tflite
from . import yolo_separate_outputs
from scrypted_sdk.types import Setting, SettingValue

from common import yolo
from predict import PredictPlugin

availableModels = [
"Default",
"scrypted_yolov9s_relu_sep_320",
"scrypted_yolov9t_relu_320",
"scrypted_yolov9s_relu_320",
"ssd_mobilenet_v2_coco_quant_postprocess",
@@ -51,6 +52,7 @@
"efficientdet_lite3x_640_ptq",
]


def parse_label_contents(contents: str):
lines = contents.splitlines()
lines = [line for line in lines if line.strip()]
@@ -96,17 +98,12 @@ def configureModel():
nonlocal model

if defaultModel:
model = "scrypted_yolov9t_relu_320"
# if edge_tpus and next(
# (obj for obj in edge_tpus if obj["type"] == "usb"), None
# ):
# model = "ssdlite_mobiledet_coco_qat_postprocess"
# else:
# model = "efficientdet_lite0_320_ptq"
model = "scrypted_yolov9s_relu_sep_320"
self.yolo = "yolo" in model
self.yolov9 = "yolov9" in model
self.scrypted_model = "scrypted" in model
self.scrypted_yolov10 = "scrypted_yolov10" in model
self.scrypted_yolo_sep = "_sep" in model
self.modelName = model

print(f"model: {model}")
@@ -184,8 +181,7 @@ def executor_initializer():
thread_name = threading.current_thread().name
interpreter = available_interpreters.pop()
self.interpreters[thread_name] = interpreter
print('Interpreter initialized on thread {}'.format(thread_name))

print("Interpreter initialized on thread {}".format(thread_name))

self.executor = concurrent.futures.ThreadPoolExecutor(
initializer=executor_initializer,
@@ -247,29 +243,41 @@ def predict():
interpreter.set_tensor(tensor_index, im)
interpreter.invoke()
output_details = interpreter.get_output_details()
output = output_details[0]
x = interpreter.get_tensor(output["index"])
input_scale = self.get_input_details()[0]
if x.dtype == np.int8:
scale, zero_point = output["quantization"]
combined_scale = scale * input_scale
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
objs = yolo.parse_yolov9(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
if self.scrypted_yolo_sep:
outputs = []
for index, output in enumerate(output_details):
o = interpreter.get_tensor(output["index"]).astype(np.float32)
scale, zero_point = output["quantization"]
o -= zero_point
o *= scale
outputs.append(o)

output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height])
objs = yolo.parse_yolov9(output[0])
else:
# this code path is unused.
objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
output = output_details[0]
x = interpreter.get_tensor(output["index"])
if x.dtype == np.int8:
scale, zero_point = output["quantization"]
combined_scale = scale * input_scale
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
objs = yolo.parse_yolov9(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
# this code path is unused.
objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
else:
tflite_common.set_input(interpreter, input)
interpreter.invoke()
69 changes: 69 additions & 0 deletions plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py
@@ -0,0 +1,69 @@
import numpy as np

class DFL:
    # Distribution Focal Loss decoder: each box edge is predicted as a discrete
    # distribution over c1 bins; the softmax-weighted expectation of the bin
    # indices gives the edge distance.
    def __init__(self, c1=16):
        self.c1 = c1
        self.conv_weights = np.arange(c1).reshape(1, c1, 1, 1)

    def forward(self, x):
        b, _, a = x.shape  # batch, channels, anchors
        x = x.reshape(b, 4, self.c1, a).transpose(0, 2, 1, 3)
        x = softmax(x, axis=1)
        x = np.sum(self.conv_weights * x, axis=1)
        return x.reshape(b, 4, a)

def softmax(x, axis=-1):
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

def make_anchors(feats, strides, grid_cell_offset=0.5):
    # build cell-center anchor points and matching per-anchor strides for each feature map
    anchor_points, stride_tensor = [], []
    assert feats is not None
    dtype = feats[0].dtype
    for i, stride in enumerate(strides):
        _, _, h, w = feats[i].shape
        sx = np.arange(w, dtype=dtype) + grid_cell_offset  # shift x
        sy = np.arange(h, dtype=dtype) + grid_cell_offset  # shift y
        sy, sx = np.meshgrid(sy, sx, indexing="ij")
        anchor_points.append(np.stack((sx, sy), axis=-1).reshape(-1, 2))
        stride_tensor.append(np.full((h * w, 1), stride, dtype=dtype))
    return np.concatenate(anchor_points), np.concatenate(stride_tensor)

def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    # convert (left, top, right, bottom) distances from each anchor point into boxes
    lt, rb = np.split(distance, 2, axis=dim)

    anchor_points = anchor_points.transpose(0, 2, 1)

    x1y1 = anchor_points - lt
    x2y2 = anchor_points + rb
    if xywh:
        c_xy = (x1y1 + x2y2) / 2
        wh = x2y2 - x1y1
        return np.concatenate((c_xy, wh), axis=dim)  # xywh bbox
    return np.concatenate((x1y1, x2y2), axis=dim)  # xyxy bbox

def decode_bbox(preds, img_shape):
    # preds holds one 64-channel box-distribution tensor and one class tensor per detection scale
    num_classes = next((o.shape[2] for o in preds if o.shape[2] != 64), -1)
    assert num_classes != -1, 'cannot infer postprocessor inputs via output shape if there are 64 classes'
    # order the outputs: box tensors first, each group sorted from largest feature map to smallest
    pos = [
        i for i, _ in sorted(enumerate(preds),
            key=lambda x: (x[1].shape[2] if num_classes > 64 else -x[1].shape[2], -x[1].shape[1]))]
    # concatenate across scales and transpose to (batch, channels, anchors)
    x = np.transpose(
        np.concatenate([
            np.concatenate([preds[i] for i in pos[:len(pos) // 2]], axis=1),
            np.concatenate([preds[i] for i in pos[len(pos) // 2:]], axis=1)], axis=2), (0, 2, 1))
    reg_max = (x.shape[1] - num_classes) // 4
    dfl = DFL(reg_max) if reg_max > 1 else lambda x: x
    img_h, img_w = img_shape[-2], img_shape[-1]
    # each scale's stride is inferred from its anchor count relative to the input size
    strides = [
        int(np.sqrt(img_shape[-2] * img_shape[-1] / preds[p].shape[1])) for p in pos if preds[p].shape[2] != 64]
    dims = [(img_h // s, img_w // s) for s in strides]
    fake_feats = [np.zeros((1, 1, h, w), dtype=preds[0].dtype) for h, w in dims]
    anchors, strides = [x.transpose(0, 1) for x in make_anchors(fake_feats, strides, 0.5)]  # generate anchors and strides

    strides_tensor = strides.transpose(1, 0)
    strides_tensor = np.expand_dims(strides_tensor, 0)

    # decode DFL distances to pixel-space xywh boxes and append sigmoid class scores
    dbox = dist2bbox(dfl.forward(x[:, :-num_classes, :]), anchors[None, ...], xywh=True, dim=1) * strides_tensor

    return np.concatenate((dbox, 1 / (1 + np.exp(-x[:, -num_classes:, :]))), axis=1)
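
For reference, a standalone sketch of calling decode_bbox with dummy dequantized outputs shaped like a 320x320 yolov9s head (three scales with strides 8/16/32, 64 DFL box channels and 80 class channels per scale); the shapes and the flat import are illustrative assumptions, not part of the plugin:

import numpy as np
from yolo_separate_outputs import decode_bbox  # assumes the module is on the path

num_classes = 80
anchors_per_scale = [40 * 40, 20 * 20, 10 * 10]  # strides 8, 16, 32 at 320x320

preds = []
for n in anchors_per_scale:
    preds.append(np.random.randn(1, n, 64).astype(np.float32))           # DFL box distributions (4 * reg_max)
    preds.append(np.random.randn(1, n, num_classes).astype(np.float32))  # class logits

out = decode_bbox(preds, [320, 320])
print(out.shape)  # (1, 84, 2100): xywh boxes in pixels plus sigmoid class scores, ready for yolo.parse_yolov9
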
