From 1989d1801ff52e5431b5ed3a3162a9070d146ab6 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Mon, 21 Oct 2024 15:36:16 -0400 Subject: [PATCH 01/40] change coco saving to use the annotation --- .../tcn_hpl/predict.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index 4715b3f8f..0b5cfe0e9 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -464,19 +464,30 @@ def collect( raise RuntimeError( "No video set before results collection. See `set_video` method." ) - packet = dict( + # get the global id for the image from the frame number + + # add the image + img = dict( video_id=self._vid, frame_index=frame_index, - activity_pred=activity_pred, - activity_conf=list(activity_conf_vec), ) if name is not None: - packet["name"] = name + img["name"] = name if file_name is not None: - packet["file_name"] = file_name + img["file_name"] = file_name if activity_gt is not None: - packet["activity_gt"] = activity_gt - self._dset.add_image(**packet) + img["activity_gt"] = activity_gt + # save the gid from the image to link to the annot + gid = self._dset.add_image(**img) + + # additional items to save + add_items = dict( + prob=list(activity_conf_vec), + ) + # add the annotation + self._dset.add_annotation( + image_id=gid, category_id=activity_pred, **add_items + ) def write_file(self): """ From ccf803230568e10dbd6947ea7abf03825a8274f1 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 09:05:48 -0400 Subject: [PATCH 02/40] add a debug option to the TCN node in order to see the inputs it has when it decides not to create a classification --- .../activity_classifier_tcn.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 1117fc074..d6adac23f 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -102,6 +102,10 @@ # activity prediction for the "live" image will not occur until object # detections are predicted for that frame. PARAM_WINDOW_LEADS_WITH_OBJECTS = "window_leads_with_objects" +# Debug file saved out to the filesystem for understanding the node's +# inputs when it decides not to create an activity classification. 
+# the format will be csv with a list of the object detections and the pose +PARAM_DEBUG_FILE = "debug_file" class NoActivityClassification(Exception): @@ -148,6 +152,7 @@ def __init__(self): (PARAM_TOPIC, "medical"), (PARAM_POSE_REPEAT_RATE, 0), (PARAM_WINDOW_LEADS_WITH_OBJECTS, False), + (PARAM_DEBUG_FILE, ""), ], ) self._img_ts_topic = param_values[PARAM_IMG_TS_TOPIC] @@ -166,6 +171,12 @@ def __init__(self): self._window_lead_with_objects = param_values[PARAM_WINDOW_LEADS_WITH_OBJECTS] + self._debug_file = param_values[PARAM_DEBUG_FILE] + # clear the file if it exists (since we are appending to it) + if self._debug_file != "": + with open(self._debug_file, "w") as f: + f.write("") + self.topic = param_values[PARAM_TOPIC] # Load in TCN classification model and weights with SimpleTimer("Loading inference module", log.info): @@ -655,6 +666,12 @@ def rt_loop(self): "not yield an activity classification for " "publishing." ) + if self._debug_file != "": + # save the info for why this window was not processed + repr = window.__repr__() + with open(self._debug_file, "a") as f: + f.write(f"timestamp: {self.get_clock().now().to_msg()}\n") + f.write(f"{repr}\n") # This window has completed processing - record its leading # timestamp now. @@ -888,5 +905,34 @@ def destroy_node(self): main = make_default_main(ActivityClassifierTCN, multithreaded_executor=4) +if __name__ == "__main__": + main() + """ + Save results if we have been initialized to do that. + + This method does nothing if this node has not been initialized to + collect results. + """ + rc = self._results_collector + if rc is not None: + self.get_logger().info( + f"Writing classification results to: {self._output_kwcoco_path}" + ) + self._results_collector.write_file() + + def destroy_node(self): + log = self.get_logger() + log.info("Stopping node runtime") + self.rt_stop() + with SimpleTimer("Shutting down runtime thread...", log.info): + self._rt_active.clear() # make RT active flag "False" + self._rt_thread.join() + self._save_results() + super().destroy_node() + + +main = make_default_main(ActivityClassifierTCN, multithreaded_executor=4) + + if __name__ == "__main__": main() From 24b3080c0871ba86c189c688d1e12167d3766039 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 09:07:20 -0400 Subject: [PATCH 03/40] adjust saving of coco output and add score --- angel_system/activity_classification/tcn_hpl/predict.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index 0b5cfe0e9..94129e01a 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -480,13 +480,12 @@ def collect( # save the gid from the image to link to the annot gid = self._dset.add_image(**img) - # additional items to save - add_items = dict( - prob=list(activity_conf_vec), - ) # add the annotation self._dset.add_annotation( - image_id=gid, category_id=activity_pred, **add_items + image_id=gid, + category_id=activity_pred, + score=activity_conf_vec[activity_pred], + prob=list(activity_conf_vec), ) def write_file(self): From a1e3bef8d3a23225f3f886d82b6814aec771cf28 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 09:08:16 -0400 Subject: [PATCH 04/40] add collection of no activity classification --- .../activity_classification/activity_classifier_tcn.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index d6adac23f..c52f68640 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -660,6 +660,15 @@ def rt_loop(self): self._activity_publisher.publish(act_msg) except NoActivityClassification: + # collect the results if we are saving to coco file + if self._results_collector: + # Prepare output message + activity_msg = ActivityDetection() + # set the only needed items for collection + activity_msg.source_stamp_end_frame = window.frames[-1][0] + activity_msg.conf_vec = [0.0 for x in self._model.classes] + self._collect_results(activity_msg) + # No ramifications, but don't publish activity message. log.warn( "Runtime loop window processing function did " From 6ef0d7122d29f062ed6f224c1cfa883007f836e9 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 09:09:22 -0400 Subject: [PATCH 05/40] add a note about usage to the video/image to bag conversion script --- ros/angel_utils/scripts/convert_video_to_ros_bag.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ros/angel_utils/scripts/convert_video_to_ros_bag.py b/ros/angel_utils/scripts/convert_video_to_ros_bag.py index 90b11d16f..af2a4151d 100755 --- a/ros/angel_utils/scripts/convert_video_to_ros_bag.py +++ b/ros/angel_utils/scripts/convert_video_to_ros_bag.py @@ -1,4 +1,12 @@ #!/usr/bin/env python3 +""" +Convert a video (mp4) or a series of images into a ROS bag. + +Example running (inside ROS environment): +ros2 run angel_utils convert_video_to_ros_bag.py \ + --video-fn video.mp4 \ + --output-bag-folder ros_bags/new_bag +""" import argparse from glob import glob from pathlib import Path From f242ea2bcf8d6c4c2d811b60ab45c60379e48930 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 09:45:27 -0400 Subject: [PATCH 06/40] black formatting a clean copy error --- .../activity_classifier_tcn.py | 33 ++----------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index c52f68640..afe5268c0 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -679,7 +679,9 @@ def rt_loop(self): # save the info for why this window was not processed repr = window.__repr__() with open(self._debug_file, "a") as f: - f.write(f"timestamp: {self.get_clock().now().to_msg()}\n") + f.write( + f"timestamp: {self.get_clock().now().to_msg()}\n" + ) f.write(f"{repr}\n") # This window has completed processing - record its leading @@ -914,34 +916,5 @@ def destroy_node(self): main = make_default_main(ActivityClassifierTCN, multithreaded_executor=4) -if __name__ == "__main__": - main() - """ - Save results if we have been initialized to do that. - - This method does nothing if this node has not been initialized to - collect results. 
- """ - rc = self._results_collector - if rc is not None: - self.get_logger().info( - f"Writing classification results to: {self._output_kwcoco_path}" - ) - self._results_collector.write_file() - - def destroy_node(self): - log = self.get_logger() - log.info("Stopping node runtime") - self.rt_stop() - with SimpleTimer("Shutting down runtime thread...", log.info): - self._rt_active.clear() # make RT active flag "False" - self._rt_thread.join() - self._save_results() - super().destroy_node() - - -main = make_default_main(ActivityClassifierTCN, multithreaded_executor=4) - - if __name__ == "__main__": main() From 54e81041c45dc625044787b201b3e7f4b1507d73 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 11:18:23 -0400 Subject: [PATCH 07/40] check the frame number first before ignoring a frame (for playing back bags) --- .../python/angel_utils/activity_classification.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ros/angel_utils/python/angel_utils/activity_classification.py b/ros/angel_utils/python/angel_utils/activity_classification.py index 9b5ea102b..e582bb5b3 100644 --- a/ros/angel_utils/python/angel_utils/activity_classification.py +++ b/ros/angel_utils/python/angel_utils/activity_classification.py @@ -192,13 +192,17 @@ def queue_image( # self.get_logger_fn().info(f"self.frames[-1][0] header stamp: {self.frames[-1][0]}") with self.__state_lock: # before the current lead frame? - if self.frames and time_to_int(img_header_stamp) <= time_to_int( - self.frames[-1][0] + if ( + self.frames + and self.frames[-1][2] == image_frame_number + and time_to_int(img_header_stamp) <= time_to_int(self.frames[-1][0]) ): self.get_logger_fn().warn( f"Input image frame was NOT after the previous latest: " f"(prev) {time_to_int(self.frames[-1][0])} " - f"!< {time_to_int(img_header_stamp)} (new)" + f"!< {time_to_int(img_header_stamp)} (new)\n" + f"frame number: {image_frame_number}\n" + f"prev frame number: {self.frames[-1][2]}" ) return False self.frames.append((img_header_stamp, img_mat, image_frame_number)) From 3650376d43c357ac2707405b33672b0d0c38ebcb Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 12:39:42 -0400 Subject: [PATCH 08/40] drop unneeded argument --- angel_system/activity_classification/tcn_hpl/predict.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index 94129e01a..1d8e22d4d 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -454,7 +454,6 @@ def collect( activity_conf_vec: Sequence[float], name: Optional[str] = None, file_name: Optional[str] = None, - activity_gt: Optional[int] = None, ) -> None: """ See `CocoDataset.add_image` for more details. 
@@ -475,8 +474,6 @@ def collect( img["name"] = name if file_name is not None: img["file_name"] = file_name - if activity_gt is not None: - img["activity_gt"] = activity_gt # save the gid from the image to link to the annot gid = self._dset.add_image(**img) From afec82da16027f4e498b224412846b534639e363 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 13:39:09 -0400 Subject: [PATCH 09/40] change coco saving to add an image regardless of whether the activity classification was created --- .../tcn_hpl/predict.py | 54 +++++++++++++++-- .../activity_classifier_tcn.py | 59 +++++++++++++++---- 2 files changed, 94 insertions(+), 19 deletions(-) diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index 1d8e22d4d..b37e87362 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -447,24 +447,45 @@ def set_video(self, video_name: str) -> None: else: self._vid = self._dset.add_video(name=video_name) - def collect( + def check_for_existing_image(self, name, file_name) -> bool: + """ + Check if an image already exists in the dataset. + """ + already_exists = None + if name is not None: + try: + already_exists = self._dset.images().lookup(name) + except KeyError: + pass + if file_name is not None: + try: + already_exists = self._dset.images().lookup(file_name) + except KeyError: + pass + if already_exists: + return True + return False + + def add_image( self, frame_index: int, - activity_pred: int, - activity_conf_vec: Sequence[float], name: Optional[str] = None, file_name: Optional[str] = None, - ) -> None: + ) -> int: """ - See `CocoDataset.add_image` for more details. + Add an image to the dataset. Returns the global image id. + If the image was already added (by name or file name), returns -1. """ with self._lock: if self._vid is None: raise RuntimeError( "No video set before results collection. See `set_video` method." ) - # get the global id for the image from the frame number + # confirm we haven't already added this image + if self.check_for_existing_image(name, file_name): + return -1 + # get the global id for the image from the frame number # add the image img = dict( video_id=self._vid, @@ -477,6 +498,27 @@ def collect( # save the gid from the image to link to the annot gid = self._dset.add_image(**img) + return gid + + def collect( + self, + gid: int, + activity_pred: int, + activity_conf_vec: Sequence[float], + ) -> None: + """ + See `CocoDataset.add_image` for more details. + + :param gid: Global image id. + :param activity_pred: Predicted activity class index. + :param activity_conf_vec: Confidence vector for all activity classes. + """ + with self._lock: + if self._vid is None: + raise RuntimeError( + "No video set before results collection. See `set_video` method." + ) + # add the annotation self._dset.add_annotation( image_id=gid, diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index afe5268c0..6a9468f29 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -543,6 +543,21 @@ def _rt_keep_looping(self) -> bool: # TODO: add has-finished-processing-file-input check. 
return rt_active + def _save_image_to_coco(self, window: InputWindow) -> int: + """ + This will add an image to the output coco file + if you are not saving to a coco file, this will return -1 + """ + if self._results_collector: + # Prepare output message + activity_msg = ActivityDetection() + # set the only needed items for collection + activity_msg.source_stamp_end_frame = window.frames[-1][0] + activity_msg.conf_vec = [0.0 for x in self._model.classes] + gid = self._collect_image(activity_msg) + return gid + return -1 + def _window_criterion_correct_size(self, window: InputBuffer) -> bool: window_ok = len(window) == self._window_size if not window_ok: @@ -550,6 +565,8 @@ def _window_criterion_correct_size(self, window: InputBuffer) -> bool: f"Window is not the appropriate size " f"(actual:{len(window)} != {self._window_size}:expected)" ) + self._save_image_to_coco(window) + return window_ok def _window_criterion_new_leading_frame(self, window: InputWindow) -> bool: @@ -643,6 +660,7 @@ def rt_loop(self): # out older data at and before the first item in the window. self._buffer.clear_before(time_to_int(window.frames[1][0])) + image_gid = None # set this to None to signal if we saved the image or not try: if enable_time_trace_logging: log.info( @@ -653,22 +671,17 @@ def rt_loop(self): act_msg = self._process_window(window) # log.info(f"activity message: {act_msg}") - self._collect_results(act_msg) + image_gid = self._collect_image(act_msg) + self._collect_results(act_msg, image_gid) # set the header right before publishing so that the time is after processing act_msg.header.frame_id = "Activity Classification" act_msg.header.stamp = self.get_clock().now().to_msg() self._activity_publisher.publish(act_msg) except NoActivityClassification: - # collect the results if we are saving to coco file - if self._results_collector: - # Prepare output message - activity_msg = ActivityDetection() - # set the only needed items for collection - activity_msg.source_stamp_end_frame = window.frames[-1][0] - activity_msg.conf_vec = [0.0 for x in self._model.classes] - self._collect_results(activity_msg) - + # collect the image if we are saving to coco file + if self._results_collector and image_gid is None: + self._save_image_to_coco(window) # No ramifications, but don't publish activity message. log.warn( "Runtime loop window processing function did " @@ -864,7 +877,7 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: return activity_msg - def _collect_results(self, msg: ActivityDetection): + def _collect_image(self, msg: ActivityDetection) -> int: """ Collect into our ResultsCollector instance from the produced activity classification message if we were initialized to do that. @@ -880,10 +893,30 @@ def _collect_results(self, msg: ActivityDetection): # When reading from an input COCO file, this aligns with the input # `image` `frame_index` attributes. frame_index = time_to_int(msg.source_stamp_end_frame) - pred_cls_idx = int(np.argmax(msg.conf_vec)) - rc.collect( + gid = rc.add_image( frame_index=frame_index, name=f"ros-frame-nsec-{frame_index}", + ) + return gid + return -1 + + def _collect_results(self, msg: ActivityDetection, gid: int) -> None: + """ + Collect into our ResultsCollector instance from the produced activity + classification message if we were initialized to do that. + + This method does nothing if this node has not been initialized to + collect results. + + :param msg: ROS2 activity classification message that would be output. 
+ :param gid: Global ID of the image associated with the activity + """ + rc = self._results_collector + if rc is not None: + # use the gid that was created when the image was added + pred_cls_idx = int(np.argmax(msg.conf_vec)) + rc.collect( + gid=gid, activity_pred=pred_cls_idx, activity_conf_vec=list(msg.conf_vec), ) From cc257d84567dce7ee31880372adaa97767797db2 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 16:00:56 -0400 Subject: [PATCH 10/40] fix for beginning frame issue --- .../activity_classification/activity_classifier_tcn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 6a9468f29..4f953c4ed 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -552,7 +552,10 @@ def _save_image_to_coco(self, window: InputWindow) -> int: # Prepare output message activity_msg = ActivityDetection() # set the only needed items for collection - activity_msg.source_stamp_end_frame = window.frames[-1][0] + if len(window.frames) > 0: + activity_msg.source_stamp_end_frame = window.frames[-1][0] + else: + self.get_logger().warn(f"window.frames: {window.frames}") activity_msg.conf_vec = [0.0 for x in self._model.classes] gid = self._collect_image(activity_msg) return gid From aec3eb1d9ac70f35625644de3a4383548adb72b1 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 16:02:00 -0400 Subject: [PATCH 11/40] simplify handling images that were already added --- .../tcn_hpl/predict.py | 27 +++---------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index b37e87362..97a2bff14 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -447,25 +447,6 @@ def set_video(self, video_name: str) -> None: else: self._vid = self._dset.add_video(name=video_name) - def check_for_existing_image(self, name, file_name) -> bool: - """ - Check if an image already exists in the dataset. - """ - already_exists = None - if name is not None: - try: - already_exists = self._dset.images().lookup(name) - except KeyError: - pass - if file_name is not None: - try: - already_exists = self._dset.images().lookup(file_name) - except KeyError: - pass - if already_exists: - return True - return False - def add_image( self, frame_index: int, @@ -481,9 +462,6 @@ def add_image( raise RuntimeError( "No video set before results collection. See `set_video` method." 
) - # confirm we haven't already added this image - if self.check_for_existing_image(name, file_name): - return -1 # get the global id for the image from the frame number # add the image @@ -496,7 +474,10 @@ def add_image( if file_name is not None: img["file_name"] = file_name # save the gid from the image to link to the annot - gid = self._dset.add_image(**img) + try: + gid = self._dset.add_image(**img) + except Exception: + return -1 # image already exists return gid From 473637673b73010c30e382dd244839b4b5ba77ca Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Mon, 28 Oct 2024 12:16:43 -0400 Subject: [PATCH 12/40] cleanup debug input so it is easier to read in normal viewers --- .../activity_classification/activity_classifier_tcn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 4f953c4ed..0c462edde 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -12,6 +12,7 @@ from typing import Dict from typing import List from typing import Optional +import re import kwcoco from builtin_interfaces.msg import Time @@ -694,10 +695,10 @@ def rt_loop(self): if self._debug_file != "": # save the info for why this window was not processed repr = window.__repr__() + # clean this output for easier viewing (CSV) + repr = "index" + repr # add a column for the index + repr = re.sub(" +", ",", repr) # replace spaces with commas with open(self._debug_file, "a") as f: - f.write( - f"timestamp: {self.get_clock().now().to_msg()}\n" - ) f.write(f"{repr}\n") # This window has completed processing - record its leading From 81dc6126c327bb9104476c0ea1dc66767953fb9f Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Tue, 29 Oct 2024 07:32:39 -0400 Subject: [PATCH 13/40] change the way that timestamps are saved to bags --- .../scripts/convert_video_to_ros_bag.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/ros/angel_utils/scripts/convert_video_to_ros_bag.py b/ros/angel_utils/scripts/convert_video_to_ros_bag.py index af2a4151d..7a1622f38 100755 --- a/ros/angel_utils/scripts/convert_video_to_ros_bag.py +++ b/ros/angel_utils/scripts/convert_video_to_ros_bag.py @@ -112,7 +112,6 @@ def convert_video_to_bag( # Starting at this so our first increment starts us at frame ID 0. 
frame_id = -1 - start_ts = rclpy.time.Time(nanoseconds=time.time_ns()) for frame, frame_rel_ts in frame_iter: frame_id += 1 # Only proceed if we don't have a down-sample rate specified or if the @@ -120,15 +119,18 @@ def convert_video_to_bag( if downsample_rate is not None and frame_id % downsample_rate != 0: continue print(f"==== FRAME {frame_id} ====") - # Create timestamp - - frame_ts = start_ts + rclpy.duration.Duration(seconds=frame_rel_ts) - frame_ts_msg = frame_ts.to_msg() - print("timestamp", frame_ts) # Create image message image_msg = bridge.cv2_to_imgmsg(frame, encoding="bgr8") - image_msg.header.stamp = frame_ts_msg + #image_msg.header.stamp = frame_ts_msg + # split the frame timestamp into sec and nsec + msec = frame_rel_ts + nsec = int((msec - int(msec)) * 1_000_000_000) + msec = int(msec) + image_msg.header.stamp.sec = msec + image_msg.header.stamp.nanosec = nsec + print(f"timestamp: {image_msg.header.stamp}") + image_msg.header.frame_id = "PVFramesBGR" # Write to bag @@ -136,7 +138,7 @@ def convert_video_to_bag( bag_writer.write( output_image_topic, serialize_message(image_msg), - frame_ts.nanoseconds, + image_msg.header.stamp.nanosec, ) except Exception as err: # Truncating the error message because it printed out the whole image_msg input From 36d97d96b2004fe8030dc18ede4b55359e54e92a Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Tue, 29 Oct 2024 07:38:59 -0400 Subject: [PATCH 14/40] remove the need to check frame number now that converting videos to bags gives correct timestamps --- .../activity_classification/activity_classifier_tcn.py | 4 +++- .../python/angel_utils/activity_classification.py | 5 +---- ros/angel_utils/scripts/convert_video_to_ros_bag.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 0c462edde..557e40947 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -664,7 +664,9 @@ def rt_loop(self): # out older data at and before the first item in the window. self._buffer.clear_before(time_to_int(window.frames[1][0])) - image_gid = None # set this to None to signal if we saved the image or not + image_gid = ( + None # set this to None to signal if we saved the image or not + ) try: if enable_time_trace_logging: log.info( diff --git a/ros/angel_utils/python/angel_utils/activity_classification.py b/ros/angel_utils/python/angel_utils/activity_classification.py index e582bb5b3..aeeedf369 100644 --- a/ros/angel_utils/python/angel_utils/activity_classification.py +++ b/ros/angel_utils/python/angel_utils/activity_classification.py @@ -194,15 +194,12 @@ def queue_image( # before the current lead frame? 
if ( self.frames - and self.frames[-1][2] == image_frame_number and time_to_int(img_header_stamp) <= time_to_int(self.frames[-1][0]) ): self.get_logger_fn().warn( f"Input image frame was NOT after the previous latest: " f"(prev) {time_to_int(self.frames[-1][0])} " - f"!< {time_to_int(img_header_stamp)} (new)\n" - f"frame number: {image_frame_number}\n" - f"prev frame number: {self.frames[-1][2]}" + f"!< {time_to_int(img_header_stamp)} (new)" ) return False self.frames.append((img_header_stamp, img_mat, image_frame_number)) diff --git a/ros/angel_utils/scripts/convert_video_to_ros_bag.py b/ros/angel_utils/scripts/convert_video_to_ros_bag.py index 7a1622f38..fe70a19c7 100755 --- a/ros/angel_utils/scripts/convert_video_to_ros_bag.py +++ b/ros/angel_utils/scripts/convert_video_to_ros_bag.py @@ -122,7 +122,7 @@ def convert_video_to_bag( # Create image message image_msg = bridge.cv2_to_imgmsg(frame, encoding="bgr8") - #image_msg.header.stamp = frame_ts_msg + # image_msg.header.stamp = frame_ts_msg # split the frame timestamp into sec and nsec msec = frame_rel_ts nsec = int((msec - int(msec)) * 1_000_000_000) From c40adfcfaa3a55becd67fcd34701dc0f2a43f9aa Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Tue, 29 Oct 2024 07:39:59 -0400 Subject: [PATCH 15/40] black formatting --- .../python/angel_utils/activity_classification.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ros/angel_utils/python/angel_utils/activity_classification.py b/ros/angel_utils/python/angel_utils/activity_classification.py index aeeedf369..9b5ea102b 100644 --- a/ros/angel_utils/python/angel_utils/activity_classification.py +++ b/ros/angel_utils/python/angel_utils/activity_classification.py @@ -192,9 +192,8 @@ def queue_image( # self.get_logger_fn().info(f"self.frames[-1][0] header stamp: {self.frames[-1][0]}") with self.__state_lock: # before the current lead frame? 
- if ( - self.frames - and time_to_int(img_header_stamp) <= time_to_int(self.frames[-1][0]) + if self.frames and time_to_int(img_header_stamp) <= time_to_int( + self.frames[-1][0] ): self.get_logger_fn().warn( f"Input image frame was NOT after the previous latest: " From 6105312253547ce148ad40e47aa2e4daf0aeed45 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Mon, 21 Oct 2024 13:35:41 -0400 Subject: [PATCH 16/40] Incremental updates to train README for data and precursor data generation --- TRAIN_AND_RUN_README.md | 177 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 161 insertions(+), 16 deletions(-) diff --git a/TRAIN_AND_RUN_README.md b/TRAIN_AND_RUN_README.md index e6d3df5c7..5443f9274 100644 --- a/TRAIN_AND_RUN_README.md +++ b/TRAIN_AND_RUN_README.md @@ -15,8 +15,6 @@ Follow the following steps (the optional steps are for active development purpos ##### Get required repositories: ``` -(optional) git clone git@github.com:PTG-Kitware/TCN_HPL.git -(optional) git clone git@github.com:PTG-Kitware/yolov7.git git clone git@github.com:PTG-Kitware/angel_system.git cd angel_system git submodule update --init --recursive @@ -26,36 +24,105 @@ git submodule update --init --recursive ``` conda create --name angel_systen python=3.8.10 conda activate angel_test_env -poetry lock --no-update poetry install ``` -##### - ## Docker Installation Follow the following steps (the optional steps are for active development purposes): -##### Get required repositories: +### Get required repositories: ``` -(optional) git clone git@github.com:PTG-Kitware/TCN_HPL.git -(optional) git clone git@github.com:PTG-Kitware/yolov7.git git clone git@github.com:PTG-Kitware/angel_system.git cd angel_system git submodule update --init --recursive ``` -##### Create the environment +### Create the environment ``` ./angel-docker-build.sh -f ./angel-workspace-shell.sh ``` -##### Inside the docker container: +### Inside the docker container: ``` ./workspace_build.sh; source install/setup.sh ``` ## Data -- On gyges, raw data is located at `/data/PTG/medical/bbn_data/Release_v0.5/v0.56/` + +### Object Detection Source Data +TODO + +### Pose Detection Source Data +TODO + +### TCN Source Data + +#### BBN Medical Datasets +Source data from BBN can be acquired from https://bbn.com/private/ptg-magic/. +Consult a team member for login information. + +Each task ("skill") has their own sub-page ("In-Lab Data" link) from which sets +of data are described and referred to. +Data is stored on their SFTP server, to which the "Click to Download" links +refer to. + +Storage of downloaded ZIP archives, and their subsequent extractions, should +follow the pattern. +A script is provided +``` +bbn_data/ +├── README.md # Indicate where we have acquired this BBN data. +└── lab_data-golden/ + ├── m2_tourniquet/ + │ ├── Fri-Apr-21/ + │ │ ├── 20230420_122603_HoloLens.mp4 + │ │ ├── 20230420_122603_HoloLens.skill_labels_by_frame.txt + │ │ ├── 20230420_123212_HoloLens.mp4 + │ │ ├── 20230420_123212_HoloLens.skill_labels_by_frame.txt + │ │ ├── 20230420_124541_HoloLens.mp4 + │ │ ├── 20230420_124541_HoloLens.skill_labels_by_frame.txt + │ │ ├── 20230420_125033_HoloLens.mp4 + │ │ ├── 20230420_125033_HoloLens.skill_labels_by_frame.txt + │ │ ├── 20230420_125517_HoloLens.mp4 + │ │ └── 20230420_125517_HoloLens.skill_labels_by_frame.txt + │ ├── Fri-Apr-21.zip + │ ├── Mon-Apr-17/ + │ │ ... + │ ├── Mon-Apr-17.zip + │ ├── Mon-Apr-24/ + │ │ ... + │ └── Mon-Apr-24.zip + ├── m3_pressure_dressing/ + │ ... + └── r18_chest_seal/ + ... 
+``` +Golden data should be marked as read-only after downloading and extracting to +prevent accidental modification of the files: +``` +chmod a-w -R bbn_data/lab_data-golden/ +``` + +##### Extracting frames +BBN archives provide MP4 videos, however we will need individual image frames +for the following steps. +The script to convert BBN Truth data into a COCO format will also, by necessity +for down-stream processes, extract frames and dump them into a symmetric layout +in another writable location: +```bash +python-tpl/TCN_HPL/tcn_hpl/data/utils/bbn.py ... +# OR use the console-script entrypoint installed with the package +bbn_create_truth_coco \ + ./bbn_data/lab_data-golden \ + ./bbn_data/lab_data-working + ../../config/activity_labels/medical/m2.yaml \ + activity-truth-COCO.json +``` + + +### Storage on Gyges +- On gyges, raw data is located at + - `/data/PTG/medical/bbn_data/Release_v0.5/v0.56/` - pre-trained models are available on `https://data.kitware.com/#collection/62cc5eb8bddec9d0c4fa9ee1/folder/6605bc558b763ca20ae99f55` - In this pipeline we are only provided with object detection ground truth training data, which is located `/data/PTG/medical/object_anns/` - For real-time execution, we store our models in /angel_system/model_files @@ -73,11 +140,90 @@ git submodule update --init --recursive ## Training Procedure We take the following steps: + 1. train object detection model -2. predict objects in the scene -3. predict poses and patient bounding boxes in the scene -4. generate interaction feature vectors for the TCN -5. train the TCN +2. Generate activity classification truth COCO file. +3. predict objects in the scene +4. predict poses and patient bounding boxes in the scene +5. generate interaction feature vectors for the TCN +6. train the TCN + +### Example with M2 + +#### Train Object Detection Model +First we train the detection model on annotated data. +This would be the same data source for both the lab and professional data. +``` +python3 python-tpl/yolov7/yolov7/train.py \ + --workers 8 --device 0 --batch-size 4 \ + --data configs/data/PTG/medical/m2_task_objects.yaml \ + --img 768 768 \ + --cfg configs/model/training/PTG/medical/yolov7_m2.yaml \ + --weights weights/yolov7.pt \ + --project /data/PTG/medical/training/yolo_object_detector/train/ \ + --name m2_all_v1_example +``` + +#### Generate activity classification truth COCO file +Generate the truth MS-COCO file for per-frame activity truth annotations. +This example presumes we are using BBN Medical data as our source (as of +2024/10/15). +``` +python-tpl/TCN_HPL/tcn_hpl/data/utils/bbn.py \ + ~/data/darpa-ptg/bbn_data/lab_data-golden/m2_tourniquet \ + ~/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet \ + ~/dev/darpa-ptg/angel_system/config/activity_labels/medical/m2.yaml \ + ~/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet-activity_truth.coco.json +``` + +Train, validation, and testing splits can be split from COCO files. +The `kwcoco split` tool may be utilized to create splits at the video level, +otherwise splits may be created manually. 
+ +For example: +``` +kwcoco split \ + --src /home/local/KHQ/paul.tunison/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet/positive/3_tourns_122023/activity_truth.coco.json \ + --dst1 TRAIN-activity_truth.coco.json \ + --dst2 REMAINDER-activity-truth.coco.json \ + --splitter video \ + --factor 2 +kwcoco split \ + --src REMAINDER-activity-truth.coco.json \ + --dst1 VALIDATION-activity-truth.coco.json \ + --dst2 TEST-activity-truth.coco.json \ + --splitter video \ + --factor 2 +``` + +#### Generate Object Predictions in the Scene +Note that the input COCO file is that which was generated in the previous step. +This is to ensure that all represented videos and image frames are predicted on +and present in both COCO files. +``` +python-tpl/yolov7/yolov7/detect_ptg.py \ + -i ~/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet/activity_truth.coco.json \ + -o test_det_output.coco.json + --model-hands ./model_files/object_detector/hands_model.pt \ + --model-objects ./model_files/object_detector/m2_det.pt \ + --model-device 0 \ + --img-size 768 \ +``` +Additional debug outputs may optionally be generated. +See the `-h`/`--help` options for more details. + +#### Generate Pose Predictions +Note that the input COCO file is that which was generated in the +`Generate activity classification truth COCO file` section. +``` +python-tpl/TCN_HPL/tcn_hpl/data/utils/pose_generation/generate_pose_data.py \\ + -i ~/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet/activity_truth.coco.json \\ + -o ./test_pose_output.coco.json \\ + --det-config ./python-tpl/TCN_HPL/tcn_hpl/data/utils/pose_generation/configs/medic_pose.yaml \\ + --det-weights ./model_files/pose_estimation/pose_det_model.pth \\ + --pose-config python-tpl/TCN_HPL/tcn_hpl/data/utils/pose_generation/configs/ViTPose_base_medic_casualty_256x192.py \\ + --pose-weights ./model_files/pose_estimation/pose_model.pth +``` ##### Example with R18 @@ -169,4 +315,3 @@ git submodule update --init --recursive ./workspace_build.sh; source install/setup.sh tmuxinator start demos/medical/BBN-integrate-Kitware-R18 ``` - From 412b8b78bd1525e2261fd5eb9db802c8f69a54e0 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Mon, 21 Oct 2024 13:43:31 -0400 Subject: [PATCH 17/40] Move TCN vectorization into the TCN package --- .../tcn_hpl/predict.py | 3 +- .../train_activity_classifier.py | 3 +- angel_system/activity_classification/utils.py | 1083 ----------------- python-tpl/TCN_HPL | 2 +- .../detections_to_activities/test_utils.py | 2 +- 5 files changed, 4 insertions(+), 1089 deletions(-) delete mode 100644 angel_system/activity_classification/utils.py diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index 97a2bff14..5024399d2 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -17,7 +17,7 @@ from tcn_hpl.data.components.augmentations import NormalizePixelPts, NormalizeFromCenter from tcn_hpl.models.ptg_module import PTGLitModule -from angel_system.activity_classification.utils import ( +from tcn_hpl.data.vectorize_classic import ( tlbr_to_xywh, obj_det2d_set_to_feature, ) @@ -545,7 +545,6 @@ def debug_from_array_file() -> None: import numpy as np import torch from tqdm import tqdm - from tcn_hpl.data.components.augmentations import NormalizePixelPts from angel_system.tcn_hpl.predict import ( load_module, predict, diff --git a/angel_system/activity_classification/train_activity_classifier.py 
b/angel_system/activity_classification/train_activity_classifier.py index d6b7579b4..051102357 100644 --- a/angel_system/activity_classification/train_activity_classifier.py +++ b/angel_system/activity_classification/train_activity_classifier.py @@ -3,7 +3,6 @@ import pickle import kwcoco import argparse -import json import numpy as np import matplotlib.pyplot as plt @@ -16,7 +15,7 @@ from angel_system.data.common.load_data import sanitize_str -from angel_system.activity_classification.utils import ( +from tcn_hpl.data.vectorize_classic import ( obj_det2d_set_to_feature, ) diff --git a/angel_system/activity_classification/utils.py b/angel_system/activity_classification/utils.py deleted file mode 100644 index eb845ba48..000000000 --- a/angel_system/activity_classification/utils.py +++ /dev/null @@ -1,1083 +0,0 @@ -import os - -from typing import Dict, Tuple, List - -import kwimage -import random - -import numpy as np -import numpy.typing as npt -import matplotlib.pyplot as plt -import matplotlib.colors as mcolors - -from PIL import Image -from pathlib import Path - - -######################### -# Default values -######################### -default_dist = (0, 0) # (1280 * 2, 720 * 2) -default_center_dist = (0, 0) # (1280, 720) -default_bbox = [0, 0, 0, 0] # [0, 0, 1280, 720] -default_center = ([[0]], [[0]]) # kwimage.Boxes([default_bbox], "xywh").center -default_center_list = [default_center[0][0][0], default_center[1][0][0]] -zero_joint_offset = [0 for i in range(22)] - -random_colors = list(mcolors.CSS4_COLORS.keys()) -random.shuffle(random_colors) - - -def tlbr_to_xywh( - top: npt.ArrayLike, - left: npt.ArrayLike, - bottom: npt.ArrayLike, - right: npt.ArrayLike, -) -> Tuple[npt.ArrayLike, npt.ArrayLike, npt.ArrayLike, npt.ArrayLike]: - """ - Convert array-likes of vectorized TLBR (top-left-bottom-right) box - coordinates into XYWH (x, y, width, height) format (similarly vectorized) - - :param top: Array-like of top box coordinate values. - :param left: Array-like of left box coordinate values. - :param bottom: Array-like of bottom box coordinate values. - :param right: Array-like of right box coordinate values. - - :return: - List of x values, List of y values, List of width values, List of height values - """ - assert ( - len(top) == len(left) == len(bottom) == len(right) - ), "No all input array-likes were the same length." - xs = np.asarray(left) - ys = np.asarray(top) - ws = np.asarray(right) - xs - hs = np.asarray(bottom) - ys - return xs, ys, ws, hs - - -def feature_version_to_options(feature_version: int) -> Dict[str, bool]: - """Convert the feature version number to a dict of - boolean flags indicating which data values should be added to the feature vector - - :param feature_version: Version of the feature conversion approach. - - :return: - Dictionary of flag names and boolean values that match the input parameters - to the functions that create/utilize the feature vector - """ - options = {} - - """ - Feature vector that encodes the activation feature of each class - - Len: top_k_objects * num_obj_classes - - [ - for k_obj in top_k_object: - A[obj1] ... A[objN] - ] - """ - options[1] = {"use_activation": True} - - """ - Feature vector that encodes the distance of each object from each hand, - and the activation features - - Len: - top_k_objects * ( - 1 + (num_obj_classes-2)*2 + 1 + (num_obj_classes-2)*2 + 2 + (num_obj_classes-2) - ) - - [ - for k_obj in top_k_object: - A[right hand], - D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... 
, D[right hand, objN_k]y, - A[left hand], - D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y, - D[right hand, left hand]x, D[right hand, left hand]y, - A[obj1_k] ... A[objN_k] - ] - """ - options[2] = { - "use_activation": True, - "use_hand_dist": True, - } - - """ - Feature vector that encodes the distance of each object to the center of the frame, - the intersection of each object to the hands, - and the activation features - - Len: - top_k_objects * ( - 1 + 2 + 1 + 2 + 1 + (1 + 1 + 1 + 2) * (num_obj_classes-2) - ) - - [ - for k_obj in top_k_object: - A[right hand], - D[right hand, center]x, D[right hand, center]y, - A[left hand], - D[left hand, center]x, D[left hand, center]y, - I[right hand, left hand], - A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], D[obj1_k, center]x, D[obj1_k, center]y ... , D[objN_k, center]y - ] - """ - options[3] = { - "use_activation": True, - "use_center_dist": True, - "use_intersection": True, - } - - """ - Feature vector that encodes the distance of each object from each hand, - the intersection of each object to the hands, - and the activation features - - Len: - top_k_objects * ( - 1 + 2 * (num_obj_classes-2) + 1 + 2 * (num_obj_classes-2) + 2 + 1 + (1 + 1 + 1) * (num_obj_classes-2) - ) - - [ - for k_obj in top_k_object: - A[right hand], - D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y, - A[left hand], - D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y, - D[right hand, left hand]x, D[right hand, left hand]y, - I[right hand, left hand], - A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], ... , I[left hand, objN_k] - ] - """ - options[5] = { - "use_activation": True, - "use_hand_dist": True, - "use_intersection": True, - } - - """ - Feature vector that encodes the distance of each object from each hand, - the intersection of each object to the hands, - the distance from the center of the hands to each patient joint, - and the distance from the center of each object to each patient joint, - and the activation features - - Len: - top_k_objects * ( - (1 + (num_obj_classes-2)*2) * 2 + 2 + 1 - + (num_obj_classes-2) * (1+1+1) - ) - + 22*2 + 22*2 - + top_k_objects * ((22*2)*(num_obj_classes-2)) - - - [ - for k_obj in top_k_object: - A[right hand], - D[right hand, obj1_k]x, D[right hand, obj1_k]y, ... , D[right hand, objN_k]y, - A[left hand], - D[left hand, obj1_k]x, D[left hand, obj1_k]y, ... , D[left hand, objN_k]y, - D[right hand, left hand]x, D[right hand, left hand]y, - I[right hand, left hand], - A[obj1_k] I[right hand, obj1_k] I[left hand, obj1_k], ... , I[left hand, objN_k], - D[left hand, joint1]x, ... , D[left hand, joint 22]y, - D[right hand, joint1]x, ... , D[right hand, joint 22]y, - for k_obj in top_k_object: - D[obj1_k, joint1]x, ... , D[obj1_k, joint22]y, - ..., - D[objN_k, joint1]x, ... , D[objN_k, joint22]y - ] - """ - options[6] = { - "use_activation": True, - "use_hand_dist": True, - "use_intersection": True, - "use_joint_hand_offset": True, - "use_joint_object_offset": True, - } - - return options[feature_version] - - -def obj_det2d_set_to_feature( - label_vec: List[str], - xs: List[float], - ys: List[float], - ws: List[float], - hs: List[float], - label_confidences: List[float], - pose_keypoints: List[Dict], - obj_label_to_ind: Dict[str, int], - version: int = 1, - top_k_objects: int = 1, -): - """Convert ObjectDetection2dSet fields into a feature vector. 
- - :param label_vec: List of object labels for each detection (length: # detections) - :param xs: List of x values for each detection (length: # detections) - :param ys: List of y values for each detection (length: # detections) - :param ws: List of width values for each detection (length: # detections) - :param hs: List of height values for each detection (length: # detections) - :param label_confidences: List of confidence values for each detection (length: # detections) - :param pose_keypoints: - List of joints, represented by a dictionary contining the x and y corrdinates of the points and the category id and string - :param obj_label_to_ind: - Dictionary mapping a label str and returns the index within the feature vector. - :param version: - Version of the feature conversion approach. - :param top_k_objects: Number top confidence objects to use per label, defaults to 1 - - :return: resulting feature data - """ - opts = feature_version_to_options(version) - feature_vec = obj_det2d_set_to_feature_by_method( - label_vec, - xs, - ys, - ws, - hs, - label_confidences, - pose_keypoints, - obj_label_to_ind, - top_k_objects=top_k_objects, - **opts, - ) - - # print(f"feat {feature_vec}") - # print(len(feature_vec)) - return feature_vec - - -def plot_feature_vec( - image_fn: str, - right_hand_center: list, - left_hand_center: list, - feature_vec: np.array, - obj_label_to_ind: Dict[str, int], - output_dir: str, - top_k_objects: int = 1, - use_activation: bool = False, - use_hand_dist: bool = False, - use_center_dist: bool = False, - use_intersection: bool = False, - use_joint_hand_offset: bool = False, - use_joint_object_offset: bool = False, - joint_names: List[str] = [ - "nose", - "mouth", - "throat", - "chest", - "stomach", - "left_upper_arm", - "right_upper_arm", - "left_lower_arm", - "right_lower_arm", - "left_wrist", - "right_wrist", - "left_hand", - "right_hand", - "left_upper_leg", - "right_upper_leg", - "left_knee", - "right_knee", - "left_lower_leg", - "right_lower_leg", - "left_foot", - "right_foot", - "back", - ], - colors: List[str] = [ - "yellow", - "red", - "green", - "lightblue", - "blue", - "purple", - "orange", - ], -): - """Plot the object and joint points based on the hand bbox centers and the distance values - in the feature vector - - :param image_fn: Path to the image to draw on - :param right_hand_center: List of the x and y coordinates of the right hand box center - :param left_hand_center: List of the x and y coordinates of the left hand box center - :param feature_vec: Numpy array of values determined by the provided flags - :param obj_label_to_ind: - Dictionary mapping a label str and returns the index within the feature vector. 
- :param output_dir: Path to a folder to save the generated images to - :param top_k_objects: Number top confidence objects to use per label, defaults to 1 - :param use_activation: If True, add the confidence values of the detections to the feature vector, defaults to False - :param use_hand_dist: If True, add the distance of the detection centers to both hand centers to the feature vector, defaults to False - :param use_intersection: If True, add the intersection of the detection boxes with the hand boxes to the feature vector, defaults to False - :param use_joint_hand_offset: If True, add the distance of the hand centers to the patient joints to the feature vector, defaults to False - :param use_joint_object_offset: If True, add the distance of the object centers to the patient joints to the feature vector, defaults to False - :param joint_names: List of the joint names - :param colors: List of colors to use when plotting points - """ - Path(output_dir).mkdir(parents=True, exist_ok=True) - - rh_joint_dists = [] - lh_joint_dists = [] - rh_dists_k = [[] for i in range(top_k_objects)] - lh_dists_k = [[] for i in range(top_k_objects)] - obj_confs_k = [[] for i in range(top_k_objects)] - obj_im_center_dists_k = [[] for i in range(top_k_objects)] - obj_joint_dists_k = [[] for i in range(top_k_objects)] - - non_object_labels = ["hand (left)", "hand (right)", "user", "patient"] - labels = sorted(obj_label_to_ind) - for non_obj_label in non_object_labels: - labels.remove(non_obj_label) - - ind = -1 - for object_k_index in range(top_k_objects): - # RIGHT HAND - if use_activation: - ind += 1 - right_hand_conf = feature_vec[ind] - - if use_hand_dist: - for obj_label in labels: - ind += 1 - obj_rh_dist_x = feature_vec[ind] - ind += 1 - obj_rh_dist_y = feature_vec[ind] - - rh_dists_k[object_k_index].append([obj_rh_dist_x, obj_rh_dist_y]) - - if use_center_dist: - ind += 1 - rh_im_center_dist_x = feature_vec[ind] - ind += 1 - rh_im_center_dist_y = feature_vec[ind] - - # LEFT HAND - if use_activation: - ind += 1 - left_hand_conf = feature_vec[ind] - - if use_hand_dist: - # Left hand distances - for obj_label in labels: - ind += 1 - obj_lh_dist_x = feature_vec[ind] - ind += 1 - obj_lh_dist_y = feature_vec[ind] - - lh_dists_k[object_k_index].append([obj_lh_dist_x, obj_lh_dist_y]) - - if use_center_dist: - ind += 1 - lh_im_center_dist_x = feature_vec[ind] - ind += 1 - lh_im_center_dist_y = feature_vec[ind] - - # Right - left hand - if use_hand_dist: - # Right - left hand distance - ind += 1 - rh_lh_dist_x = feature_vec[ind] - ind += 1 - rh_lh_dist_y = feature_vec[ind] - if use_intersection: - ind += 1 - lh_rh_intersect = feature_vec[ind] - - # OBJECTS - for obj_label in labels: - if use_activation: - # Object confidence - ind += 1 - obj_conf = feature_vec[ind] - - obj_confs_k[object_k_index].append(obj_conf) - - if use_intersection: - # obj - right hand intersection - ind += 1 - obj_rh_intersect = feature_vec[ind] - # obj - left hand intersection - ind += 1 - obj_lh_intersect = feature_vec[ind] - - if use_center_dist: - # image center - obj distances - ind += 1 - obj_im_center_dist_x = feature_vec[ind] - ind += 1 - obj_im_center_dist_y = feature_vec[ind] - - obj_im_center_dists_k[object_k_index].append( - [obj_im_center_dist_x, obj_im_center_dist_y] - ) - - # HANDS-JOINTS - if use_joint_hand_offset: - # left hand - joints distances - for i in range(22): - ind += 1 - lh_jointi_dist_x = feature_vec[ind] - ind += 1 - lh_jointi_dist_y = feature_vec[ind] - - lh_joint_dists.append([lh_jointi_dist_x, 
lh_jointi_dist_y]) - - # right hand - joints distances - for i in range(22): - ind += 1 - rh_jointi_dist_x = feature_vec[ind] - ind += 1 - rh_jointi_dist_y = feature_vec[ind] - - rh_joint_dists.append([rh_jointi_dist_x, rh_jointi_dist_y]) - - # OBJS-JOINTS - if use_joint_object_offset: - for object_k_index in range(top_k_objects): - # obj - joints distances - for obj_label in labels: - joints_dists = [] - for i in range(22): - ind += 1 - obj_jointi_dist_x = feature_vec[ind] - ind += 1 - obj_jointi_dist_y = feature_vec[ind] - - joints_dists.append([obj_jointi_dist_x, obj_jointi_dist_y]) - - obj_joint_dists_k[object_k_index].append(joints_dists) - - # Draw - fig, ( - (lh_dist_ax, rh_dist_ax), - (im_center_dist_ax, obj_joint_dist_ax), - (lh_joint_dist_ax, rh_joint_dist_ax), - ) = plt.subplots(3, 2, figsize=(15, 15)) - axes = [ - rh_dist_ax, - lh_dist_ax, - im_center_dist_ax, - obj_joint_dist_ax, - rh_joint_dist_ax, - lh_joint_dist_ax, - ] - flags = [ - use_hand_dist, - use_hand_dist, - use_center_dist, - use_joint_object_offset, - use_joint_hand_offset, - use_joint_hand_offset, - ] - - rh_dist_ax.set_title("Objects from distance to right hand") - lh_dist_ax.set_title("Objects from distance to left hand") - im_center_dist_ax.set_title("Objects from distance to image center") - obj_joint_dist_ax.set_title("Joints from distance to objects*") - rh_joint_dist_ax.set_title("Joints from distance to right hand") - lh_joint_dist_ax.set_title("Joints from distance to left hand") - - rh_dist_color = colors[2] - lh_dist_color = colors[3] - obj_im_center_dist_color = colors[4] - lh_joint_color = colors[5] - rh_joint_color = colors[6] - - image = Image.open(image_fn) - image = np.array(image) - - # Default values for each plot - for ax, flag in zip(axes, flags): - if not flag: - continue - - ax.imshow(image) - - ax.plot(right_hand_center[0], right_hand_center[1], color=colors[0], marker="o") - ax.annotate( - f"hand (right): {round(right_hand_conf, 2)}", - right_hand_center, - color="black", - annotation_clip=False, - ) - - ax.plot(left_hand_center[0], left_hand_center[1], color=colors[1], marker="o") - ax.annotate( - f"hand (left): {round(left_hand_conf, 2)}", - left_hand_center, - color="black", - annotation_clip=False, - ) - - def draw_points_by_distance(ax, distances, pt, color, labels, confs): - # Make sure the reference point exists - if pt == default_center_list: - return - - for i, dist in enumerate(distances): - # Make sure the object point exists - if dist == list(default_dist): - continue - - obj_pt = [pt[0] - dist[0], pt[1] - dist[1]] # pt - obj_pt = dist - - ax.plot([pt[0], obj_pt[0]], [pt[1], obj_pt[1]], color=color, marker="o") - ax.annotate( - f"{labels[i]}: {round(confs[i], 2)}", - obj_pt, - color="black", - annotation_clip=False, - ) - - if use_joint_hand_offset: - draw_points_by_distance( - rh_joint_dist_ax, - rh_joint_dists, - right_hand_center, - rh_joint_color, - joint_names, - [1] * len(joint_names), - ) - draw_points_by_distance( - lh_joint_dist_ax, - lh_joint_dists, - left_hand_center, - lh_joint_color, - joint_names, - [1] * len(joint_names), - ) - - if use_hand_dist: - rh_dist_ax.plot( - [right_hand_center[0], right_hand_center[0] - rh_lh_dist_x], - [right_hand_center[1], right_hand_center[1] - rh_lh_dist_y], - color=random_colors[0], - marker="o", - ) - - for object_k_index in range(top_k_objects): - if use_hand_dist: - draw_points_by_distance( - rh_dist_ax, - rh_dists_k[object_k_index], - right_hand_center, - rh_dist_color, - labels, - obj_confs_k[object_k_index], - ) - 
draw_points_by_distance( - lh_dist_ax, - lh_dists_k[object_k_index], - left_hand_center, - lh_dist_color, - labels, - obj_confs_k[object_k_index], - ) - - if use_center_dist: - image_center = [1280 // 2, 720 // 2] - im_center_dist_ax.plot(image_center, color=colors[1], marker="o") - im_center_dist_ax.annotate( - "image_center", image_center, color="black", annotation_clip=False - ) - draw_points_by_distance( - im_center_dist_ax, - obj_im_center_dists_k[object_k_index], - image_center, - obj_im_center_dist_color, - labels, - obj_confs_k[object_k_index], - ) - - if use_joint_object_offset: - - obj_pts = [] - if use_hand_dist: - if right_hand_center != default_center_list: - obj_pts = [ - ( - [ - right_hand_center[0] - rh_dist[0], - right_hand_center[1] - rh_dist[1], - ] - if rh_dist != list(default_dist) - else default_center_list - ) - for rh_dist in rh_dists_k[object_k_index] - ] - elif left_hand_center != default_center_list: - obj_pts = [ - ( - [ - left_hand_center[0] - lh_dist[0], - left_hand_center[1] - lh_dist[1], - ] - if lh_dist != list(default_dist) - else default_center_list - ) - for lh_dist in lh_dists_k[object_k_index] - ] - elif use_center_dist: - obj_pts = [ - ( - [ - image_center[0] - im_center_dist[0], - image_center[1] - im_center_dist[1], - ] - if im_center_dist != list(default_dist) - else default_center_list - ) - for im_center_dist in obj_im_center_dists_k[object_k_index] - ] - - if not obj_pts: - continue - - for i, obj_pt in enumerate(obj_pts): - if obj_pt == default_center_list: - continue - - obj_joint_color = random_colors[(object_k_index * len(obj_pt)) + i] - obj_joint_dist_ax.plot( - obj_pt[0], obj_pt[1], color=obj_joint_color, marker="o" - ) - obj_joint_dist_ax.annotate( - f"{labels[i]}: {round(obj_confs_k[object_k_index][i], 2)}", - obj_pt, - color="black", - annotation_clip=False, - ) - draw_points_by_distance( - obj_joint_dist_ax, - obj_joint_dists_k[object_k_index][i], - obj_pt, - obj_joint_color, - joint_names, - [1] * len(joint_names), - ) - - Path(f"{output_dir}/full_feature_vec").mkdir(parents=True, exist_ok=True) - plt.savefig(f"{output_dir}/full_feature_vec/{os.path.basename(image_fn)}") - - def copy_ax_to_new_fig(ax, subfolder): - ax.remove() - - fig2 = plt.figure(figsize=(15, 15)) - ax.figure = fig2 - fig2.axes.append(ax) - fig2.add_axes(ax) - - dummy = fig2.add_subplot(111) - ax.set_position(dummy.get_position()) - dummy.remove() - - Path(f"{output_dir}/{subfolder}").mkdir(parents=True, exist_ok=True) - plt.savefig(f"{output_dir}/{subfolder}/{os.path.basename(image_fn)}") - - plt.close(fig2) - - # Save each subplot as its own image - for ax, subfolder, flag in zip( - [ - lh_dist_ax, - rh_dist_ax, - im_center_dist_ax, - obj_joint_dist_ax, - lh_joint_dist_ax, - rh_joint_dist_ax, - ], - [ - "left_hand_obj_dist", - "right_hand_obj_dist", - "image_center_obj_dist", - "obj_joints_dist", - "left_hand_joints_dist", - "right_hand_joints_dist", - ], - flags, - ): - if not flag: - continue - copy_ax_to_new_fig(ax, subfolder) - - plt.close(fig) - - -def obj_det2d_set_to_feature_by_method( - label_vec: List[str], - xs: List[float], - ys: List[float], - ws: List[float], - hs: List[float], - label_confidences: List[float], - pose_keypoints: List[Dict], - obj_label_to_ind: Dict[str, int], - top_k_objects: int = 1, - use_activation: bool = False, - use_hand_dist: bool = False, - use_center_dist: bool = False, - use_intersection: bool = False, - use_joint_hand_offset: bool = False, - use_joint_object_offset: bool = False, -): - """ - :param label_vec: List of object 
labels for each detection (length: # detections) - :param xs: List of x values for each detection (length: # detections) - :param ys: List of y values for each detection (length: # detections) - :param ws: List of width values for each detection (length: # detections) - :param hs: List of height values for each detection (length: # detections) - :param label_confidences: List of confidence values for each detection (length: # detections) - :param pose_keypoints: - List of joints, represented by a dictionary contining the x and y corrdinates of the points and the category id and string - :param obj_label_to_ind: - Dictionary mapping a label str and returns the index within the feature vector. - :param top_k_objects: Number top confidence objects to use per label, defaults to 1 - :param use_activation: If True, add the confidence values of the detections to the feature vector, defaults to False - :param use_hand_dist: If True, add the distance of the detection centers to both hand centers to the feature vector, defaults to False - :param use_intersection: If True, add the intersection of the detection boxes with the hand boxes to the feature vector, defaults to False - :param use_joint_hand_offset: If True, add the distance of the hand centers to the patient joints to the feature vector, defaults to False - :param use_joint_object_offset: If True, add the distance of the object centers to the patient joints to the feature vector, defaults to False - - :return: - resulting feature data - """ - ######################### - # Data - ######################### - # Number of object detection classes - num_det_classes = len(obj_label_to_ind) - - # Maximum confidence observe per-class across input object detections. - # If a class has not been observed, it is set to 0 confidence. - det_class_max_conf = np.zeros((num_det_classes, top_k_objects)) - # The bounding box of the maximally confident detection - det_class_bbox = np.zeros((top_k_objects, num_det_classes, 4), dtype=np.float64) - det_class_bbox[:] = default_bbox - - # Binary mask indicate which detection classes are present on this frame. 
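
Since the selection logic in this (now-removed) hunk is fairly dense, a minimal standalone sketch of the per-class top-k confidence bookkeeping it implements may help. The labels, confidences, and shapes below are made up purely for illustration and are not taken from the repository:

```python
import numpy as np

# Toy sketch of keeping the `top_k_objects` most confident detections per class:
# evict the current minimum when a higher-confidence detection arrives, then
# keep the slots ordered most- to least-confident.
top_k = 2
num_classes = 3
det_class_max_conf = np.zeros((num_classes, top_k))

obj_label_to_ind = {"gauze": 0, "tourniquet": 1, "hand (left)": 2}  # illustrative
detections = [("gauze", 0.4), ("gauze", 0.9), ("gauze", 0.6)]       # illustrative

for label, conf in detections:
    ind = obj_label_to_ind[label]
    conf_list = det_class_max_conf[ind]
    if conf > conf_list.min():
        # Replace the lowest-confidence slot for this class ...
        conf_list[np.argmin(conf_list)] = conf
        # ... and re-sort so index 0 is always the most confident.
        det_class_max_conf[ind] = np.sort(conf_list)[::-1]

print(det_class_max_conf[0])  # [0.9 0.6]
```
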
- det_class_mask = np.zeros((top_k_objects, num_det_classes), dtype=np.bool_) - - # Record the most confident detection for each object class as recorded in - # `obj_label_to_ind` (confidence & bbox) - for i, label in enumerate(label_vec): - assert label in obj_label_to_ind, f"Label {label} is unknown" - - conf = label_confidences[i] - ind = obj_label_to_ind[label] - - conf_list = det_class_max_conf[ind, :] - if conf > det_class_max_conf[ind].min(): - # Replace the lowest confidence object with our new higher confidence object - min_conf_ind = np.where(conf_list == conf_list.min())[0][0] - - conf_list[min_conf_ind] = conf - det_class_bbox[min_conf_ind, ind] = [xs[i], ys[i], ws[i], hs[i]] - det_class_mask[min_conf_ind, ind] = True - - # Sort the confidences to determine the top_k order - sorted_index = np.argsort(conf_list)[::-1] - sorted_conf_list = np.array([conf_list[k] for k in sorted_index]) - - # Reorder the values to match the confidence top_k order - det_class_max_conf[ind] = sorted_conf_list - - bboxes = det_class_bbox.copy() - mask = det_class_mask.copy() - for idx, sorted_ind in enumerate(sorted_index): - det_class_bbox[idx, ind] = bboxes[sorted_ind, ind] - det_class_mask[idx, ind] = mask[sorted_ind, ind] - - det_class_kwboxes = kwimage.Boxes(det_class_bbox, "xywh") - - ######################### - # util functions - ######################### - def find_hand(hand_str): - hand_idx = obj_label_to_ind[hand_str] - hand_conf = det_class_max_conf[hand_idx][0] - hand_bbox = kwimage.Boxes([det_class_bbox[0, hand_idx]], "xywh") - - return hand_idx, hand_bbox, hand_conf, hand_bbox.center - - def dist_to_center(center1, center2): - center_dist = [ - center1[0][0][0] - center2[0][0][0], - center1[1][0][0] - center2[1][0][0], - ] - return center_dist - - ######################### - # Hands - ######################### - # Find the right hand - (right_hand_idx, right_hand_bbox, right_hand_conf, right_hand_center) = find_hand( - "hand (right)" - ) - - # Find the left hand - (left_hand_idx, left_hand_bbox, left_hand_conf, left_hand_center) = find_hand( - "hand (left)" - ) - - right_left_hand_kwboxes = det_class_kwboxes[0, [right_hand_idx, left_hand_idx]] - - # Mask detailing hand presence in the scene. - RIGHT_IDX = 0 - LEFT_IDX = 1 - hand_mask = [det_class_mask[0][right_hand_idx], det_class_mask[0][left_hand_idx]] - # Mask detailing hand and object presence in the scene. - hand_by_object_mask_k = np.zeros( - (top_k_objects, 2, num_det_classes), dtype=np.bool_ - ) - - for object_k_index in range(top_k_objects): - x = np.array( - [ - [ - hand_mask[RIGHT_IDX] and det_class - for det_class in det_class_mask[object_k_index] - ], - [ - hand_mask[LEFT_IDX] and det_class - for det_class in det_class_mask[object_k_index] - ], - ] - ) - hand_by_object_mask_k[object_k_index] = x - - ######################### - # Hand distances - ######################### - if use_hand_dist: - # Compute distances to the right and left hands. Distance to the hand - # is defined by `hand.center - object.center`. - # `kwcoco.Boxes.center` returns a tuple of two arrays, each shaped - # [n_boxes, 1]. - all_obj_centers_x, all_obj_centers_y = det_class_kwboxes.center # [n_dets, 1] - hand_centers_x, hand_centers_y = right_left_hand_kwboxes.center # [2, 1] - - # Hand distances from objects. 
Shape: [top_k, n_dets, 2] - right_hand_dist_k = np.zeros((top_k_objects, num_det_classes, 2)) - left_hand_dist_k = np.zeros((top_k_objects, num_det_classes, 2)) - for object_k_index in range(top_k_objects): - obj_centers_x = all_obj_centers_x[object_k_index] - obj_centers_y = all_obj_centers_y[object_k_index] - - hand_dist_x = np.subtract( - hand_centers_x, - obj_centers_x.T, - where=hand_by_object_mask_k[object_k_index], - # required, otherwise indices may be left uninitialized. - out=np.zeros(shape=(2, num_det_classes)), - ) - hand_dist_y = np.subtract( - hand_centers_y, - obj_centers_y.T, - where=hand_by_object_mask_k[object_k_index], - # required, otherwise indices may be left uninitialized. - out=np.zeros(shape=(2, num_det_classes)), - ) - - # Collate into arrays of (x, y) coordinates. - right_hand_dist = np.stack( - [hand_dist_x[RIGHT_IDX], hand_dist_y[RIGHT_IDX]], axis=1 - ) - # for dist in right_hand_dist: - # if not hand_by_object_mask_k[object_k_index][RIGHT_IDX] - left_hand_dist = np.stack( - [hand_dist_x[LEFT_IDX], hand_dist_y[LEFT_IDX]], axis=1 - ) - - right_hand_dist_k[object_k_index] = right_hand_dist - left_hand_dist_k[object_k_index] = left_hand_dist - - else: - right_hand_dist_k = left_hand_dist_k = None - - ######################### - # Image center - # distances - ######################### - if use_center_dist: - image_center = kwimage.Boxes( - [0, 0, 1280, 720], "xywh" - ).center # Hard coded image size - default_center_dist = [image_center[0][0][0] * 2, image_center[1][0][0] * 2] - - # Object distances from image center. Shape: [top_k, n_dets, 2] - image_center_obj_dist_k = np.zeros((top_k_objects, num_det_classes, 2)) - for object_k_index in range(top_k_objects): - obj_centers_x = all_obj_centers_x[object_k_index] - obj_centers_y = all_obj_centers_y[object_k_index] - - for obj_ind in range(num_det_classes): - obj_conf = det_class_max_conf[obj_ind] - - obj_bbox = kwimage.Boxes( - [det_class_bbox[object_k_index][obj_ind]], "xywh" - ) - obj_center = obj_bbox.center - - center_dist = ( - dist_to_center(image_center, obj_center) - if obj_conf != 0 - else default_center_dist - ) - - image_center_obj_dist_k[object_k_index][obj_ind] = center_dist - else: - image_center_obj_dist_k = None - - ######################### - # Intersection - ######################### - if use_intersection: - # Computing hand-object intersection. - # Intersection here is defined as the percentage of the hand box - # intersected by the representative object bounding-box. - # If a hand or object is not present in the scene, then their - # respective intersection area is 0. - # Shape: [top_k, n_dets] - right_hand_intersection_k = np.zeros((top_k_objects, num_det_classes)) - left_hand_intersection_k = np.zeros((top_k_objects, num_det_classes)) - for object_k_index in range(top_k_objects): - obj_bboxes = det_class_kwboxes[object_k_index] - - hand_obj_intersection_vol = right_left_hand_kwboxes.isect_area(obj_bboxes) - right_left_hand_area = right_left_hand_kwboxes.area - - # Handling avoiding div-by-zero using the `where` parameter. - hand_obj_intersection = np.divide( - hand_obj_intersection_vol, - right_left_hand_area, - where=right_left_hand_area != 0, - # Specifying out otherwise there may be uninitialized values in - # indices where `right_left_hand_area == 0`. 
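
As a side note on the `where=`/`out=` pairing used in these masked NumPy calls: without an explicit `out=`, the positions excluded by the mask are left with whatever happened to be in the output buffer rather than being zeroed. A tiny self-contained demonstration with toy values:

```python
import numpy as np

num = np.array([1.0, 2.0, 3.0])
den = np.array([2.0, 0.0, 4.0])

# Entries where `den == 0` are skipped by `where=`; `out=` guarantees they
# come back as a well-defined 0.0 instead of uninitialized memory.
safe = np.divide(num, den, where=den != 0, out=np.zeros_like(num))
print(safe)  # [0.5  0.   0.75]
```
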
- out=np.zeros_like(hand_obj_intersection_vol), - ) - right_hand_intersection = hand_obj_intersection[0] - left_hand_intersection = hand_obj_intersection[1] - - right_hand_intersection_k[object_k_index] = right_hand_intersection - left_hand_intersection_k[object_k_index] = left_hand_intersection - else: - right_hand_intersection_k = left_hand_intersection_k = None - - ######################### - # Joints - ######################### - def calc_joint_offset(bbox_center_x, bbox_center_y): - offset_vector = [] - if pose_keypoints == zero_joint_offset or ( - bbox_center_x == default_center_list[0] - and bbox_center_y == default_center_list[1] - ): - # If we don't have the joints or the object, return default values - for joint in pose_keypoints: - offset_vector.append(default_dist) - return offset_vector - - for joint in pose_keypoints: - jx, jy = joint["xy"] - joint_point = [jx, jy] - - dist = [bbox_center_x - joint_point[0], bbox_center_y - joint_point[1]] - offset_vector.append(dist) - - return offset_vector - - # HAND - JOINTS - if use_joint_hand_offset: - joint_right_hand_offset = calc_joint_offset( - right_hand_center[0][0][0], right_hand_center[1][0][0] - ) - joint_left_hand_offset = calc_joint_offset( - left_hand_center[0][0][0], left_hand_center[1][0][0] - ) - - # OBJECTS - JOINTS - if use_joint_object_offset: - # Object distances from patient joints. Shape: [top_k, n_dets, 22, 2] - obj_joints_dist_k = np.zeros((top_k_objects, num_det_classes, 22, 2)) - for object_k_index in range(top_k_objects): - obj_centers_x = all_obj_centers_x[object_k_index] - obj_centers_y = all_obj_centers_y[object_k_index] - - joint_object_offset = [] - for obj_ind in range(num_det_classes): - offset_vector = calc_joint_offset( - obj_centers_x[obj_ind], obj_centers_y[obj_ind] - ) - joint_object_offset.append(offset_vector) - - obj_joints_dist_k[object_k_index] = joint_object_offset - - ######################### - # Feature vector - ######################### - feature_vec = [] - - for object_k_index in range(top_k_objects): - # HANDS - for hand_conf, hand_idx, hand_dist in [ - (right_hand_conf, right_hand_idx, right_hand_dist_k[object_k_index]), - (left_hand_conf, left_hand_idx, left_hand_dist_k[object_k_index]), - ]: - if use_activation: - feature_vec.append([hand_conf]) - if use_hand_dist: - hd1 = [ - item - for ii, tupl in enumerate(hand_dist) - for item in tupl - if ii not in [right_hand_idx, left_hand_idx] - ] - feature_vec.append(hd1) - if use_center_dist: - feature_vec.append(image_center_obj_dist_k[0][hand_idx]) - - # RIGHT-LEFT HAND - if use_hand_dist: - feature_vec.append(right_hand_dist_k[0][left_hand_idx]) - if use_intersection: - feature_vec.append([right_hand_intersection_k[0][left_hand_idx]]) - - # OBJECTS - for obj_ind in range(num_det_classes): - if obj_ind in [right_hand_idx, left_hand_idx]: - # We already have the hand data - continue - - if use_activation: - feature_vec.append([det_class_max_conf[obj_ind][object_k_index]]) - if use_intersection: - feature_vec.append([right_hand_intersection_k[object_k_index][obj_ind]]) - feature_vec.append([left_hand_intersection_k[object_k_index][obj_ind]]) - if use_center_dist: - feature_vec.append(image_center_obj_dist_k[object_k_index][obj_ind]) - - # HANDS-JOINTS - if use_joint_hand_offset: - for lh_offset in joint_left_hand_offset: - feature_vec.append(lh_offset) - - for rh_offset in joint_right_hand_offset: - feature_vec.append(rh_offset) - - # OBJ-JOINTS - if use_joint_object_offset: - for object_k_index in range(top_k_objects): - for obj_ind in 
range(num_det_classes): - if obj_ind in [right_hand_idx, left_hand_idx]: - # We already have the hand data - continue - for offset in obj_joints_dist_k[object_k_index][obj_ind]: - feature_vec.append(offset) - - feature_vec = [item for sublist in feature_vec for item in sublist] # flatten - feature_vec = np.array(feature_vec, dtype=np.float64) - - return feature_vec diff --git a/python-tpl/TCN_HPL b/python-tpl/TCN_HPL index f17241944..9f047fe63 160000 --- a/python-tpl/TCN_HPL +++ b/python-tpl/TCN_HPL @@ -1 +1 @@ -Subproject commit f172419443025143d1a15db7eadf75ef0c120fe9 +Subproject commit 9f047fe630770cebf5ae6e5c12647e988e0bb34a diff --git a/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py b/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py index 772b3ba4f..7146ef768 100644 --- a/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py +++ b/tests/angel_system/impls/detect_activities/detections_to_activities/test_utils.py @@ -2,7 +2,7 @@ from tcn_hpl.data.components.augmentations import NormalizePixelPts -from angel_system.activity_classification.utils import ( +from tcn_hpl.data.vectorize_classic import ( obj_det2d_set_to_feature, ) From 3d3163164a8815ff56d10eb63311f1c9002a2345 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Tue, 22 Oct 2024 12:39:47 -0400 Subject: [PATCH 18/40] Add colorlog dep to support TCN training --- poetry.lock | 17 ++++++++++++++++- pyproject.toml | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index cc3c4e8d9..6cdb6abaa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1800,6 +1800,21 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "hydra-colorlog" +version = "1.2.0" +description = "Enables colorlog for Hydra apps" +optional = false +python-versions = "*" +files = [ + {file = "hydra-colorlog-1.2.0.tar.gz", hash = "sha256:d44f85008fabd2448c7e3b496c31b44d7610560f6fff74f3673afaa949870899"}, + {file = "hydra_colorlog-1.2.0-py3-none-any.whl", hash = "sha256:33d05fc11ca9bc7a5d69cfb3c8fb395a1bc52fa1dfe7aca6a6f5ffb57f6e7c4b"}, +] + +[package.dependencies] +colorlog = "*" +hydra-core = ">=1.0.0" + [[package]] name = "hydra-core" version = "1.3.2" @@ -7491,4 +7506,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "==3.8.10" -content-hash = "5ff3fcf2e78b589959900537b5da76b2f5cc4a6fbebb3d56bf88e04ccd409c6d" +content-hash = "3f2096cbf129b8839cc8ad8d60c5bf596e9ea330bdfccee493d95f0e2d6158c0" diff --git a/pyproject.toml b/pyproject.toml index 50d20e831..d81a12df0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,7 @@ networkx = ">=3.1" # TCN Activity classifier +hydra-colorlog = "^1.2.0" tcn-hpl = {path = "python-tpl/TCN_HPL", develop = true} # Yolo v7 object detection From c6a36f1d7fd586bbdbfc620814a903e07bef68ca Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Wed, 23 Oct 2024 09:53:55 -0400 Subject: [PATCH 19/40] Minor dep order reorg, pin hydra-colorlog version like in tcn-hpl --- poetry.lock | 2 +- pyproject.toml | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 6cdb6abaa..59c7e7b0f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7506,4 +7506,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "==3.8.10" -content-hash 
= "3f2096cbf129b8839cc8ad8d60c5bf596e9ea330bdfccee493d95f0e2d6158c0" +content-hash = "9b1b12203c38751ed9f97e7d4a11d7deb43e3c2d544ae29ecddf6f9234868bcb" diff --git a/pyproject.toml b/pyproject.toml index d81a12df0..ad397e6bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,9 +55,9 @@ tensorrt-cu12-libs = "==10.1.0" ## For UHO Activity Classifier ## - see arisia: /data/dawei.du/ptg-activity-recognition/activity_hydra/requirements.txt -hydra-core = "==1.3.2" +#hydra-core = "==1.3.2" #python-dotenv = "^0.21.0" -pytorch-lightning = "==1.7.7" +#pytorch-lightning = "==1.7.7" # Pinning setup tools to this known version due to torch 1.10.2 tensorboard # functionality assuming `distutils.version.LooseVersion` functionality. @@ -88,7 +88,9 @@ networkx = ">=3.1" # TCN Activity classifier -hydra-colorlog = "^1.2.0" +hydra-core = "==1.3.2" +hydra-colorlog = "==1.2.0" +pytorch-lightning = "==1.7.7" tcn-hpl = {path = "python-tpl/TCN_HPL", develop = true} # Yolo v7 object detection From 2f1c2a344eaa44f2229ece2828461a441acb4d54 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Wed, 23 Oct 2024 10:30:46 -0400 Subject: [PATCH 20/40] Updates to training README --- TRAIN_AND_RUN_README.md | 104 +++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 28 deletions(-) diff --git a/TRAIN_AND_RUN_README.md b/TRAIN_AND_RUN_README.md index 5443f9274..c2f6bbf35 100644 --- a/TRAIN_AND_RUN_README.md +++ b/TRAIN_AND_RUN_README.md @@ -1,15 +1,15 @@ # Step-by-step how to run the Angel System pipeline ## Table of Contents -- [Local installation](#localinstallation) -- [Docker installation](#dockerinstallation) +- [Local installation](#local-installation) +- [Docker installation](#docker-installation) - [Data and pretrained models](#data) -- [Training](#training) +- [Training](#training-procedure) - [Training on lab data](#lab_data) -- [Docker local testing with pre-recorded data](#local) -- [Real-time](#realtime) +- [Docker local testing with pre-recorded data](#docker-local-testing) +- [Real-time](#docker-real-time) -## Local Installation +## Local Installation Follow the following steps (the optional steps are for active development purposes): @@ -22,12 +22,16 @@ git submodule update --init --recursive ##### Create the environment ``` +# IF YOU DON'T ALREADY HAVE PYTHON 3.8.10 AVAILABLE conda create --name angel_systen python=3.8.10 conda activate angel_test_env poetry install + +# OR JUST +poetry install ``` -## Docker Installation +## Docker Installation Follow the following steps (the optional steps are for active development purposes): ### Get required repositories: @@ -47,7 +51,7 @@ git submodule update --init --recursive ./workspace_build.sh; source install/setup.sh ``` -## Data +## Data ### Object Detection Source Data TODO @@ -66,7 +70,7 @@ of data are described and referred to. Data is stored on their SFTP server, to which the "Click to Download" links refer to. -Storage of downloaded ZIP archives, and their subsequent extractions, should +Storage of downloaded ZIP archives, and their subsequent extractions, should follow the pattern. A script is provided ``` @@ -97,7 +101,7 @@ bbn_data/ └── r18_chest_seal/ ... 
``` -Golden data should be marked as read-only after downloading and extracting to +Golden data should be marked as read-only after downloading and extracting to prevent accidental modification of the files: ``` chmod a-w -R bbn_data/lab_data-golden/ @@ -137,7 +141,7 @@ bbn_create_truth_coco \ - `/angel_system/model_files/models/r18_det.pt`: object detection trained model -## Training Procedure +## Training Procedure We take the following steps: @@ -149,6 +153,13 @@ We take the following steps: 6. train the TCN ### Example with M2 +Contents: +- [Train Object Detection Model](#train-object-detection-model) +- [Generate activity classification truth COCO file](#generate-activity-classification-truth-coco-file) +- [Generate Object Predictions in the Scene](#generate-object-predictions-in-the-scene) +- [Generate Pose Predictions](#generate-pose-predictions) +- [Configure TCN Training Experiment](#configure-tcn-training-experiment) +- [Run TCN Training](#run-tcn-training) #### Train Object Detection Model First we train the detection model on annotated data. @@ -183,17 +194,23 @@ otherwise splits may be created manually. For example: ``` kwcoco split \ - --src /home/local/KHQ/paul.tunison/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet/positive/3_tourns_122023/activity_truth.coco.json \ + --src ~/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet-activity_truth.coco.json \ --dst1 TRAIN-activity_truth.coco.json \ - --dst2 REMAINDER-activity-truth.coco.json \ + --dst2 REMAINDER-activity_truth.coco.json \ --splitter video \ --factor 2 kwcoco split \ - --src REMAINDER-activity-truth.coco.json \ - --dst1 VALIDATION-activity-truth.coco.json \ - --dst2 TEST-activity-truth.coco.json \ + --src REMAINDER-activity_truth.coco.json \ + --dst1 VALIDATION-activity_truth.coco.json \ + --dst2 TEST-activity_truth.coco.json \ --splitter video \ --factor 2 +# Protect your files! +chmod a-w \ + TRAIN-activity_truth.coco.json \ + REMAINDER-activity_truth.coco.json \ + VALIDATION-activity_truth.coco.json \ + TEST-activity_truth.coco.json ``` #### Generate Object Predictions in the Scene @@ -202,12 +219,13 @@ This is to ensure that all represented videos and image frames are predicted on and present in both COCO files. ``` python-tpl/yolov7/yolov7/detect_ptg.py \ - -i ~/data/darpa-ptg/bbn_data/lab_data-working/m2_tourniquet/activity_truth.coco.json \ + -i TRAIN-activity_truth.coco.json \ -o test_det_output.coco.json --model-hands ./model_files/object_detector/hands_model.pt \ --model-objects ./model_files/object_detector/m2_det.pt \ --model-device 0 \ --img-size 768 \ +# Repeat for other relevant activity truth inputs ``` Additional debug outputs may optionally be generated. See the `-h`/`--help` options for more details. @@ -223,22 +241,49 @@ python-tpl/TCN_HPL/tcn_hpl/data/utils/pose_generation/generate_pose_data.py \\ --det-weights ./model_files/pose_estimation/pose_det_model.pth \\ --pose-config python-tpl/TCN_HPL/tcn_hpl/data/utils/pose_generation/configs/ViTPose_base_medic_casualty_256x192.py \\ --pose-weights ./model_files/pose_estimation/pose_model.pth +# Repeat for other relevant activity truth inputs ``` -##### Example with R18 +#### Configure TCN Training Experiment +Create a new version of, or modify an existing (preferring the former) and +modify attributes appropriately for your experiment. + +TODO: Configuration file update guidance. + +#### Run TCN Training +TODO + +## Example with R18 -First we train the detection model on annotated data. 
This would be the same data source for both the lab and professional data +First we train the detection model on annotated data. This would be the same +data source for both the lab and professional data ``` cd yolo7 -python yolov7/train.py --workers 8 --device 0 --batch-size 4 --data configs/data/PTG/medical/r18_task_objects.yaml --img 768 768 --cfg configs/model/training/PTG/medical/yolov7_r18.yaml --weights weights/yolov7.pt --project /data/PTG/medical/training/yolo_object_detector/train/ --name r18_all_v1_example +python yolov7/train.py \ + --workers 8 \ + --device 0 \ + --batch-size 4 \ + --data configs/data/PTG/medical/r18_task_objects.yaml \ + --img 768 768 \ + --cfg configs/model/training/PTG/medical/yolov7_r18.yaml \ + --weights weights/yolov7.pt \ + --project /data/PTG/medical/training/yolo_object_detector/train/ \ + --name r18_all_v1_example ``` ###### Note on training on lab data : since we do not have detection GT for lab data, this is our start point for training the TCN on the lab data -Next, we generate detection predictions in kwcoco file using the following script. Note that this +Next, we generate detection predictions in kwcoco file using the following script. Note that this ``` -python yolov7/detect_ptg.py --tasks r18 --weights /data/PTG/medical/training/yolo_object_detector/train/r18_all_v1_example/weights/best.pt --project /data/PTG/medical/training/yolo_object_detector/detect/ --name r18_all_example --device 0 --img-size 768 --conf-thres 0.25 +python yolov7/detect_ptg.py \ + --tasks r18 \ + --weights /data/PTG/medical/training/yolo_object_detector/train/r18_all_v1_example/weights/best.pt \ + --project /data/PTG/medical/training/yolo_object_detector/detect/ \ + --name r18_all_example \ + --device 0 \ + --img-size 768 \ + --conf-thres 0.25 cd TCN_HPL/tcn_hpl/data/utils/pose_generation/configs ``` @@ -247,20 +292,23 @@ with the above scripts, we should get a kwcoco file at: /data/PTG/medical/training/yolo_object_detector/detect/r18_all_example/ ``` -Edit `TCN_HPL/tcn_hpl/data/utils/pose_generation/configs/main.yaml` with the task in hand (here, we use r18), the path to the output detection kwcoco, and where to output kwcoco files from our pose generation step. +Edit `TCN_HPL/tcn_hpl/data/utils/pose_generation/configs/main.yaml` with the +task in hand (here, we use r18), the path to the output detection kwcoco, and +where to output kwcoco files from our pose generation step. ``` cd .. python generate_pose_data.py cd TCN_HPL/tcn_hpl/data/utils ``` -At this stage, there should be a new kwcoco file generated in the field defined at `main.yaml`: +At this stage, there should be a new kwcoco file generated in the field defined +at `main.yaml`: ``` data: save_root: ``` -Next, edit the `/TCN_HPL/configs/experiment/r18/feat_v6.yaml` file with the correct experiment name and kwcoco file in the following fields: - +Next, edit the `/TCN_HPL/configs/experiment/r18/feat_v6.yaml` file with the +correct experiment name and kwcoco file in the following fields: ``` exp_name: path: @@ -278,7 +326,7 @@ python train.py experiment=r18/feat_v6 The TCN training script produced a `text_activity_preds.mscoco.json` which is used by the Global Step Predictor. That file should be copied to `/angel_system/model_files/coco/`. -## Docker local testing +## Docker local testing ***to start the service run:*** ``` @@ -302,7 +350,7 @@ tmuxinator stop demos/medical/Kitware-R18 ``` -## Docker real-time +## Docker real-time This step requires a user on the BBN systems to login to the Kitware machine. 
After it is set up: From d8bff161134c598695f4523665691be29ffad135 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Wed, 23 Oct 2024 14:57:57 -0400 Subject: [PATCH 21/40] Fix archive extraction script --- scripts/extract_bbn_video_archives.bash | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/extract_bbn_video_archives.bash b/scripts/extract_bbn_video_archives.bash index d7752b7f4..5c1ed257d 100755 --- a/scripts/extract_bbn_video_archives.bash +++ b/scripts/extract_bbn_video_archives.bash @@ -7,15 +7,18 @@ # they are named in such a way as to allow for extraction into a subdirectory # with the same name (e.g., "foo-1.0.zip" will extract into "./foo-1.0"). # +shopt -s globstar nullglob -for NAME in *.zip +for NAME in ./**/*.zip do echo "+++ Starting $NAME +++" + DNAME="$(dirname "$NAME")" BNAME="$(basename "$NAME" .zip)" - if [[ ! -d "${BNAME}" ]] + TARGET="${DNAME}/${BNAME}" + if [[ ! -d "${TARGET}" ]] then - mkdir "$BNAME"; - unzip -d "$BNAME" "$NAME" + mkdir "$TARGET"; + unzip -d "$TARGET" "$NAME" fi echo "--- Finished $NAME ---" done From 502322e98628682c05e2f117320a1efc7b2032b6 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Wed, 23 Oct 2024 15:13:28 -0400 Subject: [PATCH 22/40] Add final grep to probe helper script --- scripts/extract_bbn_video_ffprobe.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/extract_bbn_video_ffprobe.bash b/scripts/extract_bbn_video_ffprobe.bash index a36e63259..dfcc582ce 100755 --- a/scripts/extract_bbn_video_ffprobe.bash +++ b/scripts/extract_bbn_video_ffprobe.bash @@ -29,4 +29,4 @@ do done # Collect all probed video streams metadata summary for resolution and fps info -# $> grep -rin "Stream.*: Video" ../lab_data-working/ >../probe_summary.txt +grep -rin "Stream.*: Video" "${WORKDIR}" >"${WORKDIR}/probe_summary.txt" From 7fd0a2694e77bd4c2c46403fd241782ca312572b Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Wed, 23 Oct 2024 20:55:35 -0400 Subject: [PATCH 23/40] Revert TensorRT Engine model usage Turns out the engine model is too fragile and is very specific to the hardware to which the conversion occurred. 
--- ansible/roles/provision-files/vars/main.yml | 6 +++--- tmux/demos/medical/Kitware-R18.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/provision-files/vars/main.yml b/ansible/roles/provision-files/vars/main.yml index 0648991e1..20fd83550 100644 --- a/ansible/roles/provision-files/vars/main.yml +++ b/ansible/roles/provision-files/vars/main.yml @@ -61,9 +61,9 @@ girder_file_downloads: sha512: 7ded5cb71ef0efe2444a19c7452073c63a34d65e3038b24236b304836abd47e8911e3d3023d2ba7e5a8d8c1e337e6f4b8e3080709827eb2b0daaedb0b6248561 dest: "{{ stage_dirs.pose_estimation }}/pose_model.pth" # Hand detector - - file_id: 6710037cba16447d41e1663a - sha512: cf6a20a966dcaee46354545ef5d2086f2dcf31112eb90282e5dd6a24824a584d5e549274112f6f028911050a48bcaba6c944e5d3d870edcafe2fcd34e61d95f5 - dest: "{{ stage_dirs.object_detector }}/hands_model.engine" + - file_id: 6605ca2e8b763ca20ae99f77 + sha512: 42b56f34c0c443ad00d71e66334d6e852a7811011b22a60da9b5e2721565c21d1b5daad17c8a708908c9658ded81a663729090526feb5286bd0a591cf4fb5d8f + dest: "{{ stage_dirs.object_detector }}/hands_model.pt" # ---- M2 ---- # Object detector diff --git a/tmux/demos/medical/Kitware-R18.yml b/tmux/demos/medical/Kitware-R18.yml index fe77c1d49..7f59442e3 100644 --- a/tmux/demos/medical/Kitware-R18.yml +++ b/tmux/demos/medical/Kitware-R18.yml @@ -112,7 +112,7 @@ windows: -p det_topic:=ObjectDetections2d -p object_net_checkpoint:=${MODEL_DIR}/object_detector/r18_det.pt -p inference_img_size:=768 - -p hand_net_checkpoint:=${MODEL_DIR}/object_detector/hands_model.engine + -p hand_net_checkpoint:=${MODEL_DIR}/object_detector/hands_model.pt -p cuda_device_id:=0 - activity_classifier: ros2 run angel_system_nodes activity_classifier_tcn --ros-args From 21d442377112bdbc42c0eb3dac54565234537624 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Thu, 24 Oct 2024 00:31:11 -0400 Subject: [PATCH 24/40] Update TCN related things to use common dataset and vector computation --- .../tcn_hpl/predict.py | 482 ------------------ angel_system/data/common/config_structs.py | 8 +- config/activity_labels/medical/m2.yaml | 1 + config/activity_labels/medical/m3.yaml | 1 + config/activity_labels/medical/m4.yaml | 1 + config/activity_labels/medical/m5.yaml | 1 + config/activity_labels/medical/r18-demo.yaml | 1 + config/activity_labels/medical/r18.yaml | 1 + python-tpl/TCN_HPL | 2 +- ros/angel_msgs/msg/HandJointPosesUpdate.msg | 1 + .../activity_classifier_tcn.py | 310 ++++------- .../python/angel_utils/object_detection.py | 4 +- tmux/demos/medical/Kitware-M2.yml | 13 +- 13 files changed, 131 insertions(+), 695 deletions(-) diff --git a/angel_system/activity_classification/tcn_hpl/predict.py b/angel_system/activity_classification/tcn_hpl/predict.py index 5024399d2..433752a38 100644 --- a/angel_system/activity_classification/tcn_hpl/predict.py +++ b/angel_system/activity_classification/tcn_hpl/predict.py @@ -1,404 +1,13 @@ from dataclasses import dataclass from pathlib import Path -import os from threading import RLock from typing import List from typing import Dict from typing import Mapping from typing import Optional from typing import Sequence -from typing import Tuple import kwcoco -import numpy as np -import numpy.typing as npt -import torch - -from tcn_hpl.data.components.augmentations import NormalizePixelPts, NormalizeFromCenter -from tcn_hpl.models.ptg_module import PTGLitModule - -from tcn_hpl.data.vectorize_classic import ( - tlbr_to_xywh, - obj_det2d_set_to_feature, -) - - -def load_module( - checkpoint_file, 
label_mapping_file, torch_device, topic -) -> PTGLitModule: - """ - - :param checkpoint_file: - :param label_mapping_file: - :param torch_device: - :param topic: - :return: - """ - # # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility - # torch.use_deterministic_algorithms(True) - # os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" - # lightning.fabric.utilities.seed.seed_everything(12345) - - mapping_file_dir = os.path.abspath(os.path.dirname(label_mapping_file)) - mapping_file_name = os.path.basename(label_mapping_file) - model_device = torch.device(torch_device) - model = PTGLitModule.load_from_checkpoint( - checkpoint_file, - map_location=model_device, - # HParam overrides - data_dir=mapping_file_dir, - mapping_file_name=mapping_file_name, - topic=topic, - ) - - # print(f"CLASSES IN MODEL: {model.classes}") - # print(f"class_ids IN MODEL: {model.class_ids}") - - return model - - -@dataclass -class ObjectDetectionsLTRB: - """ - Expected object detections format for a single frame from the ROS2 - ecosystem. - """ - - # Identifier for this set of detections. - id: int - # Vectorized detection bbox left pixel bounds - left: Tuple[float] - # Vectorized detection bbox top pixel bounds - top: Tuple[float] - # Vectorized detection bbox right pixel bounds - right: Tuple[float] - # Vectorized detection bbox bottom pixel bounds - bottom: Tuple[float] - # Vectorized detection label of the most confident class. - labels: Tuple[str] - # Vectorized detection confidence value of the most confidence class. - confidences: Tuple[float] - - -@dataclass -class PatientPose: - # Identifier for this set of detections. - id: int - # Vectorized keypoints - positions: list - # Vectorized orientations - # orientations: list - # Vectorized keypoint label - labels: str - - -def normalize_detection_features( - det_feats: npt.ArrayLike, - feat_version: int, - top_k_objects: int, - img_width: int, - img_height: int, - num_det_classes: int, - normalize_pixel_pts: bool, - normalize_center_pts: bool, -) -> None: - """ - Normalize input object detection descriptor vectors, outputting new vectors - of the same shape. - - Expecting input `det_feats` to be in the shape `[window_size, num_feats]'. - - NOTE: This method normalizes in-place, so be sure to clone the input array - if that is not desired. - - :param det_feats: Object Detection features to be normalized. - :param feature_version: Version of the feature conversion approach. - :param top_k_objects: Number top confidence objects to use per label, defaults to 1 - :param image_width: Integer pixel width of the image that object detections - were generated on. - :param image_height: Integer pixel height of the image that object - detections were generated on. - :param num_det_classes: Number of object detection classes (note: DOES include the hand labels but DOES NOT include the patient and user labels) - :param normalize_pixel_pts: If true, will apply the NormalizePixelPts data augmentation to - the ``det_feats`` - :param normalize_center_pts: If true, will apply the NormalizeFromCenter data augmentation to - the ``det_feats`` - - :return: Normalized object detection features. - """ - if normalize_pixel_pts: - # This method is known to normalize in-place. 
- # Shape [window_size, n_feats] - NormalizePixelPts( - img_width, img_height, num_det_classes, feat_version, top_k_objects - )(det_feats) - if normalize_center_pts: - NormalizeFromCenter( - img_width, img_height, num_det_classes, feat_version, top_k_objects - )(det_feats) - - -def objects_to_feats( - frame_object_detections: Sequence[Optional[ObjectDetectionsLTRB]], - frame_patient_poses: Sequence[Optional[PatientPose]], - det_label_to_idx: Dict[str, int], - feat_version: int, - image_width: int, - image_height: int, - feature_memo: Optional[Dict[int, npt.NDArray]] = None, - pose_memo: Optional[Dict[int, npt.NDArray]] = None, - top_k_objects: int = 1, - normalize_pixel_pts=False, - normalize_center_pts=False, - pose_repeat_rate=0, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Convert some object detections for some window of frames into a feature - vector of version requested. - - :param frame_object_detections: Sequence of object detections for some - window of frames. The window size is dictated by this length of this - sequence. Some frame "slots" may be None to indicate there were no - object detections for that frame. - :param frame_patient_poses: Sequence of poses for some - window of frames. The window size is dictated by this length of this - sequence. Some frame "slots" may be None to indicate there were was not - a pose for that frame. - :param det_label_to_idx: Mapping of object detector classes to the - activity-classifier input index expectation. - :param feat_version: Integer version of the feature vector to generate. - See the `obj_det2d_set_to_feature` function for details. - :param image_width: Integer pixel width of the image that object detections - were generated on. - :param image_height: Integer pixel height of the image that object - detections were generated on. - :param feature_memo: Optional memoization cache to given us that we will - access and insert into based on the IDs given to `ObjectDetectionsLTRB` - instances encountered. - :param pose_memo: Optional memoization cache of the pose used for each - frame and the repeat pose count at each frame - :param top_k_objects: Number top confidence objects to use per label, defaults to 1 - :param normalize_pixel_pts: If true, will apply the NormalizePixelPts data augmentation to - the feature vector - :param normalize_center_pts: If true, will apply the NormalizeFromCenter data augmentation to - the feature vector - :param pose_repeat_rate: The maximum number of sequential None value poses that can be replaced with - a valid pose in a previous frame. If this number is exceeded, the pose - for the frame will remain None. - - :raises ValueError: No object detections nor patient poses passed in. - :raises ValueError: No non-None object detections in the given input - window. - :raises ValueError: No non-None patient poses in the given input - window. - - :return: Window of normalized feature vectors for the given object - detections (shape=[window_size, n_feats]), and an appropriate mask - vector for use with activity classification (shape=[window_size]). 
- """ - if 0 in [len(frame_patient_poses), len(frame_object_detections)]: - raise ValueError( - "Need at least one patient pose or object det in input sequences" - ) - if all([d is None for d in frame_object_detections]): - raise ValueError("No frames with detections in input.") - if all([p is None for p in frame_patient_poses]): - raise ValueError("No frames with patient poses in input.") - - print(f"{len(frame_object_detections)} detections") - print(f"{len(frame_patient_poses)} poses") - - feat_memo = {} if feature_memo is None else feature_memo - pose_memo = {} if pose_memo is None else pose_memo - - window_size = len(frame_object_detections) - - # Shape [window_size, None|n_feats] - feature_list: List[Optional[npt.NDArray]] = [None] * window_size - feature_ndim = None - feature_dtype = None - - # hands-joints offset vectors - zero_joint_offset = [0 for i in range(22)] - - last_pose = None - repeated_pose_count = 0 - # for pose in frame_patient_poses: - for i, (pose, detections) in enumerate( - zip(frame_patient_poses, frame_object_detections) - ): - - if detections is None: - print("no detections!") - continue - - detection_id = detections.id - pose_id = pose[0].id if pose else None - memo_key = (detection_id, pose_id) - - print(f"memo_key: {memo_key}") - confidences = detections.confidences - if memo_key in feat_memo: - # We've already processed this set - print("feature already in history") - feat = feat_memo[memo_key] - last_pose = pose_memo[memo_key]["last_pose"] - repeated_pose_count = pose_memo[memo_key]["repeated_pose_count"] - else: - # Detections - labels = detections.labels - xs, ys, ws, hs = tlbr_to_xywh( - detections.top, - detections.left, - detections.bottom, - detections.right, - ) - - # Determine what pose to use - pose_keypoints = [] - - if pose is not None: - print("======================") - print("======================") - print("New pose") - print("======================") - print("======================") - repeated_pose_count = 0 - last_pose = pose - elif last_pose is not None: - repeated_pose_count += 1 - # Repeat at most {pose_repeat_rate} poses in a row - if repeated_pose_count > (pose_repeat_rate / 2): - last_pose = None - print("Resetting pose to None") - repeated_pose_count = 0 - else: - print("************************") - print("************************") - print("Repeating pose") - print("************************") - print("************************") - else: - print("pose is None") - - pose_memo[memo_key] = { - "last_pose": last_pose, - "repeated_pose_count": repeated_pose_count, - } - - # Grab the joint keypoints - if last_pose: - for joint in last_pose: - kwcoco_format_joint = { - "xy": [joint.positions.x, joint.positions.y], - "keypoint_category_id": -1, # TODO: not in message - "keypoint_category": joint.labels, - } - pose_keypoints.append(kwcoco_format_joint) - - # Create the feature vector - feat = ( - obj_det2d_set_to_feature( - labels, - xs, - ys, - ws, - hs, - confidences, - pose_keypoints=( - pose_keypoints if pose_keypoints else zero_joint_offset - ), - obj_label_to_ind=det_label_to_idx, - version=feat_version, - top_k_objects=top_k_objects, - ) - .ravel() - .astype(np.float32) - ) - - feat_memo[memo_key] = feat - - feature_ndim = feat.shape - feature_dtype = feat.dtype - feature_list[i] = feat - # Already checked that we should have non-zero frames with detections above - # so feature_ndim/_dtype should not be None at this stage - assert feature_ndim is not None - assert feature_dtype is not None - - # Create mask vector, which should 
indicate which window indices should not - # be considered. - # NOTE: The expected network is not yet trained to do this, so the mask is - # always 1's right now. - # Shape [window_size] - mask = torch.ones(window_size) - - # Fill in the canonical "empty" feature vector for those frames that had no - # detections. - empty_vec = np.zeros(shape=feature_ndim, dtype=feature_dtype) - for i in range(window_size): - if feature_list[i] is None: - feature_list[i] = empty_vec - - # Shape [window_size, n_feats] - feature_vec = torch.tensor(feature_list) - - # Normalize features - # Shape [window_size, n_feats] - if normalize_pixel_pts or normalize_center_pts: - normalize_detection_features( - feature_vec, - feat_version, - top_k_objects, - image_width, - image_height, - len(det_label_to_idx), - normalize_pixel_pts, - normalize_center_pts, - ) - - return feature_vec, mask - - -def predict( - model: PTGLitModule, - window_feats: torch.Tensor, - mask: torch.Tensor, -): - """ - Compute model activity classifications, returning a tensor of softmax - probabilities. - - We assume the input model and tensors are already on the appropriate - device. - - We assume that input features normalized before being provided to this - function. See :ref:`normalize_detection_features`. - - The "prediction" of this result can be determined via the `argmax` - function:: - - proba = predict(model, window_feats, mask) - pred = torch.argmax(proba) - - :param model: PTGLitModule instance to use. - :param window_feats: Window (sequence) of *normalized* object detection - features. Shape: [window_size, feat_dim]. - :param mask: Boolean array indicating which frames of the input window for - the network to consider. Shape: [window_size]. - - :return: Probabilities (softmax) of the activity classes. - """ - x = window_feats.T.unsqueeze(0).float() - m = mask[None, :] - # print(f"window_feats: {x.shape}") - # print(f"mask: {m.shape}") - with torch.no_grad(): - logits = model(x, m) - # Logits access mirrors model step function argmax access here: - # tcn_hpl.models.ptg_module --> PTGLitModule.model_step - # ¯\_(ツ)_/¯ - return torch.softmax(logits[-1, :, :, -1], dim=1)[0] @dataclass @@ -515,94 +124,3 @@ def write_file(self): with self._lock: dset = self._dset dset.dump(dset.fpath, newlines=True) - - -############################################################################### -# Functions for debugging things in an interpreter -# -def windows_from_all_feature( - all_features: npt.ArrayLike, window_size: int -) -> npt.ArrayLike: - """ - Iterate over overlapping windows in the frame detections features given. - - :param all_features: All object detection feature vectors for all frames to - consider. Shape: [n_frames, n_feats] - :param window_size: Size of the window to slide. - - :return: Generator yielding different windows of feature vectors. - """ - i = 0 - stride = 1 - while (i + window_size) < np.shape(all_features)[0]: - yield all_features[i : (i + window_size), :] - i += stride - - -def debug_from_array_file() -> None: - import functools - import re - import numpy as np - import torch - from tqdm import tqdm - from angel_system.tcn_hpl.predict import ( - load_module, - predict, - windows_from_all_feature, - ) - - # Pre-computed, un-normalized features per-frame extracted from the - # training harness, in temporally ascending order. 
- # Shape = [n_frames, n_feats] - all_features = torch.tensor( - np.load("./model_files/all_activities_20.npy").astype(np.float32).T - ).to("cuda") - - model = load_module( - "./model_files/activity_tcn-coffee-checkpoint.ckpt", - "./model_files/activity_tcn-coffee-mapping.txt", - "cuda", - ).eval() - - # Above model window size = 30 - mask = torch.ones(30).to("cuda") - - # Normalize features - # The `objects_to_feats` above includes normalization along with the - # bounding box conversion, so this needs to be applied explicitly outside - # using `objects_to_feats` (though, using the same normalize func). - norm_func = functools.partial( - normalize_detection_features, - feat_version=5, - img_width=1280, - img_height=720, - num_det_classes=42, - ) - - # Shape [n_windows, window_size, n_feats] - all_windows = list(windows_from_all_feature(all_features, 30)) - - all_proba = list( - tqdm( - (predict(model, norm_func(w.clone()), mask) for w in all_windows), - total=len(all_windows), - ) - ) - - all_preds_idx = np.asarray([int(torch.argmax(p)) for p in all_proba]) - all_preds_lbl = [model.classes[p] for p in all_preds_idx] - - # Load Hannah preds - comparison_preds_file = "./model_files/all_activities_20_preds.txt" - re_window_pred = re.compile(r"^gt: (\d+), pred: (\d+)$") - comparison_gt = [] - comparison_preds_idx = [] - with open(comparison_preds_file) as infile: - for l in infile.readlines(): - m = re_window_pred.match(l.strip()) - comparison_gt.append(int(m.groups()[0])) - comparison_preds_idx.append(int(m.groups()[1])) - comparison_preds_idx = np.asarray(comparison_preds_idx) - - ne_mask = all_preds_idx != comparison_preds_idx - all_preds_idx[ne_mask], comparison_preds_idx[ne_mask] diff --git a/angel_system/data/common/config_structs.py b/angel_system/data/common/config_structs.py index 56e536244..b3ce72b06 100644 --- a/angel_system/data/common/config_structs.py +++ b/angel_system/data/common/config_structs.py @@ -77,7 +77,7 @@ class ActivityLabel: class ActivityLabelSet: version: str title: str - labels: Tuple[ActivityLabel] + labels: Sequence[ActivityLabel] def __post_init__(self): # coerce nested label objects into the ObjectLabel type. @@ -108,7 +108,7 @@ class TaskStep: id: int label: str full_str: str - activity_ids: Tuple[int] + activity_ids: Sequence[int] @dataclass @@ -119,7 +119,7 @@ class LinearTask: version: str title: str - labels: Tuple[TaskStep] + labels: Sequence[TaskStep] def __post_init__(self): # Coerce pathlike input (str) into a Path instance if not already. @@ -168,7 +168,7 @@ class MultiTaskConfig: version: str title: str - tasks: Tuple[OneTaskConfig] + tasks: Sequence[OneTaskConfig] def __post_init__(self): # coerce nested task objects into OneTaskConfig types diff --git a/config/activity_labels/medical/m2.yaml b/config/activity_labels/medical/m2.yaml index 835b65811..77d0a7223 100644 --- a/config/activity_labels/medical/m2.yaml +++ b/config/activity_labels/medical/m2.yaml @@ -1,4 +1,5 @@ version: "1" +title: "Tourniquet" labels: # Item: # - id: Integer identification number of the label. diff --git a/config/activity_labels/medical/m3.yaml b/config/activity_labels/medical/m3.yaml index 3501a1f3b..6fafea84e 100644 --- a/config/activity_labels/medical/m3.yaml +++ b/config/activity_labels/medical/m3.yaml @@ -1,4 +1,5 @@ version: "1" +title: "Pressure Dressing" labels: # Item: # - id: Integer identification number of the label. 
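
The `Tuple` → `Sequence` loosening in `config_structs.py` above works together with the `__post_init__` coercion: values arrive from the YAML configs as plain lists of dicts and are converted into dataclass instances afterwards. A minimal sketch of that pattern, assuming only the fields visible in this patch (the real `ActivityLabel` definition may carry more):

```python
from dataclasses import dataclass
from typing import Sequence


@dataclass
class ActivityLabel:
    # Only the fields referenced in this patch are shown; illustrative only.
    id: int
    label: str


@dataclass
class ActivityLabelSet:
    version: str
    title: str
    labels: Sequence[ActivityLabel]

    def __post_init__(self):
        # YAML parsing yields a list of dicts; coerce them into ActivityLabel
        # instances so downstream code can rely on attribute access.
        self.labels = tuple(
            l if isinstance(l, ActivityLabel) else ActivityLabel(**l)
            for l in self.labels
        )


# e.g. the shape of an activity label config after this patch adds `title:`
label_set = ActivityLabelSet(
    version="1",
    title="Tourniquet",
    labels=[{"id": 0, "label": "background"}],  # illustrative entry
)
print(label_set.labels[0].label)  # background
```
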
diff --git a/config/activity_labels/medical/m4.yaml b/config/activity_labels/medical/m4.yaml index 7b739fe93..71a868e49 100644 --- a/config/activity_labels/medical/m4.yaml +++ b/config/activity_labels/medical/m4.yaml @@ -1,4 +1,5 @@ version: "1" +title: "Wound Packing" labels: # Item: # - id: Integer identification number of the label. diff --git a/config/activity_labels/medical/m5.yaml b/config/activity_labels/medical/m5.yaml index 7b1acaea6..de3ea7410 100644 --- a/config/activity_labels/medical/m5.yaml +++ b/config/activity_labels/medical/m5.yaml @@ -1,4 +1,5 @@ version: "1" +title: "X-Stat" labels: # Item: # - id: Integer identification number of the label. diff --git a/config/activity_labels/medical/r18-demo.yaml b/config/activity_labels/medical/r18-demo.yaml index 1579ec16c..52dc57b5c 100644 --- a/config/activity_labels/medical/r18-demo.yaml +++ b/config/activity_labels/medical/r18-demo.yaml @@ -1,4 +1,5 @@ version: "1" +title: "Chest Seal - Demo-able Descriptions" labels: # Item: # - id: Integer identification number of the label. diff --git a/config/activity_labels/medical/r18.yaml b/config/activity_labels/medical/r18.yaml index 27aa5a925..b2adeee01 100644 --- a/config/activity_labels/medical/r18.yaml +++ b/config/activity_labels/medical/r18.yaml @@ -1,4 +1,5 @@ version: "1" +title: "Chest Seal" labels: # Item: # - id: Integer identification number of the label. diff --git a/python-tpl/TCN_HPL b/python-tpl/TCN_HPL index 9f047fe63..f17241944 160000 --- a/python-tpl/TCN_HPL +++ b/python-tpl/TCN_HPL @@ -1 +1 @@ -Subproject commit 9f047fe630770cebf5ae6e5c12647e988e0bb34a +Subproject commit f172419443025143d1a15db7eadf75ef0c120fe9 diff --git a/ros/angel_msgs/msg/HandJointPosesUpdate.msg b/ros/angel_msgs/msg/HandJointPosesUpdate.msg index 90b8943f8..9fe7e1092 100644 --- a/ros/angel_msgs/msg/HandJointPosesUpdate.msg +++ b/ros/angel_msgs/msg/HandJointPosesUpdate.msg @@ -14,4 +14,5 @@ builtin_interfaces/Time source_stamp string hand # List of joints +# The order of these should be the same as the model's predicted order. 
angel_msgs/HandJointPose[] joints diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 557e40947..93461fdf2 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -5,31 +5,36 @@ """ import json -from heapq import heappush, heappop from pathlib import Path from threading import Condition, Event, Lock, Thread from typing import Callable -from typing import Dict from typing import List from typing import Optional import re +from typing import Tuple import kwcoco from builtin_interfaces.msg import Time import numpy as np import numpy.typing as npt +from pytorch_lightning.utilities import move_data_to_device from rclpy.callback_groups import MutuallyExclusiveCallbackGroup from rclpy.node import Node import torch +from torch.utils.data import DataLoader +from tcn_hpl.data.ptg_datamodule import create_dataset_from_hydra +from tcn_hpl.data.utils.pose_generation.generate_pose_data import DETECTION_CLASS_KEYPOINTS +from tcn_hpl.data.vectorize import ( + FrameData, + FrameObjectDetections, + FramePoses, +) +from tcn_hpl.models.ptg_module import PTGLitModule from angel_system.activity_classification.tcn_hpl.predict import ( - load_module, - ObjectDetectionsLTRB, - objects_to_feats, - predict, ResultsCollector, - PatientPose, ) +from angel_system.data.common.config_structs import load_activity_label_set from angel_system.utils.event import WaitAndClearEvent from angel_system.utils.simple_timer import SimpleTimer @@ -42,7 +47,6 @@ from angel_utils import declare_and_get_parameters, make_default_main, RateTracker from angel_utils.activity_classification import InputWindow, InputBuffer from angel_utils.conversion import time_to_int -from angel_utils.object_detection import max_labels_and_confs # Input ROS topic for RGB Image Timestamps @@ -51,25 +55,16 @@ PARAM_DET_TOPIC = "det_topic" # Output ROS topic for activity classifications. PARAM_ACT_TOPIC = "act_topic" +# Filesystem path to the Angel-System activity configuration file for the task +# we are predicting for. +PARAM_ACT_CONFIG_FILE = "activity_config_file" # Filesystem path to the TCN model weights PARAM_MODEL_WEIGHTS = "model_weights" -# Filesystem path to the class mapping file. -PARAM_MODEL_MAPPING = "model_mapping" -# Bool flag to indicate if the NormalizePixelPts augmentation should be applied -PARAM_MODEL_NORMALIZE_PIXEL_PTS = "model_normalize_pixel_pts" -# Bool flag to indicate if the NormalizeFromCenter augmentation should be applied -PARAM_MODEL_NORMALIZE_CENTER_PTS = "model_normalize_center_pts" -# Filesystem path to the input object detection label mapping. -# This is expected to be a JSON file containing a list of strings. -PARAM_MODEL_OD_MAPPING = "model_det_label_mapping" +# Filesystem path to the YAML config file paired with the model containing +# relevant hyperparameters. +PARAM_MODEL_CONFIG = "model_config" # Device the model should be loaded onto. "cuda" and "cpu" are PARAM_MODEL_DEVICE = "model_device" -# Version of the detections-to-descriptors algorithm the model is expecting as -# input. -PARAM_MODEL_DETS_CONV_VERSION = "model_dets_conv_version" -# Number of (image) frames to consider as the "window" when collating -# correlated data. 
-PARAM_WINDOW_FRAME_SIZE = "window_size" # Maximum amount of data we will buffer in seconds. PARAM_BUFFER_MAX_SIZE_SECONDS = "buffer_max_size_seconds" # Width in pixels of the imagery that object detections were predicted from. @@ -116,6 +111,21 @@ class NoActivityClassification(Exception): """ +def max_det_class_score( + msg: ObjectDetection2dSet +) -> Tuple[npt.NDArray[int], npt.NDArray[float]]: + """ + Get the index and score of the highest scoring class. + :param msg: Input message. + :return: Tuple of index and score. + """ + mat_shape = (msg.num_detections, len(msg.label_vec)) + conf_mat = np.asarray(msg.label_confidences).reshape(mat_shape) + max_conf_idxs = conf_mat.argmax(axis=1) + max_confs = conf_mat[np.arange(conf_mat.shape[0]), max_conf_idxs] + return max_conf_idxs, max_confs + + class ActivityClassifierTCN(Node): """ ROS node that publishes `ActivityDetection` messages using a classifier and @@ -135,14 +145,10 @@ def __init__(self): (PARAM_DET_TOPIC,), (PARAM_POSE_TOPIC,), (PARAM_ACT_TOPIC,), + (PARAM_ACT_CONFIG_FILE,), (PARAM_MODEL_WEIGHTS,), - (PARAM_MODEL_MAPPING,), - (PARAM_MODEL_NORMALIZE_PIXEL_PTS, False), - (PARAM_MODEL_NORMALIZE_CENTER_PTS, False), - (PARAM_MODEL_OD_MAPPING,), + (PARAM_MODEL_CONFIG,), (PARAM_MODEL_DEVICE, "cuda"), - (PARAM_MODEL_DETS_CONV_VERSION, 6), - (PARAM_WINDOW_FRAME_SIZE, 25), (PARAM_BUFFER_MAX_SIZE_SECONDS, 15), (PARAM_IMAGE_PIX_WIDTH, 1280), (PARAM_IMAGE_PIX_HEIGHT, 720), @@ -163,88 +169,48 @@ def __init__(self): self._pose_repeat_rate = param_values[PARAM_POSE_REPEAT_RATE] self._act_topic = param_values[PARAM_ACT_TOPIC] + self._act_config = load_activity_label_set( + param_values[PARAM_ACT_CONFIG_FILE] + ) self._img_pix_width = param_values[PARAM_IMAGE_PIX_WIDTH] self._img_pix_height = param_values[PARAM_IMAGE_PIX_HEIGHT] self._enable_trace_logging = param_values[PARAM_TIME_TRACE_LOGGING] - self.model_normalize_pixel_pts = param_values[PARAM_MODEL_NORMALIZE_PIXEL_PTS] - self.model_normalize_center_pts = param_values[PARAM_MODEL_NORMALIZE_CENTER_PTS] - self._window_lead_with_objects = param_values[PARAM_WINDOW_LEADS_WITH_OBJECTS] - self._debug_file = param_values[PARAM_DEBUG_FILE] - # clear the file if it exists (since we are appending to it) - if self._debug_file != "": - with open(self._debug_file, "w") as f: - f.write("") + # Cache activity class labels in ID order + self._act_class_names = [ + x[1] + for x in sorted((l.id, l.label) for l in self._act_config.labels) + ] - self.topic = param_values[PARAM_TOPIC] - # Load in TCN classification model and weights + # Load in TCN classification dataset and model/weights + # The dataset includes info on the window size appropriate for the + # model as well as how to embed input data into the appropriate + # vectorization the model requires. + self._model_dset = create_dataset_from_hydra(Path(param_values[PARAM_MODEL_CONFIG])) with SimpleTimer("Loading inference module", log.info): self._model_device = torch.device(param_values[PARAM_MODEL_DEVICE]) - self._model = load_module( + self._model = PTGLitModule.load_from_checkpoint( param_values[PARAM_MODEL_WEIGHTS], - param_values[PARAM_MODEL_MAPPING], - self._model_device, - topic=self.topic, + map_location=self._model_device, ).eval() # from pytorch_lightning.utilities.model_summary import summarize # from torchsummary import summary # print(summary(self._model)) # print(self._model) - # Load labels list from configured activity_labels YAML file. 
- print(f"json path: {param_values[PARAM_MODEL_OD_MAPPING]}") - with open(param_values[PARAM_MODEL_OD_MAPPING]) as infile: - det_label_list = json.load(infile) - self._det_label_to_id = { - c: i for i, c in enumerate(det_label_list) if c not in ["patient", "user"] - } - print(self._det_label_to_id) + # # Load labels list from configured activity_labels YAML file. + # print(f"json path: {param_values[PARAM_MODEL_OD_MAPPING]}") + # with open(param_values[PARAM_MODEL_OD_MAPPING]) as infile: + # det_label_list = json.load(infile) + # self._det_label_to_id = { + # c: i for i, c in enumerate(det_label_list) if c not in ["patient", "user"] + # } + # print(self._det_label_to_id) # Feature version aligned with model current architecture - self._feat_version = param_values[PARAM_MODEL_DETS_CONV_VERSION] - - # Memoization structure for structures created as input to feature - # embedding function in the `_predict` method. - self._memo_preproc_input: Dict[int, ObjectDetectionsLTRB] = {} - self._memo_preproc_input_poses: Dict[int, PatientPose] = {} - - self.keypoints_cats = [ - "nose", - "mouth", - "throat", - "chest", - "stomach", - "left_upper_arm", - "right_upper_arm", - "left_lower_arm", - "right_lower_arm", - "left_wrist", - "right_wrist", - "left_hand", - "right_hand", - "left_upper_leg", - "right_upper_leg", - "left_knee", - "right_knee", - "left_lower_leg", - "right_lower_leg", - "left_foot", - "right_foot", - "back", - ] - # Memoization structure for feature embedding function used in the - # `_predict` method. - self._memo_objects_to_feats: Dict[int, npt.NDArray] = {} - # We expire memoized content when the ID (nanosecond timestamp) is - # older than what will be processed going forward. That way we don't - # keep content around forever and "leak" memory. - self._memo_preproc_input_id_heap = [] - self._memo_preproc_input_id_heap_poses = [] - self._memo_objects_to_feats_id_heap = [] - # Queue of poses and repeat pose count - self._queued_pose_memo = {} + self.keypoints_cats = DETECTION_CLASS_KEYPOINTS["patient"] # Optionally initialize buffer-feeding from input COCO-file of object # detections. @@ -274,7 +240,7 @@ def __init__(self): ) self._results_collector = ResultsCollector( self._output_kwcoco_path, - {i: c for i, c in enumerate(self._model.classes)}, + {l.id: l.label for l in self._act_config.labels}, ) # If we are loading from a COCO detections file, it will set the # video in the loading thread. @@ -286,7 +252,7 @@ def __init__(self): # image frame with the object detections descriptor vector. # Buffer initialization must be before ROS callback and runtime-loop # initialization. - self._window_size = param_values[PARAM_WINDOW_FRAME_SIZE] + self._window_size = self._model_dset.window_size self._buffer = InputBuffer( 0, # Not using msgs with tolerance. self.get_logger, @@ -738,109 +704,65 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: determined for this input window. """ log = self.get_logger() - memo_preproc_input = self._memo_preproc_input - memo_preproc_input_h = self._memo_preproc_input_id_heap - - memo_object_to_feats = self._memo_objects_to_feats - memo_object_to_feats_h = self._memo_objects_to_feats_id_heap - queued_pose_memo = self._queued_pose_memo - log.info(f"Input Window (oldest-to-newest frame):\n{window}") # TCN wants to know the label and confidence for the maximally # confident class only. 
Input object detection messages log.info("processing window...") - # log.info(f"window object detections: {window.obj_dets}") - frame_object_detections: List[Optional[ObjectDetectionsLTRB]] - frame_object_detections = [None] * len(window) - for i, det_msg in enumerate(window.obj_dets): - if det_msg is not None: - msg_id = time_to_int(det_msg.source_stamp) - if msg_id not in memo_preproc_input: - memo_preproc_input[msg_id] = v = ObjectDetectionsLTRB( - msg_id, - det_msg.left, - det_msg.top, - det_msg.right, - det_msg.bottom, - *max_labels_and_confs(det_msg), - ) - # print(f"DETECTION memo_preproc_input[msg_id]: {memo_preproc_input[msg_id]}") - heappush(memo_preproc_input_h, msg_id) - else: - v = memo_preproc_input[msg_id] - frame_object_detections[i] = v - log.debug( - f"[_process_window] Window vector presence: " - f"{[(v is not None) for v in frame_object_detections]}" - ) - - # log.info(f"window patient_joint_kps: {window.patient_joint_kps}") - memo_preproc_input_poses = self._memo_preproc_input_poses - memo_preproc_input_h_poses = self._memo_preproc_input_id_heap_poses - frame_patient_poses: List[Optional[PatientPose]] - frame_patient_poses = [None] * len(window) - for i, pose_msg in enumerate(window.patient_joint_kps): - if pose_msg is not None: - msg_id = time_to_int(pose_msg.source_stamp) - if msg_id not in memo_preproc_input_poses: - # for pose in memo_preproc_input_poses[msg_id]: - # print(f"num of joints: {len(pose_msg.joints)}") - # if len(pose_msg.joints) > len(self.keypoints_cats): - # print(f"num of joints: {pose_msg}") - # print(f"num of keypoints cats: {len(self.keypoints_cats)}") - # print(f"message id: {msg_id}") - # print(f"memo_preproc_input_poses length: {len(memo_preproc_input_poses)}") - memo_preproc_input_poses[msg_id] = v = [ - PatientPose(msg_id, pm.pose.position, self.keypoints_cats[i]) - for i, pm in enumerate(pose_msg.joints) - ] - - # print(f"POSE memo_preproc_input_poses[msg_id]: {memo_preproc_input_poses[msg_id]}") - # msg_id, - # pm.positions, - # # pose_msg.orientations, - # pm.labels, - # ) - heappush(memo_preproc_input_h_poses, msg_id) - else: - v = memo_preproc_input_poses[msg_id] - frame_patient_poses[i] = v - log.debug( - f"[_process_window] Window vector presence: " - f"{[(v is not None) for v in frame_patient_poses]}" - ) - # print(f"frame_object_detections: {frame_object_detections}") - - try: - feats, mask = objects_to_feats( - frame_object_detections=frame_object_detections, - frame_patient_poses=frame_patient_poses, - det_label_to_idx=self._det_label_to_id, - feat_version=self._feat_version, - image_width=self._img_pix_width, - image_height=self._img_pix_height, - # feature_memo=memo_object_to_feats, # passed by reference so this gets updated in the function and changes persist here - # pose_memo=queued_pose_memo, - normalize_pixel_pts=self.model_normalize_pixel_pts, - normalize_center_pts=self.model_normalize_center_pts, - pose_repeat_rate=self._pose_repeat_rate, - ) - except ValueError as ex: - log.warn(f"object-to-feats: ValueError: {ex}") - # feature detections were all None - raise NoActivityClassification() + # Convert window ROS Messages into something appropriate for setting to + # the vectorization dataset. 
+ det_label_vec: List[Optional[str]] = [] + window_data: List[FrameData] = [] + for m_dets, m_pose in zip(window.obj_dets, window.patient_joint_kps): + m_dets: Optional[ObjectDetection2dSet] + m_pose: Optional[HandJointPosesUpdate] + f_dets: Optional[FrameObjectDetections] = None + f_pose: Optional[FramePoses] = None + if m_dets is not None: + det_label_vec = m_dets.label_vec + # Convert message xyxy into xywh + bbox = np.asarray([m_dets.left, m_dets.top, m_dets.right, m_dets.bottom]).T + bbox[:, 2:] -= bbox[:, :2] + cats, scores = max_det_class_score(m_dets) + f_dets = FrameObjectDetections( + bbox, + cats, + scores, + ) + if m_pose is not None: + f_pose = FramePoses( + # No whole-pose score, so just filling in 1.0 for now. + np.array([1.0]), + # (x,y) coordinates for each joint for our single pose. + # Shape (1, n_joints, 2) + np.array([[(j.pose.position.x, j.pose.position.y) for j in m_pose.joints]]), + # Turns out, we are storing the confidence as the Z + # position in the message. + np.array([[j.pose.position.z for j in m_pose.joints]]), + ) + window_data.append(FrameData(f_dets, f_pose)) + assert len(det_label_vec) + # We do not set a slot in `det_label_vec` to represent background + # because the confidences pushed forward from the detection source + # because it should only be providing confidences for the provided + # labels. - feats = feats.to(self._model_device) - mask = mask.to(self._model_device) + self._model_dset.load_data_online(window_data, det_label_vec) + loader = DataLoader(dataset=self._model_dset, batch_size=1) + batch = move_data_to_device(list(loader)[0], device=self._model_device) with SimpleTimer("[_process_window] Model processing", log.info): - proba = predict(self._model, feats, mask).cpu() + with torch.no_grad(): + _, proba, preds, _, _, _ = self._model.model_step( + batch, + compute_loss=False, + ) + pred = preds.cpu()[0] + proba = proba.cpu()[0] - pred = torch.argmax(proba) log.info(f"activity probabilities: {proba}, prediction class: {pred}") - log.info(f"self._model.classes: {self._model.classes}") + log.info(f"activity class names: {self._act_class_names}") # Prepare output message activity_msg = ActivityDetection() @@ -849,7 +771,7 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: activity_msg.source_stamp_end_frame = window.frames[-1][0] # save label vector - activity_msg.label_vec = self._model.classes + activity_msg.label_vec = self._act_class_names # save the activity probabilities activity_msg.conf_vec = proba.tolist() @@ -862,18 +784,6 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: f"{time_to_int(activity_msg.source_stamp_end_frame)})" ) - # Clean up our memos from IDs at or earlier than this window's earliest - # frame. 
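The message-to-`FrameData` conversion above is mostly array bookkeeping. Below is a minimal sketch of the same reshaping, with plain lists and numpy standing in for the ROS message fields (the `left`/`top`/`right`/`bottom` edge lists, and `joints` with confidence stored in the z position) and toy values throughout:

```python
# Minimal sketch of the per-frame reshaping done above; plain numpy stands in
# for the ROS message fields and the tcn_hpl FrameObjectDetections/FramePoses
# containers. All values are toy data.
import numpy as np

# Detections: the message carries parallel per-detection edge lists (xyxy).
left, top = [10.0, 50.0], [20.0, 60.0]
right, bottom = [30.0, 90.0], [80.0, 70.0]
bbox = np.asarray([left, top, right, bottom]).T  # (n_dets, 4) as x1, y1, x2, y2
bbox[:, 2:] -= bbox[:, :2]                       # in place -> x, y, w, h
print(bbox)
# [[10. 20. 20. 60.]
#  [50. 60. 40. 10.]]

# Pose: one pose per frame; each joint carries (x, y) plus a confidence that
# the upstream node stores in the z position.
joints_xyz = [(100.0, 200.0, 0.9), (110.0, 210.0, 0.8)]    # (x, y, conf) per joint
pose_scores = np.array([1.0])                              # no whole-pose score, so 1.0
joint_xy = np.array([[(x, y) for x, y, _ in joints_xyz]])  # shape (1, n_joints, 2)
joint_conf = np.array([[c for _, _, c in joints_xyz]])     # shape (1, n_joints)
print(joint_xy.shape, joint_conf.shape, pose_scores.shape)
# (1, 2, 2) (1, 2) (1,)
```

The per-detection category indices and scores passed alongside `bbox` come from the same argmax illustrated earlier for `max_det_class_score`.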
- window_start_time_ns = time_to_int(window.frames[0][0]) - while memo_preproc_input_h and memo_preproc_input_h[0] <= window_start_time_ns: - del memo_preproc_input[heappop(memo_preproc_input_h)] - while ( - memo_object_to_feats_h and memo_object_to_feats_h[0] <= window_start_time_ns - ): - detection_id = heappop(memo_object_to_feats_h) - del memo_object_to_feats[detection_id] - del queued_pose_memo[detection_id] - self._rate_tracker.tick() log.info( f"[_process_window] Activity classification rate " diff --git a/ros/angel_utils/python/angel_utils/object_detection.py b/ros/angel_utils/python/angel_utils/object_detection.py index c4b311832..35561b541 100644 --- a/ros/angel_utils/python/angel_utils/object_detection.py +++ b/ros/angel_utils/python/angel_utils/object_detection.py @@ -1,4 +1,3 @@ -from typing import List from typing import Tuple import numpy as np @@ -12,8 +11,7 @@ def max_labels_and_confs( ) -> Tuple[npt.NDArray[str], npt.NDArray[float]]: """ Get out a tuple of the maximally confident class label and - confidence value for each detection as a tuple of two lists for - expansion into the `ObjectDetectionsLTRB` constructor + confidence value for each detection as a tuple of two arrays. :param msg: Input 2D object detection set message. diff --git a/tmux/demos/medical/Kitware-M2.yml b/tmux/demos/medical/Kitware-M2.yml index b68ad2e9f..2fd9d62ab 100644 --- a/tmux/demos/medical/Kitware-M2.yml +++ b/tmux/demos/medical/Kitware-M2.yml @@ -63,9 +63,12 @@ windows: -p image_topic:=PVFramesBGR -p output_topic:=PVFramesBGR_ts - run_latency_node: ros2 run angel_system_nodes latency_tracker --ros-args - -r __ns:=${ROS_NAMESPACE} -p image_ts_topic:=PVFramesBGR_ts - -p det_topic:=ObjectDetections2d -p pose_topic:=PatientPose - -p activity_topic:=activity_topic -p latency_topic:=latency + -r __ns:=${ROS_NAMESPACE} + -p image_ts_topic:=PVFramesBGR_ts + -p det_topic:=ObjectDetections2d + -p pose_topic:=PatientPose + -p activity_topic:=activity_topic + -p latency_topic:=latency # - sensor_input_stream: # layout: even-vertical @@ -115,9 +118,9 @@ windows: -p image_ts_topic:=PVFramesBGR_ts -p det_topic:=ObjectDetections2d -p pose_topic:=PatientPose + -p activity_config_file:=${CONFIG_DIR}/activity_labels/medical/m2.yaml -p model_weights:=${MODEL_DIR}/activity_classifier/m2_tcn.ckpt - -p model_mapping:=${MODEL_DIR}/activity_classifier/m2_mapping.txt - -p model_det_label_mapping:=${ANGEL_WORKSPACE_DIR}/config/object_labels/medical/m2.json + -p model_config:=${MODEL_DIR}/activity_classifier/m2_config.yaml -p act_topic:=activity_topic -p pose_repeat_rate:=7.5 -p window_leads_with_objects:=true From c2bc70931b9a7c185db9df3368c1e076227ea38b Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Thu, 24 Oct 2024 20:54:29 -0400 Subject: [PATCH 25/40] Update yolo v7 submodule for CLI updates --- python-tpl/yolov7 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-tpl/yolov7 b/python-tpl/yolov7 index a63e1a37d..e0e9aab74 160000 --- a/python-tpl/yolov7 +++ b/python-tpl/yolov7 @@ -1 +1 @@ -Subproject commit a63e1a37dbabebcb5033df1c6250997880c6e368 +Subproject commit e0e9aab74050b4965a5fa5b5231698adf54596c3 From 1110b8c3c042c1dcae9c85a8cec9f1c9ab6465ae Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Fri, 25 Oct 2024 11:03:09 -0400 Subject: [PATCH 26/40] Fix formatting --- .../activity_classifier_tcn.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py 
b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 93461fdf2..50f93af20 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -23,7 +23,9 @@ import torch from torch.utils.data import DataLoader from tcn_hpl.data.ptg_datamodule import create_dataset_from_hydra -from tcn_hpl.data.utils.pose_generation.generate_pose_data import DETECTION_CLASS_KEYPOINTS +from tcn_hpl.data.utils.pose_generation.generate_pose_data import ( + DETECTION_CLASS_KEYPOINTS, +) from tcn_hpl.data.vectorize import ( FrameData, FrameObjectDetections, @@ -112,7 +114,7 @@ class NoActivityClassification(Exception): def max_det_class_score( - msg: ObjectDetection2dSet + msg: ObjectDetection2dSet, ) -> Tuple[npt.NDArray[int], npt.NDArray[float]]: """ Get the index and score of the highest scoring class. @@ -169,9 +171,7 @@ def __init__(self): self._pose_repeat_rate = param_values[PARAM_POSE_REPEAT_RATE] self._act_topic = param_values[PARAM_ACT_TOPIC] - self._act_config = load_activity_label_set( - param_values[PARAM_ACT_CONFIG_FILE] - ) + self._act_config = load_activity_label_set(param_values[PARAM_ACT_CONFIG_FILE]) self._img_pix_width = param_values[PARAM_IMAGE_PIX_WIDTH] self._img_pix_height = param_values[PARAM_IMAGE_PIX_HEIGHT] self._enable_trace_logging = param_values[PARAM_TIME_TRACE_LOGGING] @@ -180,15 +180,16 @@ def __init__(self): # Cache activity class labels in ID order self._act_class_names = [ - x[1] - for x in sorted((l.id, l.label) for l in self._act_config.labels) + x[1] for x in sorted((l.id, l.label) for l in self._act_config.labels) ] # Load in TCN classification dataset and model/weights # The dataset includes info on the window size appropriate for the # model as well as how to embed input data into the appropriate # vectorization the model requires. - self._model_dset = create_dataset_from_hydra(Path(param_values[PARAM_MODEL_CONFIG])) + self._model_dset = create_dataset_from_hydra( + Path(param_values[PARAM_MODEL_CONFIG]) + ) with SimpleTimer("Loading inference module", log.info): self._model_device = torch.device(param_values[PARAM_MODEL_DEVICE]) self._model = PTGLitModule.load_from_checkpoint( @@ -722,7 +723,9 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: if m_dets is not None: det_label_vec = m_dets.label_vec # Convert message xyxy into xywh - bbox = np.asarray([m_dets.left, m_dets.top, m_dets.right, m_dets.bottom]).T + bbox = np.asarray( + [m_dets.left, m_dets.top, m_dets.right, m_dets.bottom] + ).T bbox[:, 2:] -= bbox[:, :2] cats, scores = max_det_class_score(m_dets) f_dets = FrameObjectDetections( @@ -736,7 +739,14 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: np.array([1.0]), # (x,y) coordinates for each joint for our single pose. # Shape (1, n_joints, 2) - np.array([[(j.pose.position.x, j.pose.position.y) for j in m_pose.joints]]), + np.array( + [ + [ + (j.pose.position.x, j.pose.position.y) + for j in m_pose.joints + ] + ] + ), # Turns out, we are storing the confidence as the Z # position in the message. 
np.array([[j.pose.position.z for j in m_pose.joints]]), From ffb0d432be0b1fb4f4abb4d519028e925edb5967 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Fri, 25 Oct 2024 11:31:12 -0400 Subject: [PATCH 27/40] Finish documentation sentence --- TRAIN_AND_RUN_README.md | 52 +++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/TRAIN_AND_RUN_README.md b/TRAIN_AND_RUN_README.md index c2f6bbf35..556a2eb27 100644 --- a/TRAIN_AND_RUN_README.md +++ b/TRAIN_AND_RUN_README.md @@ -71,36 +71,48 @@ Data is stored on their SFTP server, to which the "Click to Download" links refer to. Storage of downloaded ZIP archives, and their subsequent extractions, should -follow the pattern. -A script is provided +follow the pattern below. ``` bbn_data/ ├── README.md # Indicate where we have acquired this BBN data. └── lab_data-golden/ ├── m2_tourniquet/ - │ ├── Fri-Apr-21/ - │ │ ├── 20230420_122603_HoloLens.mp4 - │ │ ├── 20230420_122603_HoloLens.skill_labels_by_frame.txt - │ │ ├── 20230420_123212_HoloLens.mp4 - │ │ ├── 20230420_123212_HoloLens.skill_labels_by_frame.txt - │ │ ├── 20230420_124541_HoloLens.mp4 - │ │ ├── 20230420_124541_HoloLens.skill_labels_by_frame.txt - │ │ ├── 20230420_125033_HoloLens.mp4 - │ │ ├── 20230420_125033_HoloLens.skill_labels_by_frame.txt - │ │ ├── 20230420_125517_HoloLens.mp4 - │ │ └── 20230420_125517_HoloLens.skill_labels_by_frame.txt - │ ├── Fri-Apr-21.zip - │ ├── Mon-Apr-17/ - │ │ ... - │ ├── Mon-Apr-17.zip - │ ├── Mon-Apr-24/ - │ │ ... - │ └── Mon-Apr-24.zip + │ ├── positive/ + │ │ ├── Fri-Apr-21/ + │ │ │ ├── 20230420_122603_HoloLens.mp4 + │ │ │ ├── 20230420_122603_HoloLens.skill_labels_by_frame.txt + │ │ │ ├── 20230420_123212_HoloLens.mp4 + │ │ │ ├── 20230420_123212_HoloLens.skill_labels_by_frame.txt + │ │ │ ├── 20230420_124541_HoloLens.mp4 + │ │ │ ├── 20230420_124541_HoloLens.skill_labels_by_frame.txt + │ │ │ ├── 20230420_125033_HoloLens.mp4 + │ │ │ ├── 20230420_125033_HoloLens.skill_labels_by_frame.txt + │ │ │ ├── 20230420_125517_HoloLens.mp4 + │ │ │ └── 20230420_125517_HoloLens.skill_labels_by_frame.txt + │ │ ├── Fri-Apr-21.zip + │ │ ├── Mon-Apr-17/ + │ │ │ ... + │ │ └── Mon-Apr-17.zip + │ │ ... + │ └── negative/ + │ ├── Fri-Aug-25/ + │ │ ... + │ └── Fri-Aug-25.zip ├── m3_pressure_dressing/ │ ... └── r18_chest_seal/ ... ``` +A script is provided at `scripts/extract_bbn_video_archives.bash` to automate +the recursive extraction of such ZIP files. +To operate this script, change directories to be in a parent directory +under which the ZIP files to be extracted are located, and then execute the +script: +```bash +cd ${PATH_TO_DATA}/ +bash ${PATH_TO_ANGEL_SYSTEM}/scripts/extract_bbn_video_archives.bash +``` + Golden data should be marked as read-only after downloading and extracting to prevent accidental modification of the files: ``` From 7c4d69320fffdfbf1bda88a0037dc4981dc43144 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Fri, 25 Oct 2024 11:41:16 -0400 Subject: [PATCH 28/40] Cache old configurations into an "old" directory for now Not removing them until we can meet some form of parity with the new functionality version. 
--- tmux/demos/medical/{ => old}/BBN-M2.yml | 0 tmux/demos/medical/{ => old}/BBN-M3.yml | 0 tmux/demos/medical/{ => old}/BBN-M5.yml | 0 tmux/demos/medical/{ => old}/BBN-R18.yml | 0 tmux/demos/medical/{ => old}/Kitware-M3.yml | 0 tmux/demos/medical/{ => old}/Kitware-M5.yml | 0 tmux/demos/medical/{ => old}/Kitware-R18-Zed2i-qa.yml | 0 tmux/demos/medical/{ => old}/Kitware-R18-Zed2i.yml | 0 tmux/demos/medical/{ => old}/Kitware-R18-qa.yml | 0 tmux/demos/medical/{ => old}/Kitware-R18.yml | 0 10 files changed, 0 insertions(+), 0 deletions(-) rename tmux/demos/medical/{ => old}/BBN-M2.yml (100%) rename tmux/demos/medical/{ => old}/BBN-M3.yml (100%) rename tmux/demos/medical/{ => old}/BBN-M5.yml (100%) rename tmux/demos/medical/{ => old}/BBN-R18.yml (100%) rename tmux/demos/medical/{ => old}/Kitware-M3.yml (100%) rename tmux/demos/medical/{ => old}/Kitware-M5.yml (100%) rename tmux/demos/medical/{ => old}/Kitware-R18-Zed2i-qa.yml (100%) rename tmux/demos/medical/{ => old}/Kitware-R18-Zed2i.yml (100%) rename tmux/demos/medical/{ => old}/Kitware-R18-qa.yml (100%) rename tmux/demos/medical/{ => old}/Kitware-R18.yml (100%) diff --git a/tmux/demos/medical/BBN-M2.yml b/tmux/demos/medical/old/BBN-M2.yml similarity index 100% rename from tmux/demos/medical/BBN-M2.yml rename to tmux/demos/medical/old/BBN-M2.yml diff --git a/tmux/demos/medical/BBN-M3.yml b/tmux/demos/medical/old/BBN-M3.yml similarity index 100% rename from tmux/demos/medical/BBN-M3.yml rename to tmux/demos/medical/old/BBN-M3.yml diff --git a/tmux/demos/medical/BBN-M5.yml b/tmux/demos/medical/old/BBN-M5.yml similarity index 100% rename from tmux/demos/medical/BBN-M5.yml rename to tmux/demos/medical/old/BBN-M5.yml diff --git a/tmux/demos/medical/BBN-R18.yml b/tmux/demos/medical/old/BBN-R18.yml similarity index 100% rename from tmux/demos/medical/BBN-R18.yml rename to tmux/demos/medical/old/BBN-R18.yml diff --git a/tmux/demos/medical/Kitware-M3.yml b/tmux/demos/medical/old/Kitware-M3.yml similarity index 100% rename from tmux/demos/medical/Kitware-M3.yml rename to tmux/demos/medical/old/Kitware-M3.yml diff --git a/tmux/demos/medical/Kitware-M5.yml b/tmux/demos/medical/old/Kitware-M5.yml similarity index 100% rename from tmux/demos/medical/Kitware-M5.yml rename to tmux/demos/medical/old/Kitware-M5.yml diff --git a/tmux/demos/medical/Kitware-R18-Zed2i-qa.yml b/tmux/demos/medical/old/Kitware-R18-Zed2i-qa.yml similarity index 100% rename from tmux/demos/medical/Kitware-R18-Zed2i-qa.yml rename to tmux/demos/medical/old/Kitware-R18-Zed2i-qa.yml diff --git a/tmux/demos/medical/Kitware-R18-Zed2i.yml b/tmux/demos/medical/old/Kitware-R18-Zed2i.yml similarity index 100% rename from tmux/demos/medical/Kitware-R18-Zed2i.yml rename to tmux/demos/medical/old/Kitware-R18-Zed2i.yml diff --git a/tmux/demos/medical/Kitware-R18-qa.yml b/tmux/demos/medical/old/Kitware-R18-qa.yml similarity index 100% rename from tmux/demos/medical/Kitware-R18-qa.yml rename to tmux/demos/medical/old/Kitware-R18-qa.yml diff --git a/tmux/demos/medical/Kitware-R18.yml b/tmux/demos/medical/old/Kitware-R18.yml similarity index 100% rename from tmux/demos/medical/Kitware-R18.yml rename to tmux/demos/medical/old/Kitware-R18.yml From 65eb7156e33ed1c7aa9dac50dc7199bd30b24dff Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Fri, 25 Oct 2024 09:05:48 -0400 Subject: [PATCH 29/40] add a debug option to the TCN node in order to see the inputs it has when it decides not to create a classification --- python-tpl/yolov7 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python-tpl/yolov7 b/python-tpl/yolov7 index e0e9aab74..a63e1a37d 160000 --- a/python-tpl/yolov7 +++ b/python-tpl/yolov7 @@ -1 +1 @@ -Subproject commit e0e9aab74050b4965a5fa5b5231698adf54596c3 +Subproject commit a63e1a37dbabebcb5033df1c6250997880c6e368 From 0839d1c3d4a0ddeb06919416438800b6f0e54de6 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Mon, 28 Oct 2024 12:16:43 -0400 Subject: [PATCH 30/40] cleanup debug input so it is easier to read in normal viewers --- .../activity_classification/activity_classifier_tcn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 50f93af20..680a25812 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -12,6 +12,7 @@ from typing import Optional import re from typing import Tuple +import re import kwcoco from builtin_interfaces.msg import Time From fd55d1f19fdabbcfdda63954ff6a02ab3eef4b1d Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Thu, 24 Oct 2024 00:31:11 -0400 Subject: [PATCH 31/40] Update TCN related things to use common dataset and vector computation --- .../activity_classification/activity_classifier_tcn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 680a25812..50f93af20 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -12,7 +12,6 @@ from typing import Optional import re from typing import Tuple -import re import kwcoco from builtin_interfaces.msg import Time From 59c5598eaffe9fd58aaf1a5f73c8bb8dcc7ac172 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Mon, 28 Oct 2024 12:16:43 -0400 Subject: [PATCH 32/40] cleanup debug input so it is easier to read in normal viewers --- .../activity_classification/activity_classifier_tcn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 50f93af20..680a25812 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -12,6 +12,7 @@ from typing import Optional import re from typing import Tuple +import re import kwcoco from builtin_interfaces.msg import Time From b6895f0d795c63f8a2432759a196a0be7a5da8a7 Mon Sep 17 00:00:00 2001 From: Paul Tunison Date: Thu, 24 Oct 2024 00:31:11 -0400 Subject: [PATCH 33/40] Update TCN related things to use common dataset and vector computation --- .../activity_classification/activity_classifier_tcn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 680a25812..f4aaa195a 100644 --- 
a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -13,6 +13,7 @@ import re from typing import Tuple import re +from typing import Tuple import kwcoco from builtin_interfaces.msg import Time From fe065efe43da4f53ddbe6feee4f43c0a4875c24a Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Mon, 28 Oct 2024 12:16:43 -0400 Subject: [PATCH 34/40] cleanup debug input so it is easier to read in normal viewers --- .../activity_classification/activity_classifier_tcn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index f4aaa195a..5539f64a8 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -14,6 +14,7 @@ from typing import Tuple import re from typing import Tuple +import re import kwcoco from builtin_interfaces.msg import Time From 7d5230e0979f4c2614884a79385135e1d42d329b Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Tue, 29 Oct 2024 09:50:36 -0400 Subject: [PATCH 35/40] fix for classes not in model --- .../activity_classification/activity_classifier_tcn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 50f93af20..e6e66597a 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -524,7 +524,7 @@ def _save_image_to_coco(self, window: InputWindow) -> int: activity_msg.source_stamp_end_frame = window.frames[-1][0] else: self.get_logger().warn(f"window.frames: {window.frames}") - activity_msg.conf_vec = [0.0 for x in self._model.classes] + activity_msg.conf_vec = [0.0 for x in self._act_class_names] gid = self._collect_image(activity_msg) return gid return -1 From ef38e74157ab6553266b4e9626d85d999cb78ec7 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt <88556427+josephvanpeltkw@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:48:08 -0400 Subject: [PATCH 36/40] Update ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py Co-authored-by: Paul Tunison <735270+Purg@users.noreply.github.com> --- .../activity_classification/activity_classifier_tcn.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index e6e66597a..97b3b55a3 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -631,9 +631,8 @@ def rt_loop(self): # out older data at and before the first item in the window. 
self._buffer.clear_before(time_to_int(window.frames[1][0])) - image_gid = ( - None # set this to None to signal if we saved the image or not - ) + # set this to None to signal if we saved the image or not + image_gid = None try: if enable_time_trace_logging: log.info( From a3ce11211d0e2d61eca2c410dd564e42f1ecdf9e Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Wed, 30 Oct 2024 13:07:40 -0400 Subject: [PATCH 37/40] remove unneeded code and change var name --- ros/angel_utils/scripts/convert_video_to_ros_bag.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ros/angel_utils/scripts/convert_video_to_ros_bag.py b/ros/angel_utils/scripts/convert_video_to_ros_bag.py index fe70a19c7..855396e99 100755 --- a/ros/angel_utils/scripts/convert_video_to_ros_bag.py +++ b/ros/angel_utils/scripts/convert_video_to_ros_bag.py @@ -122,12 +122,11 @@ def convert_video_to_bag( # Create image message image_msg = bridge.cv2_to_imgmsg(frame, encoding="bgr8") - # image_msg.header.stamp = frame_ts_msg # split the frame timestamp into sec and nsec - msec = frame_rel_ts - nsec = int((msec - int(msec)) * 1_000_000_000) - msec = int(msec) - image_msg.header.stamp.sec = msec + seconds = frame_rel_ts + nsec = int((seconds - int(seconds)) * 1_000_000_000) + seconds = int(seconds) + image_msg.header.stamp.sec = seconds image_msg.header.stamp.nanosec = nsec print(f"timestamp: {image_msg.header.stamp}") From 3441c1868b98c379c811a532adc04a4719006b6f Mon Sep 17 00:00:00 2001 From: Joseph VanPelt Date: Wed, 30 Oct 2024 13:12:47 -0400 Subject: [PATCH 38/40] change to collect image outside of try statement so it always collects --- .../activity_classifier_tcn.py | 31 +++---------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 97b3b55a3..9b105abed 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -511,24 +511,6 @@ def _rt_keep_looping(self) -> bool: # TODO: add has-finished-processing-file-input check. 
return rt_active - def _save_image_to_coco(self, window: InputWindow) -> int: - """ - This will add an image to the output coco file - if you are not saving to a coco file, this will return -1 - """ - if self._results_collector: - # Prepare output message - activity_msg = ActivityDetection() - # set the only needed items for collection - if len(window.frames) > 0: - activity_msg.source_stamp_end_frame = window.frames[-1][0] - else: - self.get_logger().warn(f"window.frames: {window.frames}") - activity_msg.conf_vec = [0.0 for x in self._act_class_names] - gid = self._collect_image(activity_msg) - return gid - return -1 - def _window_criterion_correct_size(self, window: InputBuffer) -> bool: window_ok = len(window) == self._window_size if not window_ok: @@ -536,7 +518,6 @@ def _window_criterion_correct_size(self, window: InputBuffer) -> bool: f"Window is not the appropriate size " f"(actual:{len(window)} != {self._window_size}:expected)" ) - self._save_image_to_coco(window) return window_ok @@ -611,6 +592,9 @@ def rt_loop(self): have_leading_object=self._window_lead_with_objects, ) + window_end_frame = window.frames[-1][0] + image_gid = self._collect_image(window_end_frame) + # log.info(f"buffer contents: {window.obj_dets}") # if enable_time_trace_logging: @@ -632,7 +616,6 @@ def rt_loop(self): self._buffer.clear_before(time_to_int(window.frames[1][0])) # set this to None to signal if we saved the image or not - image_gid = None try: if enable_time_trace_logging: log.info( @@ -643,7 +626,6 @@ def rt_loop(self): act_msg = self._process_window(window) # log.info(f"activity message: {act_msg}") - image_gid = self._collect_image(act_msg) self._collect_results(act_msg, image_gid) # set the header right before publishing so that the time is after processing act_msg.header.frame_id = "Activity Classification" @@ -651,9 +633,6 @@ def rt_loop(self): self._activity_publisher.publish(act_msg) except NoActivityClassification: - # collect the image if we are saving to coco file - if self._results_collector and image_gid is None: - self._save_image_to_coco(window) # No ramifications, but don't publish activity message. log.warn( "Runtime loop window processing function did " @@ -802,7 +781,7 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: return activity_msg - def _collect_image(self, msg: ActivityDetection) -> int: + def _collect_image(self, end_frame_time) -> int: """ Collect into our ResultsCollector instance from the produced activity classification message if we were initialized to do that. @@ -817,7 +796,7 @@ def _collect_image(self, msg: ActivityDetection) -> int: # Use window end timestamp nanoseconds as the frame index. # When reading from an input COCO file, this aligns with the input # `image` `frame_index` attributes. 
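The intent of patch 38 is easier to see stripped of the surrounding node machinery: `_collect_image` now runs before the `try` block in `rt_loop`, so a window that raises `NoActivityClassification` still gets its frame recorded. The sketch below uses dummy stand-in functions, not the node's real methods:

```python
# Stand-in sketch (dummy functions, not the node's real methods) of the
# rt_loop() restructuring: the image is registered before window processing,
# so a failed classification no longer skips image collection.
class NoActivityClassification(Exception):
    pass

def collect_image(end_frame_time):       # stand-in for self._collect_image(...)
    print(f"collected image gid for frame {end_frame_time}")
    return 1

def process_window(window):              # stand-in for self._process_window(...)
    raise NoActivityClassification()     # simulate a window that cannot be classified

def handle_window(window, end_frame_time):
    image_gid = collect_image(end_frame_time)  # now unconditional
    try:
        act_msg = process_window(window)
        # ... collect results against image_gid and publish act_msg ...
    except NoActivityClassification:
        # The image is already recorded; just skip publishing.
        print("no classification for this window")

handle_window(window=None, end_frame_time=123456789)
```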
- frame_index = time_to_int(msg.source_stamp_end_frame) + frame_index = time_to_int(end_frame_time) gid = rc.add_image( frame_index=frame_index, name=f"ros-frame-nsec-{frame_index}", From c1f7fbbb929b36d52b333dc031c79bbc321a2604 Mon Sep 17 00:00:00 2001 From: Joseph VanPelt <88556427+josephvanpeltkw@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:30:01 -0400 Subject: [PATCH 39/40] Update ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py Co-authored-by: Paul Tunison <735270+Purg@users.noreply.github.com> --- .../activity_classification/activity_classifier_tcn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index 9b105abed..e1260073a 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -615,7 +615,6 @@ def rt_loop(self): # out older data at and before the first item in the window. self._buffer.clear_before(time_to_int(window.frames[1][0])) - # set this to None to signal if we saved the image or not try: if enable_time_trace_logging: log.info( From db2820e4257aef142c238fa15cc9218292e224bc Mon Sep 17 00:00:00 2001 From: Joseph VanPelt <88556427+josephvanpeltkw@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:30:15 -0400 Subject: [PATCH 40/40] Update ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py Co-authored-by: Paul Tunison <735270+Purg@users.noreply.github.com> --- .../activity_classification/activity_classifier_tcn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py index e1260073a..2aac71f75 100644 --- a/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py +++ b/ros/angel_system_nodes/angel_system_nodes/activity_classification/activity_classifier_tcn.py @@ -780,7 +780,7 @@ def _process_window(self, window: InputWindow) -> ActivityDetection: return activity_msg - def _collect_image(self, end_frame_time) -> int: + def _collect_image(self, end_frame_time: Time) -> int: """ Collect into our ResultsCollector instance from the produced activity classification message if we were initialized to do that.
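Two timestamp conventions appear in these last patches: the video converter splits a float relative timestamp into whole seconds plus nanoseconds, and the results collector keys images by a single integer nanosecond value. A plain-Python sketch of both follows (the real code uses `builtin_interfaces.msg.Time` and `angel_utils.conversion.time_to_int`, whose body is not shown in this series; `stamp_to_int_ns` below is a hypothetical stand-in):

```python
# Plain-Python sketch of the two timestamp conventions used in these patches.
# stamp_to_int_ns is a hypothetical stand-in; the node itself uses
# angel_utils.conversion.time_to_int, which is not shown in this series.
frame_rel_ts = 12.25  # toy video-relative timestamp in float seconds

# convert_video_to_ros_bag.py side: split into whole seconds + nanoseconds.
seconds = frame_rel_ts
nsec = int((seconds - int(seconds)) * 1_000_000_000)
seconds = int(seconds)
print(seconds, nsec)  # 12 250000000

# Collector side: collapse a (sec, nanosec) stamp into one integer nanosecond
# value used as the image frame_index / "ros-frame-nsec-..." name.
def stamp_to_int_ns(sec: int, nanosec: int) -> int:
    return sec * 1_000_000_000 + nanosec

frame_index = stamp_to_int_ns(seconds, nsec)
print(f"ros-frame-nsec-{frame_index}")  # ros-frame-nsec-12250000000
```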