Commit

reporting module passes
bw4sz committed Nov 22, 2024
1 parent b953b7e commit 3f43d49
Showing 13 changed files with 221 additions and 98 deletions.
5 changes: 3 additions & 2 deletions conf/config.yaml
@@ -58,10 +58,11 @@ pipeline_evaluation:
image_dir:

reporting:
image_dir:
report_dir:

active_learning:
images_to_annotate_dir:
image_dir:
strategy: 'random'
n_images: 100
m: 10
@@ -77,7 +78,7 @@ active_learning:
pool_limit: null

active_testing:
images_to_annotate_dir:
image_dir:
strategy: 'random'
n_images: 100
m: 10
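For orientation (not part of the commit), a minimal sketch of reading the renamed config keys. It assumes the config is loaded with OmegaConf, which the DictConfig type hint in src/pipeline.py suggests; the loading call itself is an assumption.

```python
# Minimal sketch, not part of the commit: read the renamed config keys.
# Assumes OmegaConf/Hydra-style loading, as suggested by the DictConfig
# type hint in src/pipeline.py.
from omegaconf import OmegaConf

cfg = OmegaConf.load("conf/config.yaml")

report_dir = cfg.reporting.report_dir        # new key added in this commit
reporting_images = cfg.reporting.image_dir
train_pool = cfg.active_learning.image_dir   # was images_to_annotate_dir
test_pool = cfg.active_testing.image_dir     # was images_to_annotate_dir
```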
12 changes: 6 additions & 6 deletions src/active_learning.py
@@ -5,7 +5,7 @@
import dask.array as da
import pandas as pd

def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, patch_overlap=0.1, min_score=0.5, m=None, model_path=None, dask_client=None, target_labels=None, pool_limit=1000):
def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, patch_overlap=0.1, min_score=0.5, model=None, model_path=None, dask_client=None, target_labels=None, pool_limit=1000):
"""Choose images to annotate.
Args:
evaluation (dict): A dictionary of evaluation metrics.
@@ -19,7 +19,7 @@ def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, p
patch_size (int, optional): The size of the image patches to predict on. Defaults to 512.
patch_overlap (float, optional): The amount of overlap between image patches. Defaults to 0.1.
min_score (float, optional): The minimum score for a prediction to be included. Defaults to 0.5.
m (main.deepforest, optional): A trained deepforest model. Defaults to None.
model (main.deepforest, optional): A trained deepforest model. Defaults to None.
model_path (str, optional): The path to the model checkpoint file. Defaults to None. Only used in combination with dask
target_labels: (list, optional): A list of target labels to filter images by. Defaults to None.
pool_limit (int, optional): The maximum number of images to consider. Defaults to 1000.
@@ -68,7 +68,7 @@ def update_sys_path():
dask_results.append(pd.concat(block_result))
preannotations = pd.concat(dask_results)
else:
preannotations = detection.predict(m=m, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, min_score=min_score)
preannotations = detection.predict(model=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, min_score=min_score)
preannotations = pd.concat(preannotations)

if strategy == "most-detections":
@@ -86,7 +86,7 @@ def update_sys_path():

return chosen_images

def choose_test_images(image_dir, strategy, n=10, patch_size=512, patch_overlap=0.1, min_score=0.5, m=None, model_path=None, dask_client=None, target_labels=None, pool_limit=1000):
def choose_test_images(image_dir, strategy, n=10, patch_size=512, patch_overlap=0.1, min_score=0.5, model=None, model_path=None, dask_client=None, target_labels=None, pool_limit=1000):
"""Choose images to annotate.
Args:
evaluation (dict): A dictionary of evaluation metrics.
@@ -100,7 +100,7 @@ def choose_test_images(image_dir, strategy, n=10, patch_size=512, patch_overlap=
patch_size (int, optional): The size of the image patches to predict on. Defaults to 512.
patch_overlap (float, optional): The amount of overlap between image patches. Defaults to 0.1.
min_score (float, optional): The minimum score for a prediction to be included. Defaults to 0.5.
m (main.deepforest, optional): A trained deepforest model. Defaults to None.
model (main.deepforest, optional): A trained deepforest model. Defaults to None.
model_path (str, optional): The path to the model checkpoint file. Defaults to None. Only used in combination with dask
target_labels: (list, optional): A list of target labels to filter images by. Defaults to None.
pool_limit (int, optional): The maximum number of images to consider. Defaults to 1000.
@@ -149,7 +149,7 @@ def update_sys_path():
dask_results.append(pd.concat(block_result))
preannotations = pd.concat(dask_results)
else:
preannotations = detection.predict(m=m, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, min_score=min_score)
preannotations = detection.predict(model=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, min_score=min_score)
preannotations = pd.concat(preannotations)

if strategy == "most-detections":
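A hedged usage sketch of the renamed keyword (not from the repository). The model construction, pool directory, and strategy values are placeholders, and passing evaluation=None is only an assumption for the 'random' strategy; the import path assumes the repository root is on sys.path.

```python
# Hypothetical call illustrating the m -> model rename; all values are placeholders.
from deepforest import main
from src.active_learning import choose_train_images

detection_model = main.deepforest()  # in practice, a trained checkpoint

chosen = choose_train_images(
    evaluation=None,            # assumed unused by the 'random' strategy
    image_dir="images/pool",    # placeholder pool directory
    strategy="random",
    n=10,
    model=detection_model,      # keyword renamed from m= in this commit
)
```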
12 changes: 12 additions & 0 deletions src/label_studio.py
@@ -175,6 +175,18 @@ def gather_data(annotation_dir):

return df

def get_api_key():
"""Get Label Studio API key from config file"""
config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
'.label_studio.config')
if not os.path.exists(config_path):
return None

with open(config_path, 'r') as f:
for line in f:
if line.startswith('api_key'):
return line.split('=')[1].strip()
return None

def connect_to_label_studio(url, project_name, label_config=None):
"""Connect to the Label Studio server.
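A sketch of how the new helper is presumably consumed. The config-file layout is inferred from the parsing above (a line of the form api_key = &lt;token&gt; in .label_studio.config at the repository root); the import style and error handling are illustrative assumptions.

```python
# Illustrative only: fetch the Label Studio API key written in
# .label_studio.config (inferred format: "api_key = <token>").
from src import label_studio  # assumes src/ is importable as a package

api_key = label_studio.get_api_key()
if api_key is None:
    raise ValueError(
        "No api_key entry found; add one to .label_studio.config at the repo root")
```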
30 changes: 21 additions & 9 deletions src/pipeline.py
@@ -15,6 +15,7 @@


class Pipeline:
"""Pipeline for training and evaluating a detection and classification model"""
def __init__(self, cfg: DictConfig):
"""Initialize the pipeline with optional configuration"""
self.config = cfg
@@ -60,14 +61,19 @@ def run(self):
self.config.classification_model.checkpoint_dir)

pipeline_monitor = PipelineEvaluation(
model=trained_detection_model, **self.config.pipeline_evaluation)
model=trained_detection_model,
crop_model=trained_classification_model,
**self.config.pipeline_evaluation)

performance = pipeline_monitor.evaluate()

reporting = Reporting(self.config.reporting.report_dir)
reporting.generate_reports(pipeline_monitor)
reporter = Reporting(self.config.reporting.report_dir,
self.config.reporting.image_dir,
pipeline_monitor)

if pipeline_monitor.check_success():
print("Pipeline performance is satisfactory, exiting")
reporter.generate_report()
return None
else:
train_images_to_annotate = choose_train_images(
@@ -86,26 +92,32 @@ def run(self):
combined_predictions = pd.concat(predictions)

# Split predictions into confident and uncertain
confident_predictions = combined_predictions[
combined_predictions["score"] >
self.config.active_learning.confident_threshold]
uncertain_predictions = combined_predictions[
combined_predictions["score"] <=
self.config.active_learning.confident_threshold]

confident_predictions = combined_predictions[
~combined_predictions["image_path"].isin(
uncertain_predictions["image_path"])]

reporter.confident_predictions = confident_predictions
reporter.uncertain_predictions = uncertain_predictions

print(f"Images requiring human review: {len(confident_predictions)}")
print(f"Images auto-annotated: {len(uncertain_predictions)}")

# Intelligent cropping
image_paths = uncertain_predictions["image_path"].unique()
cropped_image_annotations = density_cropping(
image_paths, uncertain_predictions, **self.config.intelligent_cropping)
# cropped_image_annotations = density_cropping(
# image_paths, uncertain_predictions, **self.config.intelligent_cropping)

# Align the predictions with the cropped images
# Run the annotation pipeline
label_studio.upload_to_label_studio(self.sftp_client,
cropped_image_annotations,
uncertain_predictions,
**self.config)
label_studio.upload_to_label_studio(self.sftp_client,
test_images_to_annotate,
**self.config)
reporter.generate_report()
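The reshaped split above is worth spelling out: previously boxes were divided purely by score, whereas now any image containing at least one low-scoring box is routed wholesale to human review. A self-contained sketch with placeholder data:

```python
# Sketch of the image-level split introduced above; the DataFrame and the
# threshold are placeholder values, not data from the pipeline.
import pandas as pd

combined_predictions = pd.DataFrame({
    "image_path": ["a.jpg", "a.jpg", "b.jpg"],
    "score": [0.9, 0.4, 0.8],
})
confident_threshold = 0.5

uncertain_predictions = combined_predictions[
    combined_predictions["score"] <= confident_threshold]
confident_predictions = combined_predictions[
    ~combined_predictions["image_path"].isin(uncertain_predictions["image_path"])]

# a.jpg carries one low-scoring box, so both of its predictions go to review;
# only b.jpg remains in the confident (auto-annotated) set.
```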

2 changes: 1 addition & 1 deletion src/pipeline_evaluation.py
@@ -192,7 +192,7 @@ def evaluate(self):
confident_classification_results = self.evaluate_confident_classification()
uncertain_classification_results = self.evaluate_uncertain_classification()

self.results = {"detection": detection_results, "confident_classficiation":confident_classification_results, "uncertain_classification":uncertain_classification_results}
self.results = {"detection": detection_results, "confident_classification":confident_classification_results, "uncertain_classification":uncertain_classification_results}

return self.results

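Downstream code (e.g. write_metrics in src/reporting.py) indexes these keys directly, so the spelling matters. A minimal sketch of consuming the corrected dictionary, assuming pipeline_monitor is an already-constructed PipelineEvaluation instance:

```python
# Sketch only: read the corrected result keys after evaluate() has run.
results = pipeline_monitor.evaluate()

detection_results = results["detection"]
confident_results = results["confident_classification"]   # spelling fixed in this commit
uncertain_results = results["uncertain_classification"]
```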
75 changes: 53 additions & 22 deletions src/reporting.py
@@ -2,28 +2,48 @@
import os
from datetime import datetime
from src.visualization import PredictionVisualizer

class Reporting:
def __init__(self, report_dir, pipeline_monitor):
"""Initialize reporting class"""
def __init__(self, report_dir, image_dir, pipeline_monitor):
"""Initialize reporting class
Args:
report_dir: Directory to save reports
image_dir: Directory containing images to create video from
pipeline_monitor: PipelineEvaluation instance containing model performance metrics
"""

self.report_dir = report_dir
self.report_file = f"{report_dir}/report.csv"
self.image_dir = image_dir
self.pipeline_monitor = pipeline_monitor
self.all_predictions = pd.concat(self.pipeline_monitor.predictions)

def generate_report(self):
"""Generate a report"""
self.write_predictions()
self.write_metrics()
self.generate_video()

def write_predictions(self, predictions):
def write_predictions(self):
"""Write predictions to a csv file"""
all_predictions = pd.concat(self.pipeline_monitor.predictions)
all_predictions.to_csv(f"{self.report_dir}/predictions.csv", index=False)

self.all_predictions.to_csv(f"{self.report_dir}/predictions.csv", index=False)

return f"{self.report_dir}/predictions.csv"
def get_coco_datasets(self):
"""Get coco datasets"""
self.pipeline_monitor.mAP.get_coco_datasets()

def generate_video(self):
"""Generate a video from the predictions"""
visualizer = PredictionVisualizer()
visualizer.create_video(
predictions_list=self.pipeline_monitor.predictions,
output_path=f"{self.report_dir}/predictions.mp4"
)
visualizer = PredictionVisualizer(self.all_predictions, self.report_dir)
output_path = f"{self.report_dir}/predictions.mp4"
images = self.all_predictions['image_path'].unique()
images = [os.path.join(self.image_dir, image) for image in images]

output_path = visualizer.create_visualization(images=images)

return output_path

def write_metrics(self):
"""Write metrics to a csv file
@@ -39,26 +59,35 @@ def write_metrics(self):

# Extract key metrics
detection_map = performance['detection']['mAP']['map']
confident_acc = performance['confident_classification']['accuracy']
uncertain_acc = performance['uncertain_classification']['accuracy']
confident_acc = performance['confident_classification']["confident_classification_accuracy"]
uncertain_acc = performance['uncertain_classification']["uncertain_classification_accuracy"]

# Get annotation counts and completion rate
human_reviewed_images = len(self.all_predictions['image_path'].unique())
total_images = len(os.listdir(self.image_dir))
completion_rate = human_reviewed_images / total_images
total_annotations = self.all_predictions.shape[0]

try:
confident_annotations = self.pipeline_monitor.confident_predictions.shape[0]
except:
confident_annotations = 0
try:
uncertain_annotations = self.pipeline_monitor.uncertain_predictions.shape[0]
except:
uncertain_annotations = 0

# Get annotation counts
total_annotations = len(performance['detection']['annotations'])
confident_annotations = len(performance['confident_classification']['annotations'])
uncertain_annotations = len(performance['uncertain_classification']['annotations'])

# Calculate completion rate
completion_rate = (confident_annotations + uncertain_annotations) / total_annotations if total_annotations > 0 else 0

# Create report row
report_data = {
'timestamp': timestamp,
'model_name': self.pipeline_monitor.model.__class__.__name__,
'completion_rate': completion_rate,
'total_annotations': total_annotations,
'confident_annotations': confident_annotations,
'uncertain_annotations': uncertain_annotations,
'detection_map': detection_map,
'human_reviewed_images': human_reviewed_images,
'total_images': total_images,
'completion_rate': completion_rate,
'confident_classification_accuracy': confident_acc,
'uncertain_classification_accuracy': uncertain_acc
}
@@ -72,3 +101,5 @@ def write_metrics(self):

# Save updated reports
df.to_csv(self.report_file, index=False)

return f"{self.report_dir}/report.csv"
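A usage sketch mirroring the constructor call in src/pipeline.py above; the paths are placeholders, and pipeline_monitor is assumed to be a PipelineEvaluation whose evaluate() has already populated .predictions.

```python
# Sketch mirroring src/pipeline.py; directory paths are placeholders.
from src.reporting import Reporting

reporter = Reporting(report_dir="reports",
                     image_dir="images/raw",
                     pipeline_monitor=pipeline_monitor)

# pipeline.py also attaches the confident/uncertain splits before reporting.
reporter.confident_predictions = confident_predictions
reporter.uncertain_predictions = uncertain_predictions

reporter.generate_report()  # writes predictions.csv, report.csv and a prediction video
```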