diff --git a/README.md b/README.md index 9286445..eac93a8 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ geo_inference = GeoInference( # Perform feature extraction on a TIFF image image_path = "/path/to/image.tif" -bands_requested = [1,2,3] +bands_requested = "1,2,3" patch_size = 1024 workers = 0 patch_size = 512 diff --git a/geo_inference/geo_inference.py b/geo_inference/geo_inference.py index 0671a3b..20d0eeb 100644 --- a/geo_inference/geo_inference.py +++ b/geo_inference/geo_inference.py @@ -92,9 +92,8 @@ def __init__( transformer_rotate: bool = False, ): self.work_dir: Path = get_directory(work_dir) - self.device = ( - device if device == "cpu" else select_model_device(gpu_id, multi_gpu) - ) + self.device = select_model_device(gpu_id, multi_gpu, device) + self.model = torch.jit.load( get_model( model_path_or_url=model, diff --git a/geo_inference/utils/helpers.py b/geo_inference/utils/helpers.py index 070ed84..2d8c499 100644 --- a/geo_inference/utils/helpers.py +++ b/geo_inference/utils/helpers.py @@ -196,9 +196,56 @@ def get_model(model_path_or_url: str, work_dir: Path) -> Path: raise ValueError("Invalid model path") -def select_model_device(gpu_id: int, multi_gpu: bool): - device = "cpu" - if torch.cuda.is_available(): +def select_model_device(gpu_id: int, multi_gpu: bool, device: str="cpu"): + """ + Selects an appropriate GPU device based on memory usage and GPU utilization. + + The function checks if a GPU is available using `torch.cuda.is_available()` + and then evaluates either a single GPU or multiple GPUs based on the provided `multi_gpu` flag. + It analyzes memory and utilization for each available GPU, and selects a device that has memory + and utilization usage below a specific threshold. + + Parameters: + ----------- + multi_gpu : bool + If True, checks multiple GPUs and selects one with suitable memory and utilization stats. + gpu_id : int + The index of the GPU to evaluate when not in multi-GPU mode. 
+ device : str + The device string representing the current device (e.g., "cpu" or "cuda:X"). + + Returns: + -------- + device : str + The selected device string, specifying which GPU to use (e.g., "cuda:0", "cuda:1", etc.), + or the original `device` value (e.g., "cpu") if no suitable GPU is found. + + Logic: + ------ + 1. If a GPU is available and the device is not set to "cpu": + - **Single GPU Mode (multi_gpu=False)**: + - Checks the specified `gpu_id`'s memory and utilization. + - If the memory usage is below 70% and GPU utilization is below 70%, sets the device to the appropriate GPU. + - **Multi-GPU Mode (multi_gpu=True)**: + - Iterates over all available GPUs. + - For each GPU, checks memory usage and utilization. + - Selects the first GPU whose memory usage and utilization are both below the 70% threshold. + 2. If no GPU meets the criteria, retains the current device (usually "cpu"). + + Note: + ----- + - Used memory is calculated as the difference between total and available memory (via `torch.cuda.mem_get_info`). + - Both memory and GPU utilization thresholds are set to 70%. + - The GPU utilization is retrieved via `torch.cuda.utilization()`. + + Assumptions: + ------------ + - Assumes that GPU-related PyTorch functions like `torch.cuda.utilization()` and `torch.cuda.mem_get_info()` are available and accessible. + - GPU-related functions will fail if run in an environment without CUDA support.
+ + """ + + if torch.cuda.is_available() and device != "cpu": if not multi_gpu: res = {"gpu": torch.cuda.utilization(gpu_id)} torch_cuda_mem = torch.cuda.mem_get_info(gpu_id) diff --git a/geo_inference/utils/polygon.py b/geo_inference/utils/polygon.py index 761996b..8a7a481 100644 --- a/geo_inference/utils/polygon.py +++ b/geo_inference/utils/polygon.py @@ -114,6 +114,10 @@ def gdf_to_yolo(geojson_path="", mask_path="", output_path="", column='value', except pyogrio.errors.DataSourceError as e: logger.error(f"Error reading GeoJSON file: {geojson_path}: {e}") return + + if len(gdf) == 0: + logger.info(f"No vector to write to yolo file.") + return [x0, y0, x1, y1] = [0, 0, im_size[0], im_size[1]] out_coords = [[x0, y0], [x0, y1], [x1, y1], [x1, y0]] @@ -196,6 +200,11 @@ def geojson2coco(image_src, label_src, output_path=None, category_attribute="val except pyogrio.errors.DataSourceError as e: logger.error(f"Error reading GeoJSON file: {label_src}: {e}") return + + if len(curr_gdf) == 0: + logger.info(f"No vector to write to coco file.") + return + curr_gdf['label_fname'] = label_src curr_gdf['image_fname'] = '' curr_gdf['image_id'] = 1 diff --git a/pyproject.toml b/pyproject.toml index 4ad8360..9e1851b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ version = {file = ["VERSION"]} dev = ["black", "bumpver", "isort", "pip-tools", "pytest"] [project.urls] -Homepage = "https://github.com/valhassan/geo-inference" +Homepage = "https://github.com/NRCan/geo-inference" [project.scripts] geo_inference = "geo_inference.geo_inference:main" diff --git a/tests/data/inference/test_model/cpu_scripted.pt b/tests/data/inference/test_model/cpu_scripted.pt new file mode 100644 index 0000000..1f0ddae Binary files /dev/null and b/tests/data/inference/test_model/cpu_scripted.pt differ diff --git a/tests/data/inference/test_model/gpu_scripted.pt b/tests/data/inference/test_model/gpu_scripted.pt new file mode 100644 index 0000000..b4a17fa Binary files /dev/null and 
b/tests/data/inference/test_model/gpu_scripted.pt differ diff --git a/tests/data/inference/test_model/test_model.pt b/tests/data/inference/test_model/test_model.pt deleted file mode 100644 index 3c22d04..0000000 Binary files a/tests/data/inference/test_model/test_model.pt and /dev/null differ diff --git a/tests/test_geo_inference.py b/tests/test_geo_inference.py index 758e64e..b7aa72e 100644 --- a/tests/test_geo_inference.py +++ b/tests/test_geo_inference.py @@ -5,49 +5,71 @@ from geo_inference.geo_inference import GeoInference from pathlib import Path + @pytest.fixture def test_data_dir(): return Path(__file__).parent / "data" + class TestGeoInference: - + @pytest.fixture def geo_inference(self, test_data_dir): - model = str(test_data_dir / "inference"/ "test_model" / "test_model.pt") + model = str(test_data_dir / "inference" / "test_model" / "cpu_scripted.pt") work_dir = str(test_data_dir / "inference") mask_to_vec = True mask_to_yolo = True mask_to_coco = True - device = 'cpu' + device = "cpu" gpu_id = 0 - return GeoInference(model, work_dir, mask_to_vec, mask_to_yolo, mask_to_coco, device, gpu_id) + num_classes = 5 + prediction_threshold = 0.3 + transformer = True + transform_flip = True + transform_rotate = True + return GeoInference( + model=model, + work_dir=work_dir, + mask_to_vec=mask_to_vec, + mask_to_yolo=mask_to_yolo, + mask_to_coco=mask_to_coco, + device=device, + gpu_id=gpu_id, + multi_gpu=False, + num_classes=num_classes, + prediction_threshold=prediction_threshold, + transformers=transformer, + transformer_flip=transform_flip, + transformer_rotate=transform_rotate, + ) def test_init(self, geo_inference, test_data_dir): assert geo_inference.work_dir == test_data_dir / "inference" - assert geo_inference.device == 'cpu' + assert geo_inference.device == "cpu" assert geo_inference.mask_to_vec == True assert geo_inference.mask_to_yolo == True assert geo_inference.mask_to_coco == True - assert isinstance(geo_inference.model, torch.jit.ScriptModule) - 
assert geo_inference.classes >0 + assert isinstance(geo_inference.model.model, torch.jit.ScriptModule) + assert geo_inference.classes > 0 - def test_call(self, geo_inference, test_data_dir): - tiff_image = test_data_dir / '0.tif' + def test_call(self, geo_inference: GeoInference, test_data_dir: Path): + tiff_image = test_data_dir / "0.tif" # bbox = '0,0,100,100' + bbox = None patch_size = 512 - bands_requested="1,2,3" - geo_inference(str(tiff_image), bands_requested, patch_size, None) - mask_path = geo_inference.work_dir / "0_mask.tif" + bands_requested = "1,2,3" + workers = 10 + mask_name = geo_inference( + inference_input=str(tiff_image), + bands_requested=bands_requested, + patch_size=patch_size, + workers=workers, + bbox=bbox, + ) + mask_path = geo_inference.work_dir / mask_name assert mask_path.exists() - if geo_inference.mask_to_vec: - polygons_path = geo_inference.work_dir / "0_polygons.geojson" - yolo_csv_path = geo_inference.work_dir / "0_yolo.csv" - coco_path = geo_inference.work_dir / "0_coco.json" - assert polygons_path.exists() - assert yolo_csv_path.exists() - assert coco_path.exists() - os.remove(polygons_path) - os.remove(yolo_csv_path) - os.remove(coco_path) - os.remove(mask_path) \ No newline at end of file + polygons_path = geo_inference.work_dir / "0_polygons.geojson" + assert polygons_path.exists() + os.remove(polygons_path) + os.remove(mask_path) diff --git a/tests/utils/test_helpers.py b/tests/utils/test_helpers.py index 4bf6b30..958ecef 100644 --- a/tests/utils/test_helpers.py +++ b/tests/utils/test_helpers.py @@ -80,7 +80,7 @@ def test_validate_asset_type(test_data_dir): reopened_dataset = validate_asset_type(dataset) assert reopened_dataset.name == dataset.name assert not reopened_dataset.closed - assert validate_asset_type(local_tiff_path).name == local_tiff_path + assert Path(validate_asset_type(local_tiff_path).name) == Path(local_tiff_path) def test_calculate_gpu_stats(): with patch('torch.cuda.utilization', return_value=50), 
patch('torch.cuda.mem_get_info', return_value=(500, 1000)): @@ -105,22 +105,22 @@ def test_get_device(): with patch('geo_inference.utils.helpers.calculate_gpu_stats') as mock_calculate_gpu_stats: mock_calculate_gpu_stats.return_value = ({"gpu": 10}, {"used": 100, "total": 1024}) device = select_model_device(gpu_id=1, multi_gpu=False) - assert device == 'cpu' + assert device == "cpu" def test_get_directory(): with patch('pathlib.Path.is_dir', return_value=False), patch('pathlib.Path.mkdir'): assert get_directory('test') == Path('test') def test_get_model_local_file(test_data_dir): - model_file = test_data_dir / "inference" / "test_model" / "test_model.pt" + model_file = test_data_dir / "inference" / "test_model" / "cpu_scripted.pt" model_path = get_model(str(model_file), test_data_dir) assert model_path == model_file @patch('geo_inference.utils.helpers.download_file_from_url') def test_get_model_url(mock_download_file_from_url, test_data_dir): mock_download_file_from_url.return_value = None - model_path = get_model("https://example.com/test_model.pt", test_data_dir) - assert model_path == test_data_dir / "test_model.pt" + model_path = get_model("https://example.com/cpu_scripted.pt", test_data_dir) + assert model_path == test_data_dir / "cpu_scripted.pt" def test_get_model_file_not_exists(test_data_dir): with pytest.raises(ValueError):