AtomScott · AtomScott · Oct 9, 2022 · Oct 9, 2022
diff --git a/notebooks/02_user_guide/dataset_preparation.ipynb b/notebooks/02_user_guide/dataset_preparation.ipynb
@@ -1241,6 +1241,34 @@
     "\n",
     "show_df(df.head())"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `get_path` function returns the path to a downloaded dataset, or to a file inside it. A usage example is shown below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from soccertrack.datasets import get_path\n",
+    "\n",
+    "# show the available datasets\n",
+    "get_path()\n",
+    "\n",
+    "# show the path to the dataset\n",
+    "wide_view_path = get_path('wide-view')\n",
+    "print(wide_view_path)\n",
+    "\n",
+    "# get a csv-mp4 pair from the wide-view dataset\n",
+    "wide_view_csv = get_path('wide-view/F_20200220_1_0000_0030.csv')\n",
+    "wide_view_mp4 = get_path('wide-view/F_20200220_1_0000_0030.mp4')\n",
+    "print(wide_view_csv, wide_view_mp4)"
+   ]
   }
  ],
  "metadata": {

diff --git a/soccertrack/datasets/__init__.py b/soccertrack/datasets/__init__.py
@@ -1,40 +1,83 @@
 import os
 
 from soccertrack.logging import logger
-
+from typing import Optional
+from pathlib import Path
 from .downloader import KaggleDownloader
 
 __all__ = ["available", "get_path", "KaggleDownloader"]
 
-_module_path = os.path.dirname(__file__)
-_available_dir = [p for p in next(os.walk(_module_path))[1] if not p.startswith("__")]
-_available_csv = {"soccertrack sample": "soccertrack_sample.csv"}
-_available_mp4 = {
-    "soccertrack sample": "https://drive.google.com/file/d/1Vxc1NXwLiD3T6cqmlbjgjr-9umDty5Va/view?usp=sharing"
+_module_path = Path(__file__).parent
+
+# Dataset directories looked up next to this module.
+_available_dir = {
+    'GNSS': _module_path / 'GNSS',
+    'top-view': _module_path / 'top-view',
+    'wide-view': _module_path / 'wide-view',
+}
+
+# Keep only the directories that exist. The dict is rebuilt because popping
+# entries while iterating over `.items()` raises RuntimeError.
+_available_dir = {d: path for d, path in _available_dir.items() if path.exists()}
+
+# Keypoint files looked up next to this module.
+_available_files = {
+    'drone_keypoints': _module_path / 'drone_keypoints.json',
+    'fisheye_keypoints': _module_path / 'fisheye_keypoints.json',
+    'gnss_keypoints': _module_path / 'gnss_keypoints.json'
 }
-available = _available_dir + list(_available_csv.keys())
 
+# Drop keypoint files that are missing. `exists` has to be called (the bare
+# bound method is always truthy), and the loop runs over a snapshot of the
+# items so that entries can be removed while looping.
+for d, path in list(_available_files.items()):
+    if not path.exists():
+        _available_files.pop(d)
+        print(f"Dataset {d} not available")
 
-def get_path(dataset: str, type: str = "csv") -> str:
+available = list(_available_dir.keys()) + list(_available_files.keys())
+
+
+def get_path(dataset: Optional[str] = None) -> Optional[Path]:
     """Get the path to the data file.
 
     Args:
-        dataset (str): Name of the dataset. See `soccertrack.datasets.available` for all options.
-        dataset_type (str): Type of the dataset. Either 'csv' or 'mp4'.
+        dataset (str): Key listed in `soccertrack.datasets.available`, or
+            `<directory dataset>/<relative file path>` for a file inside a
+            directory dataset. If None, print the available datasets.
 
     Returns:
-        str: Path to the data file.
+        Path: Path to the dataset directory or data file, or None when
+            `dataset` is None.
+
+    Raises:
+        FileNotFoundError: If the requested file does not exist inside the
+            dataset directory.
+        ValueError: If `dataset` is not an available dataset.
     """
-    if type == "csv":
-        if dataset in _available_csv:
-            fpath = os.path.abspath(os.path.join(_module_path, _available_csv[dataset]))
-            return fpath
-    if type == "mp4":
-        if dataset in _available_mp4:
-            fpath = _available_mp4[dataset]
-            logger.info(f"Download the dataset from {fpath}")
-            return fpath
-
+
+    if dataset is None:
+        print("Available keys:")
+        for d in available:
+            print(f" - {d}")
+        return None
+
+    # Directory and file datasets are both advertised in `available`.
+    for registry in (_available_dir, _available_files):
+        if dataset in registry:
+            return registry[dataset]
+
+    # "<dataset>/<file>": split on the first "/" only so that nested
+    # relative paths are kept whole.
+    root, _, relative = dataset.partition('/')
+    if root in _available_dir:
+        ret_path = _available_dir[root] / relative
+        if not ret_path.exists():
+            # Raised explicitly: an `assert` is stripped under `python -O`.
+            raise FileNotFoundError(f"File {ret_path} not available")
+        return ret_path
+
     msg = f"The dataset '{dataset}' is not available. "
     msg += f"Available datasets are {', '.join(available)}"
     raise ValueError(msg)
+