
Add support for fsspec paths for CSVLoggers #16880

Merged · 13 commits · Mar 2, 2023
2 changes: 2 additions & 0 deletions src/lightning/fabric/CHANGELOG.md
@@ -18,6 +18,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added support for automatically calling `set_epoch` on the `dataloader.batch_sampler.sampler` ([#16841](https://github.com/Lightning-AI/lightning/pull/16841))


- Added support for writing logs to remote file systems with the `CSVLogger` ([#16880](https://github.com/Lightning-AI/lightning/pull/16880))

### Changed

- Fabric now chooses `accelerator="auto", strategy="auto", devices="auto"` as defaults ([#16842](https://github.com/Lightning-AI/lightning/pull/16842))
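For context, a minimal usage sketch of the feature this entry describes (the `s3://my-bucket/logs` URL is a hypothetical example; remote protocols require the matching fsspec backend, e.g. `s3fs`, to be installed):

```python
from lightning.fabric.loggers import CSVLogger

# Any protocol registered with fsspec (s3://, gs://, memory://, ...)
# should now be accepted as the logging destination.
logger = CSVLogger(root_dir="s3://my-bucket/logs", name="my_experiment")
logger.log_metrics({"loss": 0.42}, step=0)
logger.finalize("success")
```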
21 changes: 13 additions & 8 deletions src/lightning/fabric/loggers/csv_logs.py
@@ -21,6 +21,7 @@
from torch import Tensor

from lightning.fabric.loggers.logger import Logger, rank_zero_experiment
from lightning.fabric.utilities.cloud_io import get_filesystem
from lightning.fabric.utilities.logger import _add_prefix
from lightning.fabric.utilities.rank_zero import rank_zero_only, rank_zero_warn
from lightning.fabric.utilities.types import _PATH
@@ -62,10 +63,12 @@ def __init__(
flush_logs_every_n_steps: int = 100,
):
super().__init__()
self._root_dir = os.fspath(root_dir)
root_dir = os.fspath(root_dir)
self._root_dir = root_dir
self._name = name or ""
self._version = version
self._prefix = prefix
self._fs = get_filesystem(root_dir)
self._experiment: Optional[_ExperimentWriter] = None
self._flush_logs_every_n_steps = flush_logs_every_n_steps
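For reference, `get_filesystem` (imported above from `lightning.fabric.utilities.cloud_io`) is assumed here to be a thin wrapper around fsspec's URL resolution; a rough sketch of that behavior, not the actual implementation:

```python
import fsspec
from fsspec.core import url_to_fs

def get_filesystem_sketch(path: str) -> fsspec.AbstractFileSystem:
    # Resolve the URL's protocol ("s3://", "memory://", ...) to a
    # filesystem object; plain paths map to the local filesystem.
    fs, _ = url_to_fs(str(path))
    return fs
```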

@@ -150,14 +153,15 @@ def finalize(self, status: str) -> None:
def _get_next_version(self) -> int:
root_dir = self.root_dir

if not os.path.isdir(root_dir):
if not self._fs.isdir(root_dir):
log.warning("Missing logger folder: %s", root_dir)
return 0

existing_versions = []
for d in os.listdir(root_dir):
if os.path.isdir(os.path.join(root_dir, d)) and d.startswith("version_"):
existing_versions.append(int(d.split("_")[1]))
for d in self._fs.listdir(root_dir, detail=False):
name = d[len(root_dir) + 1 :] # removes parent directories
if self._fs.isdir(d) and name.startswith("version_"):
existing_versions.append(int(name.split("_")[1]))

if len(existing_versions) == 0:
return 0
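Note the slicing above: unlike `os.listdir`, fsspec's `listdir(..., detail=False)` returns full paths rather than bare entry names, so the parent prefix must be stripped. A standalone illustration against the in-memory filesystem:

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.makedirs("/logs/version_0", exist_ok=True)

# fsspec returns full paths such as ["/logs/version_0"], not ["version_0"]
full_paths = fs.listdir("/logs", detail=False)
names = [p[len("/logs") + 1 :] for p in full_paths]  # -> ["version_0"]
```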
@@ -178,13 +182,14 @@ class _ExperimentWriter:
def __init__(self, log_dir: str) -> None:
self.metrics: List[Dict[str, float]] = []

self._fs = get_filesystem(log_dir)
self.log_dir = log_dir
if os.path.exists(self.log_dir) and os.listdir(self.log_dir):
if self._fs.exists(self.log_dir) and self._fs.listdir(self.log_dir):
rank_zero_warn(
f"Experiment logs directory {self.log_dir} exists and is not empty."
" Previous log files in this directory will be deleted when the new ones are saved!"
)
os.makedirs(self.log_dir, exist_ok=True)
self._fs.makedirs(self.log_dir, exist_ok=True)

self.metrics_file_path = os.path.join(self.log_dir, self.NAME_METRICS_FILE)

@@ -213,7 +218,7 @@ def save(self) -> None:
last_m.update(m)
metrics_keys = list(last_m.keys())

with open(self.metrics_file_path, "w", newline="") as f:
with self._fs.open(self.metrics_file_path, "w", newline="") as f:
writer = csv.DictWriter(f, fieldnames=metrics_keys)
writer.writeheader()
writer.writerows(self.metrics)
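`fs.open` behaves like the built-in `open` but routes I/O through the target filesystem, so `csv.DictWriter` works unchanged. A standalone sketch against fsspec's in-memory backend:

```python
import csv
import fsspec

fs = fsspec.filesystem("memory")
# fs.open returns a file-like object; text mode and newline="" are
# handled the same way as with the built-in open().
with fs.open("/logs/metrics.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["step", "loss"])
    writer.writeheader()
    writer.writerows([{"step": 0, "loss": 0.5}, {"step": 1, "loss": 0.4}])
```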
4 changes: 3 additions & 1 deletion src/lightning/pytorch/CHANGELOG.md
@@ -54,8 +54,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added a `Trainer(barebones=True)` argument where all features that may impact raw speed are disabled ([#16854](https://github.com/Lightning-AI/lightning/pull/16854))


- Added `DDPStrategy(start_method=...)` argument, defaulting to 'popen' ([#16809](https://github.com/Lightning-AI/lightning/pull/16809))
- Added support for writing logs to remote file systems with the `CSVLogger` ([#16880](https://github.com/Lightning-AI/lightning/pull/16880))


- Added `DDPStrategy(start_method=...)` argument, defaulting to 'popen' ([#16809](https://github.com/Lightning-AI/lightning/pull/16809))

### Changed

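A minimal sketch of the same feature on the PyTorch side (`memory://demo_logs` is just an illustrative URL; real remote targets need the corresponding fsspec backend installed):

```python
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import CSVLogger

# The pytorch-side CSVLogger takes save_dir; an fsspec URL should work here too.
logger = CSVLogger(save_dir="memory://demo_logs")
trainer = Trainer(logger=logger, max_epochs=1)
```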
2 changes: 1 addition & 1 deletion src/lightning/pytorch/loggers/csv_logs.py
@@ -154,6 +154,6 @@ def experiment(self) -> _FabricExperimentWriter:
if self._experiment is not None:
return self._experiment

os.makedirs(self.root_dir, exist_ok=True)
self._fs.makedirs(self.root_dir, exist_ok=True)
self._experiment = ExperimentWriter(log_dir=self.log_dir)
return self._experiment
11 changes: 11 additions & 0 deletions tests/tests_pytorch/loggers/test_csv.py
@@ -14,6 +14,7 @@
import os
from unittest.mock import MagicMock

import fsspec
import pytest
import torch

@@ -119,6 +120,16 @@ def test_fit_csv_logger(tmpdir):
assert os.path.isfile(metrics_file)


def test_csv_logger_remotefs():
logger = CSVLogger(save_dir="memory://test_fit_csv_logger_remotefs")
fs, _ = fsspec.core.url_to_fs("memory://test_fit_csv_logger_remotefs")
exp = logger.experiment
exp.log_metrics({"loss": 0.1})
exp.save()
metrics_file = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE)
assert fs.isfile(metrics_file)
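As a side note on the test: `memory://` resolves to fsspec's built-in in-memory filesystem, so the remote-path code path is exercised without any disk or network I/O. All `MemoryFileSystem` handles in a process share one store, which is why the separately constructed `fs` above can see the logger's files:

```python
import fsspec

fs, _ = fsspec.core.url_to_fs("memory://anything")
fs.pipe_file("/demo.txt", b"hello")  # write bytes into the shared store
assert fs.cat_file("/demo.txt") == b"hello"
```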


def test_flush_n_steps(tmpdir):
logger = CSVLogger(tmpdir, flush_logs_every_n_steps=2)
metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)}