Skip to content

Commit

Permalink
Fix auto-resume training from checkpoint (huggingface#9822)
Browse files Browse the repository at this point in the history
* Fix auto-resume training from checkpoint

* style fixes
  • Loading branch information
jncasey authored and Qbiwan committed Jan 31, 2021
1 parent 32ec5a4 commit d447115
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions src/transformers/trainer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,19 @@ class TrainOutput(NamedTuple):


# Directory-name prefix; both patterns below match names of the form "checkpoint-<digits>".
# NOTE(review): this is a rendered diff without +/- markers. Going by the header
# ("7 additions & 3 deletions"), the first `_re_checkpoint` line appears to be the
# removed version and the second the added one.
PREFIX_CHECKPOINT_DIR = "checkpoint"
# Pre-change pattern: in `(\d)+` the capturing group itself is repeated, so the group
# holds only the last digit matched (e.g. "0" for "checkpoint-500").
_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d)+$")
# Post-change pattern: `(\d+)` captures the entire run of digits (e.g. "500").
_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d+)$")


# Pick, among the entries of `folder` whose names match `_re_checkpoint` and that are
# directories, the one whose captured number is largest; return None if there is none.
# NOTE(review): this is a rendered diff without +/- markers and with indentation
# stripped. Where two alternatives of a statement appear back to back, the first
# appears to be the removed (pre-change) line and the second the added (post-change) one.
def get_last_checkpoint(folder):
content = os.listdir(folder)
# Pre-change filter: `os.path.isdir(path)` receives the bare entry name, so it is
# resolved against the current working directory rather than against `folder`.
checkpoints = [path for path in content if _re_checkpoint.search(path) is not None and os.path.isdir(path)]
# Post-change filter: the entry name is joined onto `folder` before the directory test.
checkpoints = [
path
for path in content
if _re_checkpoint.search(path) is not None and os.path.isdir(os.path.join(folder, path))
]
if len(checkpoints) == 0:
return
# Pre-change result: the bare directory name with the largest captured number.
return max(checkpoints, key=lambda x: int(_re_checkpoint.search(x).groups()[0]))
# Post-change result: the same name, joined onto `folder` so the caller receives a path.
return os.path.join(folder, max(checkpoints, key=lambda x: int(_re_checkpoint.search(x).groups()[0])))


class EvaluationStrategy(ExplicitEnum):
Expand Down

0 comments on commit d447115

Please sign in to comment.