Skip to content

Commit

Permalink
Use stable functions (#9369)
Browse files Browse the repository at this point in the history
  • Loading branch information
jplu authored Jan 5, 2021
1 parent 4aa8f6a commit 4225740
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 26 deletions.
6 changes: 3 additions & 3 deletions src/transformers/benchmark/benchmark_args_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,15 @@ def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", "tf.distribute.clus
tf.config.experimental_connect_to_cluster(self._setup_tpu)
tf.tpu.experimental.initialize_tpu_system(self._setup_tpu)

- strategy = tf.distribute.experimental.TPUStrategy(self._setup_tpu)
+ strategy = tf.distribute.TPUStrategy(self._setup_tpu)
else:
# currently no multi gpu is allowed
if self.is_gpu:
# TODO: Currently only single GPU is supported
- tf.config.experimental.set_visible_devices(self.gpu_list[self.device_idx], "GPU")
+ tf.config.set_visible_devices(self.gpu_list[self.device_idx], "GPU")
strategy = tf.distribute.OneDeviceStrategy(device=f"/gpu:{self.device_idx}")
else:
- tf.config.experimental.set_visible_devices([], "GPU") # disable GPU
+ tf.config.set_visible_devices([], "GPU") # disable GPU
strategy = tf.distribute.OneDeviceStrategy(device=f"/cpu:{self.device_idx}")

return strategy
Expand Down
12 changes: 3 additions & 9 deletions src/transformers/trainer_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

import numpy as np
import tensorflow as tf
- from packaging.version import parse
from tensorflow.python.distribute.values import PerReplica

from .modeling_tf_utils import TFPreTrainedModel
Expand Down Expand Up @@ -93,11 +92,6 @@ def __init__(
None,
),
):
- assert parse(tf.__version__).release >= (2, 2, 0), (
- "You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r "
- % tf.__version__
- )
-
self.model = model
self.args = args
self.train_dataset = train_dataset
Expand Down Expand Up @@ -141,7 +135,7 @@ def get_train_tfdataset(self) -> tf.data.Dataset:
raise ValueError("Trainer: training requires a train_dataset.")

self.total_train_batch_size = self.args.train_batch_size * self.args.gradient_accumulation_steps
- self.num_train_examples = tf.data.experimental.cardinality(self.train_dataset).numpy()
+ self.num_train_examples = self.train_dataset.cardinality(self.train_dataset).numpy()

if self.num_train_examples < 0:
raise ValueError("The training dataset must have an asserted cardinality")
Expand Down Expand Up @@ -173,7 +167,7 @@ def get_eval_tfdataset(self, eval_dataset: Optional[tf.data.Dataset] = None) ->
raise ValueError("Trainer: evaluation requires an eval_dataset.")

eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
- num_examples = tf.data.experimental.cardinality(eval_dataset).numpy()
+ num_examples = eval_dataset.cardinality(eval_dataset).numpy()

if num_examples < 0:
raise ValueError("The training dataset must have an asserted cardinality")
Expand Down Expand Up @@ -203,7 +197,7 @@ def get_test_tfdataset(self, test_dataset: tf.data.Dataset) -> tf.data.Dataset:
Subclass and override this method if you want to inject some custom behavior.
"""

- num_examples = tf.data.experimental.cardinality(test_dataset).numpy()
+ num_examples = test_dataset.cardinality(test_dataset).numpy()

if num_examples < 0:
raise ValueError("The training dataset must have an asserted cardinality")
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/training_args_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def _setup_strategy(self) -> Tuple["tf.distribute.Strategy", int]:
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

- strategy = tf.distribute.experimental.TPUStrategy(tpu)
+ strategy = tf.distribute.TPUStrategy(tpu)

elif len(gpus) == 0:
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
Expand Down
6 changes: 3 additions & 3 deletions tests/test_modeling_tf_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@
for gpu in gpus:
# Restrict TensorFlow to only allocate x GB of memory on the GPUs
try:
- tf.config.experimental.set_virtual_device_configuration(
- gpu, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
+ tf.config.set_logical_device_configuration(
+ gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
)
- logical_gpus = tf.config.experimental.list_logical_devices("GPU")
+ logical_gpus = tf.config.list_logical_devices("GPU")
print("Logical GPUs", logical_gpus)
except RuntimeError as e:
# Virtual devices must be set before GPUs have been initialized
Expand Down
12 changes: 2 additions & 10 deletions tests/test_optimization_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

import unittest

- from packaging import version
-
from transformers import is_tf_available
from transformers.testing_utils import require_tf

Expand Down Expand Up @@ -78,18 +76,12 @@ def accumulate(grad1, grad2):
local_variables = strategy.experimental_local_results(gradient_placeholder)
local_variables[0].assign(grad1)
local_variables[1].assign(grad2)
- if version.parse(tf.version.VERSION) >= version.parse("2.2"):
- strategy.run(accumulate_on_replica, args=(gradient_placeholder,))
- else:
- strategy.experimental_run_v2(accumulate_on_replica, args=(gradient_placeholder,))
+ strategy.run(accumulate_on_replica, args=(gradient_placeholder,))

@tf.function
def apply_grad():
with strategy.scope():
- if version.parse(tf.version.VERSION) >= version.parse("2.2"):
- strategy.run(apply_on_replica)
- else:
- strategy.experimental_run_v2(apply_on_replica)
+ strategy.run(apply_on_replica)

def _check_local_values(grad1, grad2):
values = strategy.experimental_local_results(accumulator._gradients[0])
Expand Down

0 comments on commit 4225740

Please sign in to comment.