change interface for optimizer. (#137)
ppwwyyxx committed Feb 12, 2017
1 parent e0b1a5c commit d1041a7
Showing 37 changed files with 151 additions and 158 deletions.
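
In short, this commit removes the per-model get_gradient_processor() hook and the optimizer= argument of TrainConfig: each model now defines _get_optimizer(), builds its own optimizer (including the learning-rate variable) and wraps it with its gradient processors via optimizer.apply_grad_processors. A minimal before/after sketch of the migration follows; the import paths and the toy model classes are illustrative assumptions, not part of this diff.

import tensorflow as tf
from tensorpack import *
from tensorpack.tfutils import optimizer, gradproc
from tensorpack.tfutils import symbolic_functions as symbf

# Old interface (before this commit): the optimizer was created in get_config()
# and passed to TrainConfig; the model only listed its gradient processors.
class OldModel(ModelDesc):
    def get_gradient_processor(self):
        return [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()]

# old-style config (no longer valid after this commit):
# TrainConfig(dataflow=df, optimizer=tf.train.AdamOptimizer(lr), model=OldModel(), ...)

# New interface (after this commit): the model builds the optimizer itself and
# applies its gradient processors to it; TrainConfig takes no optimizer argument.
class NewModel(ModelDesc):
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
        opt = tf.train.AdamOptimizer(lr)
        return optimizer.apply_grad_processors(
            opt, [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()])

# new-style config:
# TrainConfig(dataflow=df, model=NewModel(), ...)
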
18 changes: 10 additions & 8 deletions examples/A3C-Gym/train-atari.py
@@ -24,6 +24,7 @@
from tensorpack.utils.serialize import *
from tensorpack.utils.stats import *
from tensorpack.tfutils import symbolic_functions as symbf
from tensorpack.tfutils.gradproc import MapGradient, SummaryGradient

from tensorpack.RL import *
from simulator import *
@@ -132,9 +133,14 @@ def _build_graph(self, inputs):
summary.add_moving_summary(policy_loss, xentropy_loss,
value_loss, pred_reward, advantage, self.cost)

def get_gradient_processor(self):
return [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
gradproc.SummaryGradient()]
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)

gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
SummaryGradient()]
opt = optimizer.apply_grad_processors(opt, gradprocs)
return opt


class MySimulatorMaster(SimulatorMaster, Callback):
@@ -202,11 +208,8 @@ def get_config():

master = MySimulatorMaster(namec2s, names2c, M)
dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
return TrainConfig(
dataflow=dataflow,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
@@ -269,8 +272,7 @@ def get_config():
logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
nr_gpu = 0
PREDICTOR_THREAD = 1
predict_tower = [0]
train_tower = [0]
predict_tower, train_tower = [0], [0]
trainer = QueueInputTrainer
config = get_config()
if args.load:
15 changes: 6 additions & 9 deletions examples/CTC-TIMIT/train-timit.py
@@ -14,7 +14,7 @@
from six.moves import map, range

from tensorpack import *
from tensorpack.tfutils.gradproc import *
from tensorpack.tfutils.gradproc import SummaryGradient, GlobalNormClip
from tensorpack.utils.globvars import globalns as param
import tensorpack.tfutils.symbolic_functions as symbf
from timitdata import TIMITBatch
@@ -73,8 +73,11 @@ def _build_graph(self, inputs):
err = tf.reduce_mean(err, name='error')
summary.add_moving_summary(err, self.cost)

def get_gradient_processor(self):
return [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()]
def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 5e-3, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
opt, [GlobalNormClip(5), SummaryGradient()])


def get_data(path, isTrain, stat_file):
@@ -88,13 +91,8 @@ def get_data(path, isTrain, stat_file):


def get_config(ds_train, ds_test):
steps_per_epoch = ds_train.size()

lr = symbolic_functions.get_scalar_var('learning_rate', 5e-3, summary=True)

return TrainConfig(
dataflow=ds_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
StatMonitorParamSetter('learning_rate', 'error',
@@ -105,7 +103,6 @@ def get_config(ds_train, ds_test):
every_k_epochs=2),
],
model=Model(),
steps_per_epoch=steps_per_epoch,
max_epoch=70,
)

13 changes: 6 additions & 7 deletions examples/Char-RNN/char-rnn.py
@@ -14,7 +14,7 @@
from six.moves import map, range

from tensorpack import *
from tensorpack.tfutils.gradproc import *
from tensorpack.tfutils.gradproc import GlobalNormClip
from tensorpack.utils.lut import LookUpTable
from tensorpack.utils.globvars import globalns as param
rnn = tf.contrib.rnn
@@ -42,7 +42,7 @@ def __init__(self, input_file, size):
data = f.read()
if six.PY2:
data = bytearray(data)
data = [chr(c) for c in data if c < 128] # TODO this is Py2 only
data = [chr(c) for c in data if c < 128]
counter = Counter(data)
char_cnt = sorted(counter.items(), key=operator.itemgetter(1), reverse=True)
self.chars = [x[0] for x in char_cnt]
@@ -105,8 +105,10 @@ def get_v(n):
summary.add_param_summary(('.*/W', ['histogram'])) # monitor histogram of all W
summary.add_moving_summary(self.cost)

def get_gradient_processor(self):
return [gradproc.GlobalNormClip(5)]
def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)
opt = tf.train.AdamOptimizer(lr)
return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])


def get_config():
@@ -116,11 +118,8 @@ def get_config():
ds = BatchData(ds, param.batch_size)
steps_per_epoch = ds.size()

lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)

return TrainConfig(
dataflow=ds,
optimizer=tf.train.AdamOptimizer(lr),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate', [(25, 2e-4)])
13 changes: 6 additions & 7 deletions examples/DeepQNetwork/DQN.py
@@ -149,9 +149,11 @@ def update_target_param(self):
ops.append(v.assign(tf.get_default_graph().get_tensor_by_name(new_name + ':0')))
return tf.group(*ops, name='update_target_network')

def get_gradient_processor(self):
return [gradproc.GlobalNormalClip(10),
gradproc.SummaryGradient()]
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
opt, [gradproc.GlobalNormalClip(10), gradproc.SummaryGradient()])


def get_config():
@@ -171,11 +173,8 @@ def get_config():
reward_clip=(-1, 1),
history_len=FRAME_HISTORY)

lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)

return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate',
@@ -186,7 +185,7 @@
# HumanHyperParamSetter('learning_rate', 'hyper.txt'),
# HumanHyperParamSetter(ObjAttrParam(dataset_train, 'exploration'), 'hyper.txt'),
],
# save memory for multiprocess evaluator
# save memory for multi-thread evaluator
session_config=get_default_sess_config(0.6),
model=M,
steps_per_epoch=STEP_PER_EPOCH,
1 change: 0 additions & 1 deletion examples/DeepQNetwork/common.py
@@ -41,7 +41,6 @@ def play_model(cfg):

def eval_with_funcs(predict_funcs, nr_eval):
class Worker(StoppableThread):

def __init__(self, func, queue):
super(Worker, self).__init__()
self._func = func
2 changes: 1 addition & 1 deletion examples/DisturbLabel/disturb.py
@@ -7,12 +7,12 @@


class DisturbLabel(ProxyDataFlow):

def __init__(self, ds, prob):
super(DisturbLabel, self).__init__(ds)
self.prob = prob

def reset_state(self):
super(DisturbLabel, self).reset_state()
self.rng = get_rng(self)

def get_data(self):
9 changes: 4 additions & 5 deletions examples/DoReFa-Net/alexnet-dorefa.py
@@ -161,6 +161,10 @@ def activate(x):
self.cost = tf.add_n([cost, wd_cost], name='cost')
add_moving_summary(cost, wd_cost, self.cost)

def _get_optimizer(self):
lr = get_scalar_var('learning_rate', 1e-4, summary=True)
return tf.train.AdamOptimizer(lr, epsilon=1e-5)


def get_data(dataset_name):
isTrain = dataset_name == 'train'
@@ -225,16 +229,11 @@ def resize_func(im):

def get_config():
logger.auto_set_dir()

# prepare dataset
data_train = get_data('train')
data_test = get_data('val')

lr = get_scalar_var('learning_rate', 1e-4, summary=True)

return TrainConfig(
dataflow=data_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
callbacks=[
ModelSaver(),
# HumanHyperParamSetter('learning_rate'),
19 changes: 9 additions & 10 deletions examples/DoReFa-Net/svhn-digit-dorefa.py
@@ -125,6 +125,15 @@ def activate(x):
self.cost = tf.add_n([cost, wd_cost], name='cost')
add_moving_summary(cost, wd_cost, self.cost)

def _get_optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=4721 * 100,
decay_rate=0.5, staircase=True, name='learning_rate')
tf.summary.scalar('lr', lr)
return tf.train.AdamOptimizer(lr, epsilon=1e-5)


def get_config():
logger.auto_set_dir()
@@ -146,29 +155,19 @@ def get_config():
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
data_train = PrefetchDataZMQ(data_train, 5)
steps_per_epoch = data_train.size()

augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True)

lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=data_train.size() * 100,
decay_rate=0.5, staircase=True, name='learning_rate')
tf.summary.scalar('lr', lr)

return TrainConfig(
dataflow=data_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
callbacks=[
ModelSaver(),
InferenceRunner(data_test,
[ScalarStats('cost'), ClassificationError()])
],
model=Model(),
steps_per_epoch=steps_per_epoch,
max_epoch=200,
)

9 changes: 5 additions & 4 deletions examples/GAN/ConditionalGAN-mnist.py
@@ -90,20 +90,21 @@ def _build_graph(self, input_vars):
self.build_losses(vecpos, vecneg)
self.collect_variables()

def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)


def get_data():
ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')])
ds = BatchData(ds, BATCH)
return ds
return BatchData(ds, BATCH)


def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
callbacks=[ModelSaver()],
session_config=get_default_sess_config(0.5),
model=Model(),
6 changes: 4 additions & 2 deletions examples/GAN/DCGAN-CelebA.py
@@ -88,6 +88,10 @@ def _build_graph(self, inputs):
self.build_losses(vecpos, vecneg)
self.collect_variables()

def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)


def get_data():
global args
@@ -104,10 +108,8 @@ def get_data():
def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
callbacks=[ModelSaver()],
session_config=get_default_sess_config(0.5),
model=Model(),
10 changes: 0 additions & 10 deletions examples/GAN/GAN.py
@@ -65,12 +65,6 @@ def build_losses(self, logits_real, logits_fake):

add_moving_summary(self.g_loss, self.d_loss, self.d_accuracy, self.g_accuracy)

def get_gradient_processor_g(self):
return [CheckGradient()]

def get_gradient_processor_d(self):
return [CheckGradient()]


class GANTrainer(FeedfreeTrainerBase):
def __init__(self, config):
@@ -86,16 +80,12 @@ def _setup(self):
# optimize G
grads = self.config.optimizer.compute_gradients(
self.model.g_loss, var_list=self.model.g_vars)
grads = apply_grad_processors(
grads, self.model.get_gradient_processor_g())
self.g_min = self.config.optimizer.apply_gradients(grads, name='g_op')

# optimize D
with tf.control_dependencies([self.g_min]):
grads = self.config.optimizer.compute_gradients(
self.model.d_loss, var_list=self.model.d_vars)
grads = apply_grad_processors(
grads, self.model.get_gradient_processor_d())
self.d_min = self.config.optimizer.apply_gradients(grads, name='d_op')

self.train_op = self.d_min
6 changes: 4 additions & 2 deletions examples/GAN/Image2Image.py
@@ -130,6 +130,10 @@ def _build_graph(self, inputs):

self.collect_variables()

def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)


def split_input(img):
"""
@@ -167,10 +171,8 @@ def get_data():
def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
callbacks=[
PeriodicTrigger(ModelSaver(), every_k_epochs=3),
ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
12 changes: 6 additions & 6 deletions examples/GAN/InfoGAN-mnist.py
@@ -146,9 +146,12 @@ def _build_graph(self, inputs):
# distinguish between variables of generator and discriminator updates
self.collect_variables()

def get_gradient_processor_g(self):
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
# generator learns 5 times faster
return [gradproc.ScaleGradient(('.*', 5), log=False)]
return optimizer.apply_grad_processors(
opt, [gradproc.ScaleGradient(('.*', 5), log=False)])


def get_data():
@@ -159,11 +162,8 @@ def get_data():

def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6),
dataflow=get_data(),
callbacks=[ModelSaver()],
session_config=get_default_sess_config(0.5),
model=Model(),
(Diff for the remaining changed files not shown.)
