accum_optimizer_for_keras
Doesn't work with new versions of keras
As of Keras 2.3.0, self.lr was renamed to self.learning_rate (https://github.com/keras-team/keras/releases), so https://github.com/bojone/accum_optimizer_for_keras/blob/b898256464367c028f698c55be12c864b002a49a/accum_optimizer.py#L31 is broken: the references to self.lr need to be replaced with self.learning_rate.
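For instance, on Keras 2.3 the hyperparameter is exposed under the new name (a quick sketch, assuming the standalone keras package; as far as I can tell, lr is only kept as a backwards-compatible alias):

# Quick check of the rename on Keras >= 2.3 (assumption: standalone keras package)
from keras.optimizers import Adam
import keras.backend as K

opt = Adam()
print(K.get_value(opt.learning_rate))  # new attribute name, e.g. 0.001
print(K.get_value(opt.lr))             # old name still resolves via the legacy alias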
I tried changing lr to learning_rate:
from keras.optimizers import Optimizer
import keras.backend as K


class AccumOptimizer(Optimizer):
    """Inherits the Optimizer class and wraps an existing keras optimizer
    to obtain a corresponding optimizer with gradient accumulation.
    # Arguments
        optimizer: an instance of a keras optimizer (supporting
            all keras optimizers currently available);
        steps_per_update: the number of steps to accumulate gradients over.
    # Returns
        a new keras optimizer.
    """
    def __init__(self, optimizer, steps_per_update=1, **kwargs):
        super(AccumOptimizer, self).__init__(**kwargs)
        self.optimizer = optimizer
        with K.name_scope(self.__class__.__name__):
            self.steps_per_update = steps_per_update
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            # True only on every steps_per_update-th step; gates the real parameter update
            self.cond = K.equal(self.iterations % self.steps_per_update, 0)
            self.learning_rate = self.optimizer.learning_rate
            self.optimizer.learning_rate = K.switch(self.cond, self.optimizer.learning_rate, 0.)
            for attr in ['momentum', 'rho', 'beta_1', 'beta_2']:
                if hasattr(self.optimizer, attr):
                    value = getattr(self.optimizer, attr)
                    setattr(self, attr, value)
                    setattr(self.optimizer, attr, K.switch(self.cond, value, 1 - 1e-7))
            for attr in self.optimizer.get_config():
                if not hasattr(self, attr):
                    value = getattr(self.optimizer, attr)
                    setattr(self, attr, value)
            # Override the original get_gradients method so that it returns
            # the accumulated gradients instead of the per-batch gradients.
            def get_gradients(loss, params):
                return [ag / self.steps_per_update for ag in self.accum_grads]
            self.optimizer.get_gradients = get_gradients

    def get_updates(self, loss, params):
        self.updates = [
            K.update_add(self.iterations, 1),
            K.update_add(self.optimizer.iterations, K.cast(self.cond, 'int64')),
        ]
        # gradient accumulation
        self.accum_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        grads = self.get_gradients(loss, params)
        for g, ag in zip(grads, self.accum_grads):
            self.updates.append(K.update(ag, K.switch(self.cond, g, ag + g)))
        # inheriting the updates of the original optimizer
        self.updates.extend(self.optimizer.get_updates(loss, params)[1:])
        self.weights.extend(self.optimizer.weights)
        return self.updates

    def get_config(self):
        iterations = K.eval(self.iterations)
        K.set_value(self.iterations, 0)
        config = self.optimizer.get_config()
        K.set_value(self.iterations, iterations)
        return config
However, I still can't use this library. I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
TypeError: float() argument must be a string or a number, not 'Tensor'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-58-e1dd79296dc0> in <module>
11
---> 12 optimizer = AccumOptimizer(Adam(), 10) # 10 is accumulative steps
13 model.compile(optimizer=optimizer, loss='mse', metrics=['accuracy'])
<ipython-input-57-2140290320bb> in __init__(self, optimizer, steps_per_update, **kwargs)
27 self.cond = K.equal(self.iterations % self.steps_per_update, 0)
28 self.learning_rate = self.optimizer.learning_rate
---> 29 self.optimizer.learning_rate = K.switch(self.cond, self.optimizer.learning_rate, 0.)
30 for attr in ['momentum', 'rho', 'beta_1', 'beta_2']:
31 if hasattr(self.optimizer, attr):
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py in __setattr__(self, name, value)
548 name = "learning_rate"
549 if hasattr(self, "_hyper") and name in self._hyper:
--> 550 self._set_hyper(name, value)
551 else:
552 super(OptimizerV2, self).__setattr__(name, value)
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py in _set_hyper(self, name, value)
512 self._hyper[name] = value
513 else:
--> 514 backend.set_value(self._hyper[name], value)
515
516 def _get_hyper(self, name, dtype=None):
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\keras\backend.py in set_value(x, value)
3022 (of the same shape).
3023 """
-> 3024 value = np.asarray(value, dtype=dtype(x))
3025 if ops.executing_eagerly_outside_functions():
3026 x.assign(value)
C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
ValueError: setting an array element with a sequence.
Can you help me? The issue seems similar.
@thomashirtz I have the same problem. Have you solved it?
No I didn't manage to solve it unfortunately
Changing the lines in this way works for me in Keras 2.3:
self.learning_rate = self.optimizer.lr
self.optimizer.learning_rate = K.switch(self.cond, self.optimizer.lr, 0.)
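With those two lines swapped in, the wrapper compiles as in the snippet from the traceback above. A minimal sketch (assuming the AccumOptimizer class above with this fix applied; the toy model is only for illustration):

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Wrap Adam so gradients are accumulated over 10 batches before each real update.
model = Sequential([Dense(1, input_shape=(4,))])
optimizer = AccumOptimizer(Adam(), steps_per_update=10)
model.compile(optimizer=optimizer, loss='mse', metrics=['accuracy'])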