opened 07:07AM - 30 Sep 23 UTC
closed 01:46AM - 21 Oct 23 UTC
stat:awaiting response
type:support
stale
comp:keras
TF 2.12
### Issue type
Bug
### Have you reproduced the bug with TensorFlow Nightly?
No
### Source
source
### TensorFlow version
v2.12.0-rc1-12-g0db597d0d75 2.12.0
### Custom code
Yes
### OS platform and distribution
Kaggle kernel
### Mobile device
_No response_
### Python version
3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]
### Bazel version
_No response_
### GCC/compiler version
_No response_
### CUDA/cuDNN version
_No response_
### GPU model and memory
_No response_
### Current behavior?
There are no methods called `_set_hyper` and `_get_hyper`
### Standalone code to reproduce the issue
```python
import tensorflow as tf

class MyAdamOptimizer(tf.keras.optimizers.Optimizer):
    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, name="MyAdamOptimizer", **kwargs):
        super(MyAdamOptimizer, self).__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("beta_1", beta_1)
        self._set_hyper("beta_2", beta_2)
        self._set_hyper("epsilon", epsilon)

    def _create_slots(self, var_list):
        for var in var_list:
            self.add_slot(var, "m")
            self.add_slot(var, "v")

    def _resource_apply_dense(self, grad, var):
        lr = self._get_hyper("learning_rate", var_dtype=var.dtype.base_dtype)
        beta_1 = self._get_hyper("beta_1", var_dtype=var.dtype.base_dtype)
        beta_2 = self._get_hyper("beta_2", var_dtype=var.dtype.base_dtype)
        epsilon = self._get_hyper("epsilon", var_dtype=var.dtype.base_dtype)
        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")
        m.assign_add((1 - beta_1) * (grad - m))
        v.assign_add((1 - beta_2) * (tf.square(grad) - v))
        m_hat = m / (1 - tf.math.pow(beta_1, tf.cast(self.iterations + 1, tf.float32)))
        v_hat = v / (1 - tf.math.pow(beta_2, tf.cast(self.iterations + 1, tf.float32)))
        var_update = lr * m_hat / (tf.sqrt(v_hat) + epsilon)
        var.assign_sub(var_update)
        return var_update

    def _resource_apply_sparse(self, grad, var):
        raise NotImplementedError("Sparse gradient updates are not supported.")

optimizer = MyAdamOptimizer(learning_rate=0.001)
```
### Relevant log output
```shell
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[5], line 42
38 def _resource_apply_sparse(self, grad, var):
39 raise NotImplementedError("Sparse gradient updates are not supported.")
---> 42 optimizer = MyAdamOptimizer(learning_rate=0.001)
Cell In[5], line 7, in MyAdamOptimizer.__init__(self, learning_rate, beta_1, beta_2, epsilon, name, **kwargs)
4 def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, name="MyAdamOptimizer", **kwargs):
5 super(MyAdamOptimizer, self).__init__(name, **kwargs)
----> 7 self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
8 self._set_hyper("beta_1", beta_1)
9 self._set_hyper("beta_2", beta_2)
AttributeError: 'MyAdamOptimizer' object has no attribute '_set_hyper'
```
I’m trying to play with optimizers in TF v2.12 using the standalone code shown above.
But I can’t find `_set_hyper` and `_get_hyper`. Can anyone give me the latest implementation strategy for Optimizers?
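
For context, in TF 2.12 the default `tf.keras.optimizers.Optimizer` is the new-style base class, which drops `_set_hyper`, `_get_hyper`, `_create_slots`, and `_resource_apply_dense` in favour of `build`, `update_step`, and `get_config`. Below is a rough sketch of the same Adam-style update against that API, modelled on the built-in `tf.keras.optimizers.Adam`; helper names such as `add_variable_from_reference`, `_var_key`, and `_build_learning_rate` are taken from that class, and the whole snippet should be read as an approximation rather than a verified drop-in fix.

```python
import tensorflow as tf

class MyAdamOptimizer(tf.keras.optimizers.Optimizer):
    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-7, name="MyAdamOptimizer", **kwargs):
        super().__init__(name=name, **kwargs)
        # Hyperparameters are plain attributes now; only the learning rate
        # gets special handling because it may be a schedule.
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def build(self, var_list):
        # Replaces _create_slots: one m/v variable per model variable.
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        self._momentums = []
        self._velocities = []
        for var in var_list:
            self._momentums.append(
                self.add_variable_from_reference(model_variable=var,
                                                 variable_name="m"))
            self._velocities.append(
                self.add_variable_from_reference(model_variable=var,
                                                 variable_name="v"))

    def update_step(self, gradient, variable):
        # Replaces _resource_apply_dense (dense gradients only).
        lr = tf.cast(self.learning_rate, variable.dtype)
        local_step = tf.cast(self.iterations + 1, variable.dtype)
        beta_1 = tf.cast(self.beta_1, variable.dtype)
        beta_2 = tf.cast(self.beta_2, variable.dtype)
        var_key = self._var_key(variable)
        m = self._momentums[self._index_dict[var_key]]
        v = self._velocities[self._index_dict[var_key]]
        m.assign_add((gradient - m) * (1 - beta_1))
        v.assign_add((tf.square(gradient) - v) * (1 - beta_2))
        m_hat = m / (1 - tf.pow(beta_1, local_step))
        v_hat = v / (1 - tf.pow(beta_2, local_step))
        variable.assign_sub(lr * m_hat / (tf.sqrt(v_hat) + self.epsilon))

    def get_config(self):
        config = super().get_config()
        config.update({
            "learning_rate": self._serialize_hyperparameter(self._learning_rate),
            "beta_1": self.beta_1,
            "beta_2": self.beta_2,
            "epsilon": self.epsilon,
        })
        return config

optimizer = MyAdamOptimizer(learning_rate=0.001)
```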
tagoma
September 30, 2023, 12:37pm
#2
Hi @maifeeulasad
Can you please try out the following?
```python
import tensorflow as tf
from tensorflow.python.keras.optimizer_v2 import optimizer_v2

class MyAdamOptimizer(optimizer_v2.OptimizerV2):
    def __init__(self, learning_rate=0.001, beta_1=0.9,
                 beta_2=0.999, epsilon=1e-7,
                 name="MyAdamOptimizer", **kwargs):
        super(MyAdamOptimizer, self).__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("beta_1", beta_1)
        self._set_hyper("beta_2", beta_2)
        self._set_hyper("epsilon", epsilon)

    def _create_slots(self, var_list):
        for var in var_list:
            self.add_slot(var, "m")
            self.add_slot(var, "v")

    def _resource_apply_dense(self, grad, var):
        lr = self._get_hyper("learning_rate", var_dtype=var.dtype.base_dtype)
        beta_1 = self._get_hyper("beta_1", var_dtype=var.dtype.base_dtype)
        beta_2 = self._get_hyper("beta_2", var_dtype=var.dtype.base_dtype)
        epsilon = self._get_hyper("epsilon", var_dtype=var.dtype.base_dtype)
        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")
        m.assign_add((1 - beta_1) * (grad - m))
        v.assign_add((1 - beta_2) * (tf.square(grad) - v))
        m_hat = m / (1 - tf.math.pow(beta_1, tf.cast(self.iterations + 1, tf.float32)))
        v_hat = v / (1 - tf.math.pow(beta_2, tf.cast(self.iterations + 1, tf.float32)))
        var_update = lr * m_hat / (tf.sqrt(v_hat) + epsilon)
        var.assign_sub(var_update)
        return var_update

    def _resource_apply_sparse(self, grad, var):
        raise NotImplementedError("Sparse gradient updates are not supported.")

optimizer = MyAdamOptimizer(learning_rate=0.001)
```
(for some reason, I’m unable to format my text as I wish. Sorry for the mess on your screen)
@tagoma Really appreciate your response. But it leads to `ValueError: Could not interpret optimizer identifier: <__main__.MyAdamOptimizer object at 0x79f2a00cba30>`, due to a version mismatch as far as I know.
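
For what it's worth, importing from `tensorflow.python.keras` pulls in a separate, older copy of Keras than the one `tf.keras` actually uses, so an optimizer built on that private module can fail the `isinstance` checks in `model.compile()`, which is a plausible cause of the "Could not interpret optimizer identifier" error. If the goal is to keep the `_set_hyper`/`_get_hyper` style, TF 2.12 still exposes the old base class publicly; here is a minimal sketch, assuming the `tf.keras.optimizers.legacy.Optimizer` export (a plain SGD update is used just to keep it short):

```python
import tensorflow as tf

# Legacy-style optimizer built on the public legacy base class, which still
# provides _set_hyper/_get_hyper, _create_slots and _resource_apply_dense.
class MySGD(tf.keras.optimizers.legacy.Optimizer):
    def __init__(self, learning_rate=0.01, name="MySGD", **kwargs):
        super().__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))

    def _create_slots(self, var_list):
        pass  # plain SGD keeps no per-variable state

    def _resource_apply_dense(self, grad, var, apply_state=None):
        lr = self._get_hyper("learning_rate", var.dtype.base_dtype)
        return var.assign_sub(lr * grad)

    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        raise NotImplementedError("Sparse gradient updates are not supported.")

    def get_config(self):
        config = super().get_config()
        config.update(
            {"learning_rate": self._serialize_hyperparameter("learning_rate")})
        return config

# Hypothetical usage: compiling should not raise the identifier error when the
# optimizer subclasses the public class rather than the private module's copy.
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer=MySGD(learning_rate=0.01), loss="mse")
```

The forward-compatible path, though, is the new-style `build`/`update_step` API sketched earlier in the thread, since the legacy class is kept mainly for backwards compatibility.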
tagoma
September 30, 2023, 1:41pm
#4
I don’t get any error message locally using TF 2.13.
I created a Colab using TF 2.12.
Can you please have a look at it?