InvalidArgumentError: Graph execution error:

Team, I’m facing the issue below when trying to train my model:

Epoch 1/1000
WARNING:tensorflow:From /home/lee/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/optimizers/optimizer_v2/adagrad.py:90: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
2023-01-27 13:24:04.341527: W tensorflow/c/c_api.cc:291] Operation '{name:'batch_normalization_3/cond_3/Identity' id:1442 op device:{requested: '', assigned: ''} def:{{{node batch_normalization_3/cond_3/Identity}} = Identity[T=DT_FLOAT, _has_manual_control_dependencies=true](batch_normalization_3/cond_3)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1378, in BaseSession._do_call(self, fn, *args)
   1377 try:
-> 1378   return fn(*args)
   1379 except errors.OpError as e:

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1360, in BaseSession._do_run.<locals>._run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
   1358 def _run_fn(feed_dict, fetch_list, target_list, options, run_metadata):
   1359   # Ensure any changes to the graph are reflected in the runtime.
-> 1360   self._extend_graph()
   1361   return self._call_tf_sessionrun(options, feed_dict, fetch_list,
   1362                                   target_list, run_metadata)

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1401, in BaseSession._extend_graph(self)
   1400 with self._graph._session_run_lock():  # pylint: disable=protected-access
-> 1401   tf_session.ExtendSession(self._session)

InvalidArgumentError: Node 'training/Adagrad/gradients/gradients/bidirectional_1/forward_gru_1_1/while_grad/bidirectional_1/forward_gru_1_1/while_grad': Connecting to invalid output 51 of source node bidirectional_1/forward_gru_1_1/while which has 51 outputs. Try using tf.compat.v1.experimental.output_all_intermediates(True).

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
Cell In[24], line 1
----> 1 history = train_model.fit(
      2     train_generator.generator(),
      3     steps_per_epoch=16 // BATCH_SIZE,
      4     validation_data=val_generator.generator(),
      5     validation_steps=16 // BATCH_SIZE,
      6     epochs=NUM_EPOCHS,
      7     callbacks=TrainHelper.get_callbacks(OUTPUT_PATH, MODEL_NAME, OPTIMIZER, MODEL_WEIGHTS_PATH), verbose=1)

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_v1.py:855, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    852 self._check_call_args("fit")
    854 func = self._select_training_loop(x)
--> 855 return func.fit(
    856     self,
    857     x=x,
    858     y=y,
    859     batch_size=batch_size,
    860     epochs=epochs,
    861     verbose=verbose,
    862     callbacks=callbacks,
    863     validation_split=validation_split,
    864     validation_data=validation_data,
    865     shuffle=shuffle,
    866     class_weight=class_weight,
    867     sample_weight=sample_weight,
    868     initial_epoch=initial_epoch,
    869     steps_per_epoch=steps_per_epoch,
    870     validation_steps=validation_steps,
    871     validation_freq=validation_freq,
    872     max_queue_size=max_queue_size,
    873     workers=workers,
    874     use_multiprocessing=use_multiprocessing,
    875 )

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_generator_v1.py:648, in GeneratorOrSequenceTrainingLoop.fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing)
    644 model._validate_or_infer_batch_size(batch_size, steps_per_epoch, x)
    645 training_utils_v1.check_generator_arguments(
    646     y, sample_weight, validation_split=validation_split
    647 )
--> 648 return fit_generator(
    649     model,
    650     x,
    651     steps_per_epoch=steps_per_epoch,
    652     epochs=epochs,
    653     verbose=verbose,
    654     callbacks=callbacks,
    655     validation_data=validation_data,
    656     validation_steps=validation_steps,
    657     validation_freq=validation_freq,
    658     class_weight=class_weight,
    659     max_queue_size=max_queue_size,
    660     workers=workers,
    661     use_multiprocessing=use_multiprocessing,
    662     shuffle=shuffle,
    663     initial_epoch=initial_epoch,
    664     steps_name="steps_per_epoch",
    665 )

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_generator_v1.py:282, in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
    279 callbacks._call_batch_hook(mode, "begin", step, batch_logs)
    281 is_deferred = not model._is_compiled
--> 282 batch_outs = batch_function(*batch_data)
    283 if not isinstance(batch_outs, list):
    284     batch_outs = [batch_outs]

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_v1.py:1180, in Model.train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
   1178     self._update_sample_weight_modes(sample_weights=sample_weights)
   1179     self._make_train_function()
-> 1180     outputs = self.train_function(ins)
   1182 if reset_metrics:
   1183     self.reset_metrics()

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/backend.py:4535, in GraphExecutionFunction.__call__(self, inputs)
   4532 def __call__(self, inputs):
   4533     inputs = tf.nest.flatten(inputs, expand_composites=True)
-> 4535     session = get_session(inputs)
   4536     feed_arrays = []
   4537     array_vals = []

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/backend.py:785, in get_session(op_input_list)
    783 if not _MANUAL_VAR_INIT:
    784     with session.graph.as_default():
--> 785         _initialize_variables(session)
    786 return session

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/backend.py:1251, in _initialize_variables(session)
   1247         candidate_vars.append(v)
   1248 if candidate_vars:
   1249     # This step is expensive, so we only run it on variables not already
   1250     # marked as initialized.
-> 1251     is_initialized = session.run(
   1252         [tf.compat.v1.is_variable_initialized(v) for v in candidate_vars]
   1253     )
   1254     # TODO(kathywu): Some metric variables loaded from SavedModel are never
   1255     # actually used, and do not have an initializer.
   1256     should_be_initialized = [
   1257         (not is_initialized[n]) and v.initializer is not None
   1258         for n, v in enumerate(candidate_vars)
   1259     ]

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:968, in BaseSession.run(self, fetches, feed_dict, options, run_metadata)
    965 run_metadata_ptr = tf_session.TF_NewBuffer() if run_metadata else None
    967 try:
--> 968   result = self._run(None, fetches, feed_dict, options_ptr,
    969                      run_metadata_ptr)
    970   if run_metadata:
    971     proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1191, in BaseSession._run(self, handle, fetches, feed_dict, options, run_metadata)
   1188 # We only want to really perform the run if fetches or targets are provided,
   1189 # or if the call is a partial run that specifies feeds.
   1190 if final_fetches or final_targets or (handle and feed_dict_tensor):
-> 1191   results = self._do_run(handle, final_targets, final_fetches,
   1192                          feed_dict_tensor, options, run_metadata)
   1193 else:
   1194   results = []

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1371, in BaseSession._do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1368   return self._call_tf_sessionprun(handle, feed_dict, fetch_list)
   1370 if handle is None:
-> 1371   return self._do_call(_run_fn, feeds, fetches, targets, options,
   1372                        run_metadata)
   1373 else:
   1374   return self._do_call(_prun_fn, handle, feeds, fetches)

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1397, in BaseSession._do_call(self, fn, *args)
   1392 if 'only supports NHWC tensor format' in message:
   1393   message += ('\nA possible workaround: Try disabling Grappler optimizer'
   1394               '\nby modifying the config for creating the session eg.'
   1395               '\nsession_config.graph_options.rewrite_options.'
   1396               'disable_meta_optimizer = True')
-> 1397 raise type(e)(node_def, op, message)

InvalidArgumentError: Graph execution error:

Node 'training/Adagrad/gradients/gradients/bidirectional_1/forward_gru_1_1/while_grad/bidirectional_1/forward_gru_1_1/while_grad': Connecting to invalid output 51 of source node bidirectional_1/forward_gru_1_1/while which has 51 outputs. Try using tf.compat.v1.experimental.output_all_intermediates(True).

This is the code that I have used.

Can someone please help me understand the error?

I had the same issue when using big datasets on GPU. Try adding the following code at the very beginning of your script:


import os

# Hide the GPU so TensorFlow falls back to CPU; must be set before importing tensorflow
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
print(tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

It should print 0 GPUs available.
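
Alternatively, the error message itself points at a workaround: enabling output of all intermediates for v1-style control flow, so the gradient graph can connect to the GRU's while-loop outputs. A minimal sketch (this is an assumption based on the error text, not a verified fix for this exact model); call it once, before the model is built and compiled:

import tensorflow as tf

# Suggested by the error message: keep all intermediate outputs of
# v1 control-flow ops (the GRU's while loop) available to the
# gradient computation.
tf.compat.v1.experimental.output_all_intermediates(True)

# ... build and compile the model after this call ...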