InvalidArgumentError: Graph execution error:

Team, I’m facing the issue below when trying to train my model:

Epoch 1/1000
WARNING:tensorflow:From /home/lee/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/optimizers/optimizer_v2/adagrad.py:90: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
2023-01-27 13:24:04.341527: W tensorflow/c/c_api.cc:291] Operation '{name:'batch_normalization_3/cond_3/Identity' id:1442 op device:{requested: '', assigned: ''} def:{{{node batch_normalization_3/cond_3/Identity}} = Identity[T=DT_FLOAT, _has_manual_control_dependencies=true](batch_normalization_3/cond_3)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1378, in BaseSession._do_call(self, fn, *args)
   1377 try:
-> 1378   return fn(*args)
   1379 except errors.OpError as e:

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1360, in BaseSession._do_run.<locals>._run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
   1358 def _run_fn(feed_dict, fetch_list, target_list, options, run_metadata):
   1359   # Ensure any changes to the graph are reflected in the runtime.
-> 1360   self._extend_graph()
   1361   return self._call_tf_sessionrun(options, feed_dict, fetch_list,
   1362                                   target_list, run_metadata)

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1401, in BaseSession._extend_graph(self)
   1400 with self._graph._session_run_lock():  # pylint: disable=protected-access
-> 1401   tf_session.ExtendSession(self._session)

InvalidArgumentError: Node 'training/Adagrad/gradients/gradients/bidirectional_1/forward_gru_1_1/while_grad/bidirectional_1/forward_gru_1_1/while_grad': Connecting to invalid output 51 of source node bidirectional_1/forward_gru_1_1/while which has 51 outputs. Try using tf.compat.v1.experimental.output_all_intermediates(True).

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
Cell In[24], line 1
----> 1 history = train_model.fit(
      2     train_generator.generator(),
      3     steps_per_epoch=16 // BATCH_SIZE,
      4     validation_data=val_generator.generator(),
      5     validation_steps=16 // BATCH_SIZE,
      6     epochs=NUM_EPOCHS,
      7     callbacks=TrainHelper.get_callbacks(OUTPUT_PATH, MODEL_NAME, OPTIMIZER, MODEL_WEIGHTS_PATH), verbose=1)

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_v1.py:855, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    852 self._check_call_args("fit")
    854 func = self._select_training_loop(x)
--> 855 return func.fit(
    856     self,
    857     x=x,
    858     y=y,
    859     batch_size=batch_size,
    860     epochs=epochs,
    861     verbose=verbose,
    862     callbacks=callbacks,
    863     validation_split=validation_split,
    864     validation_data=validation_data,
    865     shuffle=shuffle,
    866     class_weight=class_weight,
    867     sample_weight=sample_weight,
    868     initial_epoch=initial_epoch,
    869     steps_per_epoch=steps_per_epoch,
    870     validation_steps=validation_steps,
    871     validation_freq=validation_freq,
    872     max_queue_size=max_queue_size,
    873     workers=workers,
    874     use_multiprocessing=use_multiprocessing,
    875 )

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_generator_v1.py:648, in GeneratorOrSequenceTrainingLoop.fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing)
    644 model._validate_or_infer_batch_size(batch_size, steps_per_epoch, x)
    645 training_utils_v1.check_generator_arguments(
    646     y, sample_weight, validation_split=validation_split
    647 )
--> 648 return fit_generator(
    649     model,
    650     x,
    651     steps_per_epoch=steps_per_epoch,
    652     epochs=epochs,
    653     verbose=verbose,
    654     callbacks=callbacks,
    655     validation_data=validation_data,
    656     validation_steps=validation_steps,
    657     validation_freq=validation_freq,
    658     class_weight=class_weight,
    659     max_queue_size=max_queue_size,
    660     workers=workers,
    661     use_multiprocessing=use_multiprocessing,
    662     shuffle=shuffle,
    663     initial_epoch=initial_epoch,
    664     steps_name="steps_per_epoch",
    665 )

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_generator_v1.py:282, in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
    279 callbacks._call_batch_hook(mode, "begin", step, batch_logs)
    281 is_deferred = not model._is_compiled
--> 282 batch_outs = batch_function(*batch_data)
    283 if not isinstance(batch_outs, list):
    284     batch_outs = [batch_outs]

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/engine/training_v1.py:1180, in Model.train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
   1178     self._update_sample_weight_modes(sample_weights=sample_weights)
   1179     self._make_train_function()
-> 1180     outputs = self.train_function(ins)
   1182 if reset_metrics:
   1183     self.reset_metrics()

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/backend.py:4535, in GraphExecutionFunction.__call__(self, inputs)
   4532 def __call__(self, inputs):
   4533     inputs = tf.nest.flatten(inputs, expand_composites=True)
-> 4535     session = get_session(inputs)
   4536     feed_arrays = []
   4537     array_vals = []

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/backend.py:785, in get_session(op_input_list)
    783 if not _MANUAL_VAR_INIT:
    784     with session.graph.as_default():
--> 785         _initialize_variables(session)
    786 return session

File ~/anaconda3/envs/tfsetup/lib/python3.10/site-packages/keras/backend.py:1251, in _initialize_variables(session)
   1247         candidate_vars.append(v)
   1248 if candidate_vars:
   1249     # This step is expensive, so we only run it on variables not already
   1250     # marked as initialized.
-> 1251     is_initialized = session.run(
   1252         [tf.compat.v1.is_variable_initialized(v) for v in candidate_vars]
   1253     )
   1254     # TODO(kathywu): Some metric variables loaded from SavedModel are never
   1255     # actually used, and do not have an initializer.
   1256     should_be_initialized = [
   1257         (not is_initialized[n]) and v.initializer is not None
   1258         for n, v in enumerate(candidate_vars)
   1259     ]

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:968, in BaseSession.run(self, fetches, feed_dict, options, run_metadata)
    965 run_metadata_ptr = tf_session.TF_NewBuffer() if run_metadata else None
    967 try:
--> 968   result = self._run(None, fetches, feed_dict, options_ptr,
    969                      run_metadata_ptr)
    970   if run_metadata:
    971     proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1191, in BaseSession._run(self, handle, fetches, feed_dict, options, run_metadata)
   1188 # We only want to really perform the run if fetches or targets are provided,
   1189 # or if the call is a partial run that specifies feeds.
   1190 if final_fetches or final_targets or (handle and feed_dict_tensor):
-> 1191   results = self._do_run(handle, final_targets, final_fetches,
   1192                          feed_dict_tensor, options, run_metadata)
   1193 else:
   1194   results = []

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1371, in BaseSession._do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1368   return self._call_tf_sessionprun(handle, feed_dict, fetch_list)
   1370 if handle is None:
-> 1371   return self._do_call(_run_fn, feeds, fetches, targets, options,
   1372                        run_metadata)
   1373 else:
   1374   return self._do_call(_prun_fn, handle, feeds, fetches)

File ~/.local/lib/python3.10/site-packages/tensorflow/python/client/session.py:1397, in BaseSession._do_call(self, fn, *args)
   1392 if 'only supports NHWC tensor format' in message:
   1393   message += ('\nA possible workaround: Try disabling Grappler optimizer'
   1394               '\nby modifying the config for creating the session eg.'
   1395               '\nsession_config.graph_options.rewrite_options.'
   1396               'disable_meta_optimizer = True')
-> 1397 raise type(e)(node_def, op, message)

InvalidArgumentError: Graph execution error:

Node 'training/Adagrad/gradients/gradients/bidirectional_1/forward_gru_1_1/while_grad/bidirectional_1/forward_gru_1_1/while_grad': Connecting to invalid output 51 of source node bidirectional_1/forward_gru_1_1/while which has 51 outputs. Try using tf.compat.v1.experimental.output_all_intermediates(True).

This is the code that I have used.

Can someone please help me understand the error?

I had the same issue when using big datasets on GPU. Try adding the following code at the very beginning of your script:


import os

# Hide the GPU so TensorFlow falls back to CPU; must be set before importing tensorflow
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
print(tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

It should print 0 GPUs available.
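
Alternatively, the error message itself points at a workaround: enabling output of all intermediates for v1-style control flow, so the gradient graph can connect to the GRU's while-loop outputs. A minimal sketch (this is an assumption based on the error text, not a verified fix for this exact model); call it once, before the model is built and compiled:

import tensorflow as tf

# Suggested by the error message: keep all intermediate outputs of
# v1 control-flow ops (the GRU's while loop) available to the
# gradient computation.
tf.compat.v1.experimental.output_all_intermediates(True)

# ... build and compile the model after this call ...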