Not able to resolve model.fit() InvalidArgumentError

Varun_Modak · March 7, 2024, 5:55am

Hi I’m trying to use FCN for multi-class image segmentation.

Here are some relevant code snippets:

IMG_SIZE_1 = 320 #320
IMG_SIZE_2 = 320 #240
N_CHANNELS = 3 # RGB
N_CLASSES = 11 # 17 for now, maybe more with moving objects
SEED = 123 # why?

label_colors = [
np.array([255, 255, 255]), # Label - 0
np.array([250, 125, 187]), # Label - 3
np.array([225, 204, 51]), # Label - 4
np.array([0, 128, 255]), # Label - 5
np.array([170, 240, 209]), # Label - 6
np.array([255, 43, 10]), # Label - 7
np.array([178, 80, 80]), # Label - 8
np.array([245, 147, 49]), # Label - 9
np.array([221, 255, 51]), # Label - 10
np.array([140, 120, 240]), # Label - 11
np.array([204, 153, 51]), # Label - 12
# Add more label colors here (as per assigned value on CVAT).
]

import tensorflow as tf

def parse_image(image_path: str) → dict:
image = tf.io.read_file(image_path)
image = tf.image.decode_jpeg(image, channels=N_CHANNELS)
image = tf.image.convert_image_dtype(image, tf.uint8)

mask_path = tf.strings.regex_replace(image_path, "image", "image_gt")

mask = tf.io.read_file(mask_path)
mask = tf.image.decode_jpeg(mask, channels=N_CHANNELS)

masks = []

# Modify this section to handle different labels
label_values = [0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]  # Define your label values here

# Create numeric masks for each label:
for label_color, label_value in zip(label_colors, label_values):
    mask_binary = tf.reduce_all(mask == label_color, axis=-1)
    
    mask_value = tf.where(mask_binary, tf.cast(label_value, tf.uint8), tf.zeros_like(tf.cast(mask_binary, tf.uint8))) 
    print(mask_value.shape)
    masks.append(mask_value)

mask = tf.concat(masks, axis=-1)
# mask = tf.reduce_max(mask, axis=-1, keepdims=True)  # Ensure mask has only one channel

return {'image': image, 'segmentation_mask': mask}

all_dataset = tf.data.Dataset.list_files(traindata_dir + “*.png”, seed=SEED)
all_dataset = all_dataset.map(parse_image)

train_dataset = all_dataset.take(TRAINSET_SIZE + VALIDSET_SIZE)
val_dataset = train_dataset.skip(TRAINSET_SIZE)
train_dataset = train_dataset.take(TRAINSET_SIZE)
test_dataset = all_dataset.skip(TRAINSET_SIZE + VALIDSET_SIZE)

def normalize(input_image: tf.Tensor, input_mask: tf.Tensor) → tuple:
input_image = tf.cast(input_image, tf.float32) / 255.0
return input_image, input_mask

def load_image_train(datapoint: dict) → tuple:
input_image = tf.image.resize(datapoint[‘image’], (IMG_SIZE_2, IMG_SIZE_1))
input_mask = tf.image.resize(datapoint[‘segmentation_mask’], (IMG_SIZE_2, IMG_SIZE_1))

print(input_image.shape)
print(input_mask.shape)

if tf.random.uniform(()) > 0.5:
    input_image = tf.image.flip_left_right(input_image)
    input_mask = tf.image.flip_left_right(input_mask)

return input_image, input_mask

def load_image_test(datapoint: dict) → tuple:
input_image = tf.image.resize(datapoint[‘image’], (IMG_SIZE_2, IMG_SIZE_1))
input_mask = tf.image.resize(datapoint[‘segmentation_mask’], (IMG_SIZE_2, IMG_SIZE_1))

input_image, input_mask = normalize(input_image, input_mask)

return input_image, input_mask

BATCH_SIZE = 10
BUFFER_SIZE = 50

dataset = {“train”: train_dataset, “val”: val_dataset, “test”: test_dataset}

dataset[‘train’] = dataset[‘train’].map(load_image_train, num_parallel_calls=tf.data.AUTOTUNE)
dataset[‘train’] = dataset[‘train’].shuffle(buffer_size=BUFFER_SIZE, seed=SEED)
dataset[‘train’] = dataset[‘train’].repeat()
dataset[‘train’] = dataset[‘train’].batch(BATCH_SIZE)
dataset[‘train’] = dataset[‘train’].prefetch(buffer_size=tf.data.AUTOTUNE)

dataset[‘val’] = dataset[‘val’].map(load_image_test)
dataset[‘val’] = dataset[‘val’].repeat()
dataset[‘val’] = dataset[‘val’].batch(BATCH_SIZE)
dataset[‘val’] = dataset[‘val’].prefetch(buffer_size=tf.data.AUTOTUNE)

#-- Testing Dataset --#
dataset[‘test’] = dataset[‘test’].map(load_image_test)
dataset[‘test’] = dataset[‘test’].batch(BATCH_SIZE)
dataset[‘test’] = dataset[‘test’].prefetch(buffer_size=tf.data.AUTOTUNE)

print(dataset[‘train’])
print(dataset[‘val’])
print(dataset[‘test’])

import cv2
from tqdm import tqdm
import datetime
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Concatenate, Cropping2D, Dense
from tensorflow.keras.layers import Input, Add, Conv2DTranspose, Activation
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy, MeanSquaredError, BinaryCrossentropy
from tensorflow.keras.utils import plot_model

vgg16_model = VGG16() # maybe try to use VGG18

input_shape = (IMG_SIZE_2, IMG_SIZE_1, N_CHANNELS)

inputs = Input(input_shape)

vgg16_model = VGG16(include_top = False, weights = ‘imagenet’, input_tensor = inputs)

pool3 = vgg16_model.get_layer(“block3_pool”).output
pool4 = vgg16_model.get_layer(“block4_pool”).output
pool5 = vgg16_model.get_layer(“block5_pool”).output

conv_6 = Conv2D(1024, (7,7), activation=‘relu’, padding=‘same’, name=“conv_6”)(pool5)
conv_7 = Conv2D(1024, (1, 1), activation=‘relu’, padding=‘same’, name=“conv_7”)(conv_6)

conv_8 = Conv2D(N_CLASSES, (1, 1), activation=‘relu’, padding=‘same’, name=“conv_8”)(pool4)
conv_9 = Conv2D(N_CLASSES, (1, 1), activation=‘relu’, padding=‘same’, name=“conv_9”)(pool3)

deconv_7 = Conv2DTranspose(N_CLASSES, kernel_size=(2,2), strides=(2,2))(conv_7)
add_1 = Add()([deconv_7, conv_8])
deconv_8 = Conv2DTranspose(N_CLASSES, kernel_size=(2,2), strides=(2,2))(add_1)

deconv_10 = Conv2DTranspose(N_CLASSES, kernel_size=(16,16), strides=(16,16))(add_1)
output_layer = Activation(‘softmax’)(deconv_10)

model = Model(inputs=vgg16_model.input, outputs=output_layer)
model.summary()

m_iou = tf.keras.metrics.MeanIoU(2)
model.compile(optimizer=Adam(),
loss=‘categorical_crossentropy’,
metrics=[m_iou])

from tensorflow.keras import callbacks

def show_predictions(dataset=None, num=1):
if dataset:
# Predict and show image from input dataset
for image, mask in dataset.take(num):
pred_mask = model.predict(image)
display_sample2([image[0], true_mask, create_mask(pred_mask)])
else:
inference = model.predict(sample_image)
display_sample2([sample_image[0], sample_mask[0],
inference[0]])

EPOCHS = 20
STEPS_PER_EPOCH = TRAINSET_SIZE // BATCH_SIZE
VALIDATION_STEPS = VALIDSET_SIZE // BATCH_SIZE

model_history = model.fit(dataset[‘train’], epochs=EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
validation_data = dataset[“val”],
validation_steps=VALIDATION_STEPS,
callbacks = Callbacks)

Relevant Error:

InvalidArgumentError Traceback (most recent call last)
Cell In[82], line 35
32 STEPS_PER_EPOCH = TRAINSET_SIZE // BATCH_SIZE
33 VALIDATION_STEPS = VALIDSET_SIZE // BATCH_SIZE
—> 35 model_history = model.fit(dataset[‘train’], epochs=EPOCHS,
36 steps_per_epoch=STEPS_PER_EPOCH,
37 validation_data = dataset[“val”],
38 validation_steps=VALIDATION_STEPS,
39 callbacks = Callbackss)

File /opt/conda/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py:123, in filter_traceback..error_handler(*args, **kwargs)
120 filtered_tb = _process_traceback_frames(e.traceback)
121 # To get the full stack trace, call:
122 # keras.config.disable_traceback_filtering()
→ 123 raise e.with_traceback(filtered_tb) from None
124 finally:
125 del filtered_tb

File /opt/conda/lib/python3.10/site-packages/tensorflow/python/eager/execute.py:53, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
51 try:
52 ctx.ensure_initialized()
—> 53 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
54 inputs, attrs, num_outputs)
55 except core._NotOkStatusException as e:
56 if name is not None:

InvalidArgumentError: Graph execution error:

Detected at node ScatterNd defined at (most recent call last):
File “/opt/conda/lib/python3.10/runpy.py”, line 196, in _run_module_as_main

File “/opt/conda/lib/python3.10/runpy.py”, line 86, in _run_code

File “/opt/conda/lib/python3.10/site-packages/ipykernel_launcher.py”, line 17, in

File “/opt/conda/lib/python3.10/site-packages/traitlets/config/application.py”, line 1043, in launch_instance

File “/opt/conda/lib/python3.10/site-packages/ipykernel/kernelapp.py”, line 701, in start

File “/opt/conda/lib/python3.10/site-packages/tornado/platform/asyncio.py”, line 195, in start

File “/opt/conda/lib/python3.10/asyncio/base_events.py”, line 603, in run_forever

File “/opt/conda/lib/python3.10/asyncio/base_events.py”, line 1909, in _run_once

File “/opt/conda/lib/python3.10/asyncio/events.py”, line 80, in _run

File “/opt/conda/lib/python3.10/site-packages/ipykernel/kernelbase.py”, line 534, in dispatch_queue

File “/opt/conda/lib/python3.10/site-packages/ipykernel/kernelbase.py”, line 523, in process_one

File “/opt/conda/lib/python3.10/site-packages/ipykernel/kernelbase.py”, line 429, in dispatch_shell

File “/opt/conda/lib/python3.10/site-packages/ipykernel/kernelbase.py”, line 767, in execute_request

File “/opt/conda/lib/python3.10/site-packages/ipykernel/ipkernel.py”, line 429, in do_execute

File “/opt/conda/lib/python3.10/site-packages/ipykernel/zmqshell.py”, line 549, in run_cell

File “/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py”, line 3051, in run_cell

File “/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py”, line 3106, in _run_cell

File “/opt/conda/lib/python3.10/site-packages/IPython/core/async_helpers.py”, line 129, in _pseudo_sync_runner

File “/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py”, line 3311, in run_cell_async

File “/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py”, line 3493, in run_ast_nodes

File “/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py”, line 3553, in run_code

File “/tmp/ipykernel_33/1826598133.py”, line 35, in

File “/opt/conda/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py”, line 118, in error_handler

File “/opt/conda/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py”, line 323, in fit

File “/opt/conda/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py”, line 117, in one_step_on_iterator

File “/opt/conda/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py”, line 105, in one_step_on_data

File “/opt/conda/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py”, line 76, in train_step

File “/opt/conda/lib/python3.10/site-packages/keras/src/trainers/trainer.py”, line 375, in compute_metrics

File “/opt/conda/lib/python3.10/site-packages/keras/src/trainers/compile_utils.py”, line 331, in update_state

File “/opt/conda/lib/python3.10/site-packages/keras/src/trainers/compile_utils.py”, line 18, in update_state

File “/opt/conda/lib/python3.10/site-packages/keras/src/metrics/iou_metrics.py”, line 127, in update_state

File “/opt/conda/lib/python3.10/site-packages/keras/src/metrics/metrics_utils.py”, line 682, in confusion_matrix

File “/opt/conda/lib/python3.10/site-packages/keras/src/ops/core.py”, line 62, in scatter

File “/opt/conda/lib/python3.10/site-packages/keras/src/backend/tensorflow/core.py”, line 214, in scatter

indices[558070] = [2, 0] does not index into shape [2,2]
[[{{node ScatterNd}}]] [Op:__inference_one_step_on_iterator_51173]

Kiran_Sai_Ramineni · March 13, 2024, 6:49am

Hi @Varun_Modak, I have tried to train the above model with random data and did not face any error. please refer to this gist for working code example.

Also could you please check if the labels are correctly formatted or not. Thank You.