Regression in a dataset with flattened images and target reals

Hello,

I am trying to set up a dataset with flattened lung images and 2 reals indicating the lung nodule location but I am facing some problems:

import tensorflow as tf
import tensorflow_io as tfio
import matplotlib.pyplot as plt
import os
import numpy as np

class LossPlotCallback(tf.keras.callbacks.Callback):
def init(self, X_val, y_val):
super(LossPlotCallback, self).init()
self.X_val = X_val
self.y_val = y_val
self.losses = []
self.val_losses = []
self.fig, self.ax = plt.subplots()
plt.ion() # Turn on interactive mode for real-time plotting

def on_train_begin(self, logs={}):
    pass

def on_epoch_end(self, epoch, logs={}):
    self.losses.append(logs.get('loss'))
    self.val_losses.append(self.model.evaluate(self.X_val, self.y_val, verbose=0))
    self.ax.clear()
    self.ax.plot(range(1, len(self.losses) + 1), self.losses, label='Training Loss')
    self.ax.plot(range(1, len(self.val_losses) + 1), self.val_losses, label='Validation Loss')
    self.ax.set_xlabel('Epoch')
    self.ax.set_ylabel('Loss')
    self.ax.set_title('Training and Validation Loss')
    self.ax.legend()
    self.fig.canvas.draw()
    plt.pause(0.01)  # Pause for a short time to update the plot

def load_tiff_image(file_path):
image = tf.io.read_file(file_path)
image = tfio.experimental.image.decode_tiff(image)
image = tf.image.convert_image_dtype(image, tf.float32) # Normalize to [0, 1]
return image

def normalize_images(input_image):
# Calculate the minimum and maximum values in the input and target images
min_value_input = tf.reduce_min(input_image)
max_value_input = tf.reduce_max(input_image)

# Normalize input image to [0, 1]
normalized_input_image = (input_image - min_value_input) / (max_value_input - min_value_input)

return normalized_input_image

def preprocess_image(image):
# Flatten the image into a vector
flattened_image = tf.reshape(image, [-1])
return flattened_image

PATH = “/home/rafael/Área de Trabalho/BioInspirada/Trabalho 2/All in one”

cvs = “Coord.csv”

csv = os.path.join(PATH, cvs)
cvs = “Coord.csv”

def parse_csv_line(line):

parts = tf.strings.split(line, sep=',')
# Assuming the first two columns are the targets
try:
    # Assuming the first two columns are the targets
    target1 = tf.strings.to_number(parts[0], out_type=tf.float32)
    target2 = tf.strings.to_number(parts[1], out_type=tf.float32)
    targets = tf.stack([target1, target2])
except tf.errors.InvalidArgumentError as e:
    print("Error parsing targets:", e)
    return None, None
   
# Load and preprocess the image data
filename = parts[-1]  # Assuming image filename is the last element
file_path = tf.strings.join([PATH, filename])
image = load_tiff_image(filename)
image = normalize_images(image)
flattened_image = preprocess_image(image)
    

return flattened_image, targets

dataset = tf.data.TextLineDataset(csv)

Parse each line of the CSV file

dataset = dataset.map(parse_csv_line)

for flattened_image, targets in dataset.take(5):
print(“Flattened Image:”, flattened_image)
print(“Targets:”, targets)

def create_mlp(input_shape):
model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation=‘relu’, input_shape=input_shape),
tf.keras.layers.Dense(64, activation=‘relu’),
tf.keras.layers.Dense(2) # Output layer with 2 neurons for the real-valued targets
])
return model

def count_tuples(count, _):
return count + 1

Reduce the dataset to count the number of tuples

num_tuples = dataset.reduce(0, count_tuples)

Print the number of tuples

print(“Dataset size:”, num_tuples.numpy())

dataset = dataset.shuffle(buffer_size=1000)

train_size = 1000
test_size = 100
val_size = 66

Split the dataset

train_dataset = dataset.take(train_size)
remaining_dataset = dataset.skip(train_size)
test_dataset = remaining_dataset.take(test_size)
remaining_dataset = dataset.skip(train_size+test_size)
val_dataset = remaining_dataset.take(val_size)

Optionally, you may want to batch the datasets

batch_size = 32
train_dataset = train_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
val_dataset = val_dataset.batch(batch_size)

Optionally, you may want to prefetch the datasets for better performance

train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.prefetch(tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.prefetch(tf.data.experimental.AUTOTUNE)

model = create_mlp(input_shape=(240 * 358 * 1,)) # Replace height, width, and channels with actual values
model.compile(optimizer=‘adam’, loss=‘mse’, metrics=[‘mae’])

Create lists to store preprocessed images and target values

flattened_image_list_train = []
target_values_list_train = []

flattened_image_list_test = []
target_values_list_test = []

flattened_image_list_val = []
target_values_list_val = []

Recreate iterators for datasets

train_iter = iter(train_dataset)
test_iter = iter(test_dataset)
val_iter = iter(val_dataset)

Clear existing lists

flattened_image_list_train.clear()
target_values_list_train.clear()
flattened_image_list_test.clear()
target_values_list_test.clear()
flattened_image_list_val.clear()
target_values_list_val.clear()

Populate the lists

Populate the lists

for flattened_image, targets in train_iter:
flattened_image_list_train.append(flattened_image)
target_values_list_train.append(targets)

for flattened_image, targets in test_iter:
flattened_image_list_test.append(flattened_image)
target_values_list_test.append(targets)

for flattened_image, targets in val_iter:
flattened_image_list_val.append(flattened_image)
target_values_list_val.append(targets)

Convert lists to NumPy arrays or TensorFlow tensors

X_train = tf.stack(flattened_image_list_train)
y_train = tf.constant(target_values_list)

Convert lists to NumPy arrays or TensorFlow tensors

X_test = tf.stack(flattened_image_list_test)
y_test = tf.constant(target_values_test)

Convert lists to NumPy arrays or TensorFlow tensors

X_val = tf.stack(flattened_image_list_val)
y_val = tf.constant(target_values_val)

Check the shapes of X_train and y_train

print(“X_train shape:”, X_train.shape)
print(“y_train shape:”, y_train.shape)

num_epochs = 500;

Train the model

model.fit(X_train, y_train, epochs=num_epochs, batch_size=batch_size,validation_data=(X_val, y_val), callbacks=[LossPlotCallback()])

I get:

Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([ 41.142372 106.030334], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([270.51218 63.334614], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([256.8265 145.49292], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([120.41846 137.22746], shape=(2,), dtype=float32)
Flattened Image: tf.Tensor([0. 0. 0. … 0. 0. 1.], shape=(343680,), dtype=float32)
Targets: tf.Tensor([148.83672 120.33603], shape=(2,), dtype=float32)
2024-05-30 18:58:56.257933: W tensorflow/core/framework/op_kernel.cc:1828] OP_REQUIRES failed at whole_file_reafile:///home/rafael/%C3%81rea%20de%20Trabalho/BioInspirada/Trabalho%202/Perceptron.ipynb
d_ops.cc:116 : NOT_FOUND: ; No such file or directory
2024-05-30 18:58:56.262418: W tensorflow/core/framework/op_kernel.cc:1828] OP_REQUIRES failed at whole_file_read_ops.cc:116 : NOT_FOUND: ; No such file or directory

NotFoundError Traceback (most recent call last)
Cell In[47], line 126
123 return count + 1
125 # Reduce the dataset to count the number of tuples
→ 126 num_tuples = dataset.reduce(0, count_tuples)
128 # Print the number of tuples
129 print(“Dataset size:”, num_tuples.numpy())

File ~/miniconda3/envs/tensorenv/lib/python3.9/site-packages/tensorflow/python/data/ops/dataset_ops.py:2787, in DatasetV2.reduce(self, initial_state, reduce_func, name)
2783 if name:
2784 metadata.name = _validate_and_encode(name)
2785 return structure.from_compatible_tensor_list(
2786 state_structure,
→ 2787 gen_dataset_ops.reduce_dataset(
2788 dataset._variant_tensor,
2789 structure.to_tensor_list(state_structure, initial_state),
2790 reduce_func.captured_inputs,
2791 f=reduce_func,
2792 output_shapes=structure.get_flat_tensor_shapes(state_structure),
2793 output_types=structure.get_flat_tensor_types(state_structure),
2794 metadata=metadata.SerializeToString()))

File ~/miniconda3/envs/tensorenv/lib/python3.9/site-packages/tensorflow/python/ops/gen_dataset_ops.py:6178, in reduce_dataset(input_dataset, initial_state, other_arguments, f, output_types, output_shapes, use_inter_op_parallelism, metadata, name)
6176 return _result
6177 except _core._NotOkStatusException as e:
→ 6178 _ops.raise_from_not_ok_status(e, name)
6179 except _core._FallbackException:
6180 pass

File ~/miniconda3/envs/tensorenv/lib/python3.9/site-packages/tensorflow/python/framework/ops.py:6656, in raise_from_not_ok_status(e, name)
6654 def raise_from_not_ok_status(e, name):
6655 e.message += (" name: " + str(name if name is not None else “”))
→ 6656 raise core._status_to_exception(e) from None

NotFoundError: {{function_node _wrapped__ReduceDataset_Targuments_0_Tstate_1_output_types_1_device/job:localhost/replica:0/task:0/device:CPU:0}} ; No such file or directory
[[{{node ReadFile}}]] [Op:ReduceDataset] name: