Value Error problem from using kernel regularizer

I got a ValueError when using TensorFlow to create a model. Based on the error there is a problem that occurs with the kernel regularizer applied on the Conv2D layer and the mean squared error function. I used the L1 regularizer provided by the TensorFlow keras package. I’ve tried setting different values for the L1 regularization factor and even setting the value to 0, but I get the same error.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

!pip install tensorflow-privacy
import numpy as np
import tensorflow as tf
from tensorflow_privacy import *
import tensorflow_privacy

from matplotlib import pyplot as plt
import pylab as pl
import numpy as np
import pandas as pd


from tensorflow.keras.models import Model
from tensorflow.keras import datasets, layers, models, losses
from tensorflow.keras import backend as bke
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1, l2, l1_l2 #meaning of norm

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

batch_size = 32
epochs = 4
microbatches = 8


inChannel = 1
kr = 0#1e-5
num_kernels=8
drop_perc=0.25
dim = 1

l2_norm_clip = 1.5
noise_multiplier = 1.3
 
learning_rate = 0.25
latent_dim = 0


def print_datashape():
  print('genotype data: ', genotype_data.shape)
  print('phenotype data: ', single_pheno.shape)


genotype_data = tf.random.uniform([4276, 28220],1,3, dtype=tf.dtypes.int32)
phenotype_data = tf.random.uniform([4276, 20],-4.359688,34,dtype=tf.dtypes.float32)

genotype_data = genotype_data.numpy()
phenotype_data = phenotype_data.numpy()

small_geno = genotype_data
single_pheno = phenotype_data[:, 1]

print_datashape()

df = small_geno
min_max_scaler = preprocessing.MinMaxScaler()
df = min_max_scaler.fit_transform(df)
scaled_pheno = min_max_scaler.fit_transform(single_pheno.reshape(-1,1)).reshape(-1)

feature_size= df.shape[1]
df = df.reshape(-1, feature_size, 1, 1)



print("df: ", df.shape)
print("scaled: ", scaled_pheno.shape)


# split train to train and valid
train_data,test_data,train_Y,test_Y = train_test_split(df, scaled_pheno, test_size=0.2, random_state=13)
train_X,valid_X,train_Y,valid_Y = train_test_split(train_data, train_Y, test_size=0.2, random_state=13)

def print_shapes():
  print('train_X: {}'.format(train_X.shape))
  print('train_Y: {}'.format(train_Y.shape))
  print('valid_X: {}'.format(valid_X.shape))
  print('valid_Y: {}'.format(valid_Y.shape))

input_shape= (feature_size, dim, inChannel)

predictor = tf.keras.Sequential()
predictor.add(layers.Conv2D(num_kernels, (5,1), padding='same', strides=(12, 1), activation='relu', kernel_regularizer=tf.keras.regularizers.L1(kr),input_shape= input_shape))
predictor.add(layers.AveragePooling2D(pool_size=(2,1)))
predictor.add(layers.Dropout(drop_perc))
predictor.add(layers.Flatten())
predictor.add(layers.Dense(int(feature_size / 4), activation='relu'))
predictor.add(layers.Dropout(drop_perc))
predictor.add(layers.Dense(int(feature_size / 10), activation='relu'))
predictor.add(layers.Dropout(drop_perc))
predictor.add(layers.Dense(1))


mse = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
optimizer = DPKerasAdamOptimizer(learning_rate=learning_rate, l2_norm_clip=l2_norm_clip, noise_multiplier=noise_multiplier, num_microbatches=microbatches)

# compile
predictor.compile(loss=mse, optimizer=optimizer, metrics=['mse'])

#summary
predictor.summary()

print_shapes()

predictor.fit(train_X, train_Y,batch_size=batch_size,epochs=epochs,verbose=1, validation_data=(valid_X, valid_Y))
ValueError: Shapes must be equal rank, but are 1 and 0
        From merging shape 0 with other shapes. for '{{node AddN}} = AddN[N=2, T=DT_FLOAT](mean_squared_error/weighted_loss/Mul, conv2d_2/kernel/Regularizer/mul)' with input shapes: [?], [].

Context: Creating a model that predicts phenotype traits given genotypes and phenotypes datasets. The genotype input data has 4276 samples, and the input shape that the model takes is (28220,1). My labels represent the phenotype data. The labels include 4276 samples with 20 as the number of phenotype traits in the dataset. In this model we use differential privacy(DP) and add it to a CNN model which uses the Mean squared error loss function and the DPKerasAdamOptimizer to add DP. I’m just wondering if MSE would be a good choice as a loss function?