Expected to see 1 array(s), but instead got the following list of 2 arrays

Please help me solve this error.

I have two different datasets, each one feeding a separate model; the two branches are then concatenated into a common model with a single output of 5 classes. However, when I run it, an error appears saying:

the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 2 arrays

The error points to this line: model = Model(inputs=[input1, input2], outputs=[output3])

When I write model = Model(inputs=[input1, input2], outputs=[output3, output3]) it does work, but I do not want two outputs. Please help me with this problem.
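To make this easier to reproduce, here is a stripped-down sketch of what I am doing (the convolution blocks are omitted and the data is random, just to keep it short; the shapes are placeholders for my real arrays):

import numpy as np
from keras.layers import Input, Flatten, Dense, concatenate
from keras.models import Model

input1 = Input(shape=(72, 1))
flat01 = Flatten()(input1)

input2 = Input(shape=(72, 1))
flat02 = Flatten()(input2)

x = concatenate([flat01, flat02])
output3 = Dense(5, activation='softmax')(x)

model = Model(inputs=[input1, input2], outputs=[output3])
model.compile(optimizer='Adam', loss='categorical_crossentropy')

# placeholder data with the same shapes as my real arrays
X_train1 = np.random.rand(100, 72, 1)
X_train2 = np.random.rand(100, 72, 1)
y_train1 = np.eye(5)[np.random.randint(0, 5, 100)]
y_train2 = np.eye(5)[np.random.randint(0, 5, 100)]

# training with the two label arrays is what leads to the error above
model.fit([X_train1, X_train2], [y_train1, y_train2], epochs=1, batch_size=32)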

# -*- coding: utf-8 -*-

"""
Created on Thu Feb 16 09:04:00 2023

@author: PC EL NOUR
"""

# import libraries

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os, re, time, math, tqdm, itertools
import matplotlib.pyplot as plt
import seaborn as sns #???
import plotly.express as px #???
import plotly.offline as pyo#???
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler #more data
from sklearn.ensemble import RandomForestClassifier #classifieur
from sklearn.preprocessing import LabelEncoder #conv attack to 0,1,2
from sklearn.neural_network import MLPClassifier #classifieur
import keras
from keras.layers import Conv2D, Conv1D, MaxPooling2D, MaxPooling1D, Flatten, BatchNormalization, Dense, Dropout,concatenate
from keras.utils.np_utils import to_categorical # to convert labels to one-hot vectors
from keras.models import Sequential # to build sequential models
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.layers import Input
from keras.models import Model
import matplotlib.pyplot as plt
import glob

# Get CSV files list from a folder

path1 = r'D:\master prog\IDS 2018 Intrusion CSVs (CSE-CIC-IDS2018)1'
csv_files1 = glob.glob(path1 + "/*.csv")

# Read each CSV file into a DataFrame
# (this creates a generator of DataFrames)

df_list1 = (pd.read_csv(file) for file in csv_files1)

# Concatenate all DataFrames

network_data1 = pd.concat(df_list1, ignore_index=True)

path2 = r'D:\master prog\IDS 2018 Intrusion CSVs (CSE-CIC-IDS2018)3'
csv_files2 = glob.glob(path2 + "/*.csv")

# Read each CSV file into a DataFrame
# (this creates a generator of DataFrames)

df_list2 = (pd.read_csv(file) for file in csv_files2)

# Concatenate all DataFrames

network_data2 = pd.concat(df_list2, ignore_index=True)

# encode the Label column

label_encoder = LabelEncoder()
cleaned_data1['Label'] = label_encoder.fit_transform(cleaned_data1['Label'])
cleaned_data1['Label'].unique()

cleaned_data2['Label'] = label_encoder.fit_transform(cleaned_data2['Label'])
cleaned_data2['Label'].unique()
cleaned_data2['Label'].value_counts()
#%% cleaned_data1['Label'].value_counts()

# make 5 separate datasets, one per label

data_01 = cleaned_data1[cleaned_data1['Label'] == 0]
data_11 = cleaned_data1[cleaned_data1['Label'] == 1]
data_21 = cleaned_data1[cleaned_data1['Label'] == 2]
data_31 = cleaned_data1[cleaned_data1['Label'] == 3]
data_41 = cleaned_data1[cleaned_data1['Label'] == 4]

# make 5 separate datasets, one per label

data_02 = cleaned_data2[cleaned_data2['Label'] == 0]
data_12 = cleaned_data2[cleaned_data2['Label'] == 1]
data_22 = cleaned_data2[cleaned_data2['Label'] == 2]
data_32 = cleaned_data2[cleaned_data2['Label'] == 3]
data_42 = cleaned_data2[cleaned_data2['Label'] == 4]

# make label vectors for each class

y_01 = np.zeros(data_01.shape[0])
y_benign1 = pd.DataFrame(y_01)

y_11 = np.ones(data_11.shape[0])
y_bf1 = pd.DataFrame(y_11)

y_21 = np.full(data_21.shape[0], 2)
y_ssh1 = pd.DataFrame(y_21)

y_31 = np.full(data_31.shape[0], 3)
y_dosg1 = pd.DataFrame(y_31)

y_41 = np.full(data_41.shape[0], 4)
y_doss1 = pd.DataFrame(y_41)

y_02 = np.zeros(data_02.shape[0])
y_benign2= pd.DataFrame(y_02)

y_12 = np.ones(data_12.shape[0])
y_bf2 = pd.DataFrame(y_12)

y_22 = np.full(data_22.shape[0], 2)
y_ssh2 = pd.DataFrame(y_22)

y_32 = np.full(data_32.shape[0], 3)
y_dosg2 = pd.DataFrame(y_32)

y_42 = np.full(data_42.shape[0], 4)
y_doss2 = pd.DataFrame(y_42)

#%%

# merging the original dataframes

X1 = pd.concat([data_01, data_11, data_21, data_31, data_41], sort=True)
y1 = pd.concat([y_benign1, y_bf1, y_ssh1,y_dosg1,y_doss1], sort=True)

X2 = pd.concat([data_02, data_12, data_22, data_32, data_42], sort=True)
y2 = pd.concat([y_benign2, y_bf2, y_ssh2,y_dosg2,y_doss2], sort=True)

#%%

from sklearn.utils import resample

data_01_resample = resample(data_01, n_samples=20000, random_state=123, replace=True)
data_11_resample = resample(data_11, n_samples=20000, random_state=123, replace=True)
data_21_resample = resample(data_21, n_samples=20000, random_state=123, replace=True)
data_31_resample = resample(data_31, n_samples=20000, random_state=123, replace=True)
data_41_resample = resample(data_41, n_samples=20000, random_state=123, replace=True)

train_dataset1 = pd.concat([data_01_resample, data_11_resample,data_21_resample, data_31_resample, data_41_resample])
#train_dataset1.head(2)

data_02_resample = resample(data_02, n_samples=20000, random_state=123, replace=True)
data_12_resample = resample(data_12, n_samples=20000, random_state=123, replace=True)
data_22_resample = resample(data_22, n_samples=20000, random_state=123, replace=True)
data_32_resample = resample(data_32, n_samples=20000, random_state=123, replace=True)
data_42_resample = resample(data_42, n_samples=20000, random_state=123, replace=True)

train_dataset2= pd.concat([data_02_resample, data_12_resample,data_22_resample, data_32_resample, data_42_resample])
#train_dataset2.head(2)
#%%

test_dataset1 = train_dataset1.sample(frac=0.1)
target_train1 = train_dataset1['Label']
target_test1 = test_dataset1['Label']
target_train1.unique(), target_test1.unique()

y_train1 = to_categorical(target_train1, num_classes=5)
y_test1 = to_categorical(target_test1, num_classes=5)

train_dataset1 = train_dataset1.drop(columns=["Timestamp", "Protocol", "PSH Flag Cnt", "Init Fwd Win Byts", "Flow Byts/s", "Flow Pkts/s", "Label"], axis=1)
test_dataset1 = test_dataset1.drop(columns=["Timestamp", "Protocol", "PSH Flag Cnt", "Init Fwd Win Byts", "Flow Byts/s", "Flow Pkts/s", "Label"], axis=1)

test_dataset2 = train_dataset2.sample(frac=0.1)
target_train2 = train_dataset2['Label']
target_test2 = test_dataset2['Label']
target_train2.unique(), target_test2.unique()

y_train2 = to_categorical(target_train2, num_classes=5)
y_test2 = to_categorical(target_test2, num_classes=5)

train_dataset2 = train_dataset2.drop(columns=["Timestamp", "Protocol", "PSH Flag Cnt", "Init Fwd Win Byts", "Flow Byts/s", "Flow Pkts/s", "Label"], axis=1)
test_dataset2 = test_dataset2.drop(columns=["Timestamp", "Protocol", "PSH Flag Cnt", "Init Fwd Win Byts", "Flow Byts/s", "Flow Pkts/s", "Label"], axis=1)

# making train & test splits

X_train1 = train_dataset1.iloc[:, :-1].values #
X_test1 = test_dataset1.iloc[:, :-1].values

X_train2 = train_dataset2.iloc[:, :-1].values
X_test2 = test_dataset2.iloc[:, :-1].values

#%%

# reshape the data for CNN

X_train1 = X_train1.reshape(len(X_train1), X_train1.shape[1], 1)
X_test1 = X_test1.reshape(len(X_test1), X_test1.shape[1], 1)
X_train1.shape, X_test1.shape

X_train2 = np.array(X_train2.reshape(len(X_train2), X_train2.shape[1], 1))
X_test2 = np.array(X_test2.reshape(len(X_test2), X_test2.shape[1], 1))
X_train2.shape, X_test2.shape

# building the deep learning model

input1 = Input(shape=(72, 1))
model1 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(input1)
model1 = BatchNormalization()(model1)

# adding a pooling layer

model1 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model1)

model1 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(model1)
model1 = BatchNormalization()(model1)
model1 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model1)
model1 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(model1)
model1 = BatchNormalization()(model1)
model1 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model1)
model1 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(model1)
model1 = BatchNormalization()(model1)
output1 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model1)
flat01 = Flatten()(output1)
#model1 = Model(input1, output1)
#model1.summary()

input2 = Input(shape=(72, 1))
model2 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(input2)
model2 = BatchNormalization()(model2)

# adding a pooling layer

model2 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model2)

model2 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(model2)
model2 = BatchNormalization()(model2)
model2 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model2)
model2 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(model2)
model2 = BatchNormalization()(model2)
model2 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model2)
model2 = Conv1D(filters=65, kernel_size=6, activation='tanh',
                padding='same')(model2)
model2 = BatchNormalization()(model2)
output2 = MaxPooling1D(pool_size=3, strides=2, padding='same')(model2)
flat02 = Flatten()(output2)

#%%
x = concatenate([flat01, flat02])
x = Dense(65, activation='relu')(x)
drop = Dropout(0.25)(x)
output3 = Dense(5, activation='softmax')(drop)

model = Model(inputs=[input1, input2], outputs=[output3])
model.summary()
#import tensorflow as tf
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

logger = CSVLogger('logs.csv', append=True)

his = model.fit([X_train1, X_train2], [y_train1, y_train2], validation_data=([X_test1, X_test2], [y_test1, y_test2]), epochs=80, batch_size=40)

scores = model.evaluate([X_test1, X_test2], [y_test1, y_test2])
print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

model.save("model.h5")
print("Saved model to disk")

You can combine the two datasets into a single one (the code below uses pandas.concat), so that the model is fed one set of input features and one label array.


# concatenate the two datasets
network_data = pd.concat([network_data1, network_data2], ignore_index=True)

# separate the input features and labels
X = network_data.drop('Label', axis=1)
y = network_data['Label']

# split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# create the first model
model1 = Model(inputs=input1, outputs=output1)

# train the first model
model1.compile()  # placeholder: supply this branch's optimizer and loss
model1.fit()      # placeholder: supply this branch's training data and labels

# create the second model
model2 = Model(inputs=input2, outputs=output2)

# train the second model
model2.compile()  # placeholder: supply this branch's optimizer and loss
model2.fit()      # placeholder: supply this branch's training data and labels

# concatenate the output of the two models
concatenated = concatenate([model1.output, model2.output])

# create the common model
output3 = Dense(5, activation='softmax')(concatenated)  # 5 classes, as in the question
model = Model(inputs=[model1.input, model2.input], outputs=output3)

# compile the common model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# train the common model
# note: with categorical_crossentropy the labels must be one-hot encoded (e.g. via to_categorical)
model.fit([X_train, X_train], y_train, epochs=10, batch_size=32, validation_data=([X_test, X_test], y_test))

This might resolve your issue.

Please let us know if it helps you.

Thanks.

Thank you so much for your help, but I do not want to concatenate the two datasets at the beginning, because each dataset has to be processed differently (by two different models).
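To clarify what I mean, here is a rough sketch of the architecture I am after (the layer sizes and random arrays below are only placeholders for my real settings): one branch per dataset, merged into a single 5-class output. My question is what the targets given to fit should look like, since this model has only one output but I have two label sets.

import numpy as np
from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, concatenate
from keras.models import Model

# branch 1: processes features from dataset 1
input1 = Input(shape=(72, 1))
b1 = Conv1D(filters=65, kernel_size=6, activation='tanh', padding='same')(input1)
b1 = MaxPooling1D(pool_size=3, strides=2, padding='same')(b1)
flat01 = Flatten()(b1)

# branch 2: processes features from dataset 2 (could use different layers)
input2 = Input(shape=(72, 1))
b2 = Conv1D(filters=65, kernel_size=6, activation='tanh', padding='same')(input2)
b2 = MaxPooling1D(pool_size=3, strides=2, padding='same')(b2)
flat02 = Flatten()(b2)

# merge the two branches into one head with a single 5-class output
x = concatenate([flat01, flat02])
x = Dense(65, activation='relu')(x)
output3 = Dense(5, activation='softmax')(Dropout(0.25)(x))

model = Model(inputs=[input1, input2], outputs=[output3])
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# placeholder data with the same shapes as my real arrays
X_train1 = np.random.rand(100, 72, 1)
X_train2 = np.random.rand(100, 72, 1)
y_train1 = np.eye(5)[np.random.randint(0, 5, 100)]
y_train2 = np.eye(5)[np.random.randint(0, 5, 100)]

# what should the targets be here, given that the model has only one output?
# model.fit([X_train1, X_train2], [y_train1, y_train2], epochs=80, batch_size=40)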