This code almost work to train AI to make music (Help please)

I am struggling with this for a week now.
If anyone can fix this so it works, you are a wizard in my eyes.

Lilypond error right now, but it really dont need it. Can remove generate music sheet if necessary.

I believe it has incremental learning code correct.

My main goals right now is:

  • Input midi file (This works and it loads)
  • Output midi filesProject file
  • Save the model so i can load it for incremental learning
import tensorflow as tf
import numpy as np
import pandas as pd
from collections import Counter
import random
import os
from IPython.display import Image
from IPython.display import display
import IPython
from IPython.display import Audio
from music21 import *
from music21.note import Note
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adamax
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import sys
import warnings


# Loading the list of Chopin's MIDI files as a stream
filepath = r"C:/Users/adria/OneDrive/Desktop/test/"
# Getting MIDI files
all_midis = []
for i in os.listdir(filepath):
    if i.endswith(".mid"):
        tr = filepath + i
        midi = converter.parse(tr)

# Helping function        
def extract_notes(file):
    notes = []
    pick = None
    for j in file:
        songs = instrument.partitionByInstrument(j)
        for part in
            pick = part.recurse()
            for element in pick:
                if isinstance(element, note.Note):
                elif isinstance(element, chord.Chord):
                    notes.append(".".join(str(n) for n in element.normalOrder))

    return notes

# Getting the list of notes as Corpus
Corpus = extract_notes(all_midis)
print("Total notes in all the Chopin MIDI files in the dataset:", len(Corpus))

print("First fifty values in the Corpus:", Corpus[:50])

# First, let's write some functions to look into the data

def show(music):

def chords_n_notes(Snippet):
    Melody = []
    offset = 0  # Incremental
    for i in Snippet:
        # If it is a chord
        if ("." in i or i.isdigit()):
            chord_notes = i.split(".")  # Separating the notes in the chord
            notes = [] 
            for j in chord_notes:
                inst_note = int(j)
                note_snip = note.Note(inst_note)            
            chord_snip = chord.Chord(notes)
            chord_snip.offset = offset
        # If it is a note
            note_snip = note.Note(i)
            note_snip.offset = offset
        # Increase offset each iteration so that notes do not stack
        offset += 1
    Melody_midi = stream.Stream(Melody)   
    return Melody_midi

Melody_Snippet = chords_n_notes(Corpus[:100])

# To play audio or corpus
print("Sample Audio From Data")

# Creating a count dictionary
count_num = Counter(Corpus)
print("Total unique notes in the Corpus:", len(count_num))

# Exploring the notes dictionary
Notes = list(count_num.keys())
Recurrence = list(count_num.values())

# Average recurrence for a note in Corpus
def Average(lst):
    return sum(lst) / len(lst)
print("Average recurrence for a note in Corpus:", Average(Recurrence))
print("Most frequent note in Corpus appeared:", max(Recurrence), "times")
print("Least frequent note in Corpus appeared:", min(Recurrence), "time")

# Plotting the distribution of Notes
plt.figure(figsize=(18, 3), facecolor="#97BACB")
bins = np.arange(0, (max(Recurrence)), 50) 
plt.hist(Recurrence, bins=bins, color="#97BACB")
plt.axvline(x=100, color="#DBACC1")
plt.title("Frequency Distribution Of Notes In The Corpus")
plt.xlabel("Frequency Of Chords in Corpus")
plt.ylabel("Number Of Chords")

# Getting a list of rare chords
rare_note = []
for index, (key, value) in enumerate(count_num.items()):
    if value < 100:
        m = key
print("Total number of notes that occur less than 100 times:", len(rare_note))

# Eliminating the rare notes
Corpus = [element for element in Corpus if element not in rare_note]

print("Length of Corpus after eliminating the rare notes:", len(Corpus))

# Storing all the unique characters present in my corpus to build a mapping dictionary
symb = sorted(list(set(Corpus)))

L_corpus = len(Corpus) # Length of corpus
L_symb = len(symb) # Length of total unique characters

# Building dictionary to access the vocabulary from indices and vice versa
mapping = dict((c, i) for i, c in enumerate(symb))
reverse_mapping = dict((i, c) for i, c in enumerate(symb))

print("Total number of characters:", L_corpus)
print("Number of unique characters:", L_symb)

# Splitting the Corpus into equal length strings and output targets
length = 40
features = []
targets = []
for i in range(0, L_corpus - length, 1):
    feature = Corpus[i:i + length]
    target = Corpus[i + length]
    features.append([mapping[j] for j in feature])
L_datapoints = len(targets)
print("Total number of sequences in the Corpus:", L_datapoints)

# Reshape X and normalize
X = (np.reshape(features, (L_datapoints, length, 1))) / float(L_symb)
# One hot encode the output variable
y = tf.keras.utils.to_categorical(targets) 

# Taking out a subset of data to be used as seed
X_train, X_seed, y_train, y_seed = train_test_split(X, y, test_size=0.2, random_state=42)

# Initializing the Model
model = Sequential()
# Adding layers
model.add(LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dense(y.shape[1], activation='softmax'))
# Compiling the model for training  
opt = Adamax(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt)

# Model's Summary               

# Training the Model
history =, y_train, batch_size=256, epochs=200)

# Plotting the learnings 
history_df = pd.DataFrame(history.history)
fig = plt.figure(figsize=(15, 4), facecolor="#97BACB")
fig.suptitle("Learning Plot of Model for Loss")
plt.plot(history_df['loss'], label='Training Loss')

# Generating music using the seed
start = np.random.randint(0, len(X_seed)-1)
pattern = X_seed[start]
print("\"", ''.join([reverse_mapping[value] for value in pattern]), "\"")
# Generate 500 notes
generated_notes = []
for i in range(500):
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(L_symb)
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = reverse_mapping[index]
    pattern = pattern[1:len(pattern)]

print("\nGenerated Notes:")

# Converting generated notes into MIDI file
offset = 0
output_notes = []
for pattern in generated_notes:
    # If it is a chord
    if ('.' in pattern or pattern.isdigit()):
        notes_in_chord = pattern.split('.')
        notes = []
        for current_note in notes_in_chord:
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
        new_chord = chord.Chord(notes)
        new_chord.offset = offset
    # If it is a note
        new_note = note.Note(pattern)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
    # Increase offset each iteration so that notes do not stack
    offset += 0.5

midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp=r'C:\Users\adria\OneDrive\Desktop\test\generated_music.mid')

Hi @Adrian_XH, Could please provide the error you are getting while executing the code. If possible, please share the dataset in the shared drive. Thank You.

Hey, this dataset, just a handful before the program works.

Here is the whole output in the terminal:

Total notes in all the Chopin MIDI files in the dataset: 58
First fifty values in the Corpus: ['F#4', '1.4.8', 'C#3', 'E4', 'E4', '1.4.8', 'C#3', 'E4', '4.8.11', 'E3', 'E4', 'E4', '4.8.11', 'E4', 'E3', 'E-4', '11.3.6', 'B2', 'E-4', 'E4', '11.3.6', 'E4', '9.1.4', 'A2', 'C#5', 'B4', '9.1.4', 'G#4', 'A2', 'F#4', '1.4.8', 'C#3', 'E4', 'E4', '1.4.8', 'C#3', 'E4', '4.8.11', 'E3', 'E4', 'E4', '4.8.11', 'E4', 'E3', 'C#4', '11.3.6', 'B2', 'C#4', 'B3', '11.3.6']
GNU LilyPond 2.24.1 (running Guile 2.2)
Changing working directory to: `C:/Users/adria/AppData/Local/Temp/music21'
Processing `C:/Users/adria/AppData/Local/Temp/music21/'
C:/Users/adria/AppData/Local/Temp/music21/ error: unknown escaped string: `\RemoveEmptyStaffContext'    

C:/Users/adria/AppData/Local/Temp/music21/ error: syntax error, unexpected \override, expecting '='     

    \override VerticalAxisGroup #'remove-first = ##t
C:/Users/adria/AppData/Local/Temp/music21/ warning: deprecated: missing `.' in property path VerticalAxisGroup.remove-first
    \override VerticalAxisGroup
                                #'remove-first = ##t
C:/Users/adria/AppData/Local/Temp/music21/ error: syntax error, unexpected '}'

C:/Users/adria/AppData/Local/Temp/music21/ error: Unfinished main input

Interpreting music...[8]
Preprocessing graphical objects...
Calculating line breaks...
Drawing systems...
Converting to PNG...
fatal error: failed files: "C:\\Users\\adria\\AppData\\Local\\Temp\\music21\\"
Traceback (most recent call last):
  File "c:\Users\adria\OneDrive\Desktop\KERAS -", line 94, in <module>
  File "c:\Users\adria\OneDrive\Desktop\KERAS -", line 65, in show
    fp = str(music.write("lily.png"))
  File "C:\Users\adria\AppData\Local\Programs\Python\Python311\Lib\site-packages\music21\stream\", line 406, in write
    return super().write(fmt=fmt, fp=fp, **keywords)
  File "C:\Users\adria\AppData\Local\Programs\Python\Python311\Lib\site-packages\music21\", line 2886, in write      
    return formatWriter.write(self,
  File "C:\Users\adria\AppData\Local\Programs\Python\Python311\Lib\site-packages\music21\converter\", line 432, in write
    convertedFilePath = conv.createPNG(fp)
  File "C:\Users\adria\AppData\Local\Programs\Python\Python311\Lib\site-packages\music21\lily\", line 2511, in createPNG
    lilyFile = self.runThroughLily(backend='eps', format='png', fileName=fileName)
  File "C:\Users\adria\AppData\Local\Programs\Python\Python311\Lib\site-packages\music21\lily\", line 2467, in runThroughLily
    raise LilyTranslateException('cannot find ' + str(fileEnd)
music21.lily.translate.LilyTranslateException: cannot find or the full path C:\Users\adria\AppData\Local\Temp\music21\ original file was C:\Users\adria\AppData\Local\Temp\music21\
PS C:\Users\adria> 

I don’t need lilypond but not sure how to remove it without breaking the code now.

Hi @Adrian_XH, While executing your code in colab i found that while removing rare_note from the corpus the corpus has become empty and faced an error while converting those to categorical. I have made a few changes in the code it works fine. Please refer to this gist for working code example. Thank You