How to make a neural network run faster?

# Model parameters
β = 0.994     # Discount factor 
ν = 1         # Frisch elasticity
ψ = 1         # Normalization parameter for hours worked
θ = 0.75      # Calvo parameter
ϵ = 6        # Implies a steady-state markup of 20%
ϕ_pie = 1.5   # Taylor rule parameter
ϕ_y = 0.25    # Taylor rule parameter
Πss = 1.005     # Inflation target
s_g = 0.2     # Share of government expenditures in output
ρ_b = 0.8     # Discount factor shock persistence 
σ_b = 0.025  # Standard deviation of discount factor (preference) shock
Ass = 1         # Steady state of productivity shock
ρ_a = 0.9     # Persistence of TFP shock
σ_a = 0.025  # Standard deviation of productivity shock
σ_m = 0.025  # Standard deviation of MP shock
ρ_g = 0.8     # Government expenditure shock
σ_g = 0.025  # Standard deviation of government expenditure shock

#Steady state
Rss = Πss/β
Pie_starss = ((1 - θ*Πss**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
vss = (1 - θ)/(1 - θ*Πss**ϵ)*Pie_starss**(-ϵ)
g_y = s_g
c_y = (1 - s_g)
x_2ss = 1/c_y*Pie_starss/(1 - β*θ*Πss**(ϵ-1))
x_1ss = (ϵ-1)/ϵ*x_2ss
mcss = (1 - s_g)*(1 - β*θ*Πss**(ϵ))*x_1ss
wss = mcss
l_y = vss
l_c = l_y*1/c_y
lss = (wss/ψ*l_c)**(1/(1 + ν))
yss = lss/l_y
css = c_y*yss

Hi @Vahagn_Davtyan

Welcome to the TensorFlow Forum!

Which TF/Keras model are you using, and what are the dataset type and shape? Knowing the model objective helps determine how the hyperparameters can be tuned. Please share minimal reproducible code so we can replicate and understand the issue. Thank you.

Sorry, here is the full code

import tensorflow as tf
from tensorflow.keras import regularizers, initializers  # Import the regularizers and initializers modules

import numpy as np
#from tensorflow.keras.layers import Input, Dense
#from tensorflow.keras.models import Model
from math import sqrt
from matplotlib import pyplot as plt
from tqdm import tqdm as tqdm         # tqdm is a nice library to visualize ongoing loops
import datetime
# following lines are used for indicative typing
from typing import Tuple
class Vector: pass

# Model parameters
β = 0.994     # Discount factor 
ν = 1         # Frisch elasticity
ψ = 1         # Normalization parameter for hours worked
θ = 0.75      # Calvo parameter
ϵ = 6         # Implies a steady-state markup of 20%
ϕ_pie = 1.5   # Taylor rule parameter
ϕ_y = 0.25    # Taylor rule parameter
Πss = 1.005   # Inflation target
s_g = 0.2     # Share of government expenditures in output
ρ_b = 0.8     # Discount factor shock persistence 
σ_b = 0.02    # Standard deviation of discount factor (preference) shock
Ass = 1       # Steady state of productivity shock
ρ_a = 0.9     # Persistence of TFP shock
σ_a = 0.025   # Standard deviation of productivity shock
σ_m = 0.01    # Standard deviation of MP shock
ρ_g = 0.8     # Government expenditure shock
σ_g = 0.02    # Standard deviation of government expenditure shock

#Steady state
Rss = Πss/β
Pie_starss = ((1 - θ*Πss**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
vss = (1 - θ)/(1 - θ*Πss**ϵ)*Pie_starss**(-ϵ)
g_y = s_g
c_y = (1 - s_g)
x_2ss = 1/c_y*Pie_starss/(1 - β*θ*Πss**(ϵ-1))
x_1ss = (ϵ-1)/ϵ*x_2ss
mcss = (1 - s_g)*(1 - β*θ*Πss**(ϵ))*x_1ss
wss = mcss
l_y = vss
l_c = l_y*1/c_y
lss = (wss/ψ*l_c)**(1/(1 + ν))
yss = lss/l_y
css = c_y*yss
# Standard deviations for ergodic distributions of exogenous state variables
σ_e_b = σ_b/(1-ρ_b**2)**0.5
σ_e_a = σ_a/(1-ρ_a**2)**0.5
σ_e_m = σ_m
σ_e_g = σ_g/(1-ρ_g**2)**0.5

# bounds for endogenous state variable
vmin = 1
vmax = 1.1
# construction of neural network
layers = [
    tf.keras.layers.Dense(32, activation='tanh', input_dim=5, bias_initializer='he_uniform',kernel_initializer=initializers.GlorotUniform()),
    tf.keras.layers.Dense(32, activation='tanh'),
    tf.keras.layers.Dense(32, activation='tanh'),
    tf.keras.layers.Dense(3, activation=tf.keras.activations.linear)
]
model = tf.keras.Sequential(layers)
model.summary()
def dr(z: Vector, a: Vector, m: Vector, g: Vector, v: Vector)-> Tuple[Vector, Vector, Vector]:
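    # decision rule: maps the state (z, a, m, g, v) into the three controls (muc, pie, x_1) via the neural network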

    # we normalize exogenous state variables by their 2 standard deviations 
    # so that they are typically between -1 and 1
    z = z/σ_e_b/2
    a = a/σ_e_a/2
    m = m/σ_e_m/2
    g = g/σ_e_g/2
    # we normalize the endogenous state v to be between -1 and 1
    vmin = tf.math.reduce_min(v)
    vmax = tf.math.reduce_max(v)
    if vmax - vmin == 0:
        v = v
    else:
        v = (v - vmin)/(vmax - vmin)*2.0 - 1.0

    # we prepare input to the perceptron
    s = tf.concat([_e[:,None] for _e in [z,a,m,g, v]], axis=1) # equivalent to np.column_stack
    
    #s = tf.concat([ζ[:, None], a[:, None], m[:, None], g[:, None], v[:, None]], axis=1)
    x = model(s) # n x 3 matrix 

    # Consumption is always positive
    muc = tf.exp( x[:,0] )
    
    # We restrict  inflation to be between -5.9 and 5.9 
    pie = 5.9 * tf.tanh(x[:, 1])     
    
    
    # Auxiliary variable x_1 is restricted to be positive
    x_1 = tf.exp(x[:,2])
    return (muc, pie, x_1)
def Residuals(e_b: Vector, e_a: Vector, e_m: Vector, e_g: Vector, z: Vector, a: Vector, m: Vector, g: Vector, v: Vector):
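    # residuals of the three equilibrium conditions (Euler equation and the two Calvo price-setting recursions),
    # evaluated at today's states under one draw of next-period shocks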

    # all inputs are expected to have the same size n
    n = tf.size(a)

    # arguments correspond to the values of the states today
    muc, pie, x_1 = dr(z, a, m, g, v)
    pie = 1 + pie/100
    c = tf.exp(z)/muc
    x_2 = ϵ/(ϵ-1)*x_1
    y = c + s_g*yss*tf.exp(g)
    pie_star = ((1 - θ*pie**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
    
    vnext = θ*pie**ϵ*v + (1 - θ)*pie_star**(-ϵ)
    l = vnext/tf.exp(a)*y
    w = ψ*l**ν*c
    mc = w/tf.exp(a)
    R = tf.maximum(Rss/(Πss**ϕ_pie*yss**ϕ_y)*pie**ϕ_pie*y**ϕ_y*tf.exp(m),1)
    
    

    # transitions of the exogenous processes
    anext = a*ρ_a + e_a
    znext = z*ρ_b + e_b
    mnext = 0*m + e_m
    gnext = g*ρ_g  + e_g
    # decision rule evaluated at next period's states
    mucnext, pienext, x_1next = dr(znext, anext, mnext, gnext, vnext)
    pienext = 1 + pienext/100 
    cnext = tf.exp(znext)/mucnext
    x_2next = ϵ/(ϵ-1)*x_1next
    pie_starnext = ((1 - θ*pienext**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
    
    R1 = β*mucnext*R/pienext/muc - 1
    R2 = (muc*mc*y + θ*β*pienext**ϵ*x_1next)/x_1 - 1
    R3 = (pie_star*(y*muc + θ*β*pienext**(ϵ-1)/pie_starnext*x_2next))/x_1 - ϵ/(ϵ - 1)


    return (R1, R2, R3)

@tf.function
def Objective(n,z,a,m,g,v): # objective function for DL training
    # randomly drawing 1st realization for shocks
    e1_b = tf.random.normal(shape=(n,), stddev=σ_b)
    e1_a = tf.random.normal(shape=(n,), stddev=σ_a)
    e1_m = tf.random.normal(shape=(n,), stddev=σ_m)
    e1_g = tf.random.normal(shape=(n,), stddev=σ_g)

    # randomly drawing 2nd realization for shocks
    e2_b = tf.random.normal(shape=(n,), stddev=σ_b)
    e2_a = tf.random.normal(shape=(n,), stddev=σ_a)
    e2_m = tf.random.normal(shape=(n,), stddev=σ_m)
    e2_g = tf.random.normal(shape=(n,), stddev=σ_g)
    # residuals for n random grid points under 2 realizations of shocks
    R1_e1, R2_e1, R3_e1 = Residuals(e1_b, e1_a, e1_m, e1_g, z, a, m, g, v)
    R1_e2, R2_e2, R3_e2 = Residuals(e2_b, e2_a, e2_m, e2_g, z, a, m, g, v)
    
    # Manually set the target values at the deterministic steady state (z = a = m = g = 0, v = vss)
    z_specific = tf.constant([0.0]) 
    a_specific = tf.constant([0.0])
    m_specific = tf.constant([0.0])
    g_specific = tf.constant([0.0])
    v_specific = tf.constant([vss])
    
    muc_specific_predicted, pie_specific_predicted, x_1_specific_predicted = dr(z_specific, a_specific, m_specific, g_specific, v_specific)
    pie_specific_predicted = 1 + pie_specific_predicted/100
    muc_specific_target = 1/css
    pie_specific_target = Πss
    x_1_specific_target = x_1ss

    # construct all-in-one expectation operator
    R_squared = R1_e1*R1_e2 + R2_e1*R2_e2 + R3_e1*R3_e2 
    mse_muc = tf.keras.losses.mean_squared_error(muc_specific_target, muc_specific_predicted)
    mse_pie = tf.keras.losses.mean_squared_error(pie_specific_target, pie_specific_predicted)
    mse_x1 = tf.keras.losses.mean_squared_error(x_1_specific_target, x_1_specific_predicted)
    error =  tf.reduce_mean(R_squared) +  (mse_muc + mse_pie + mse_x1)
    # compute average across n random draws
    return    error
@tf.function
def training_step(n,z,a,m,g,v):
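    # single optimization step: evaluate the objective, backpropagate, and apply the gradients with the optimizer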
    with tf.GradientTape() as tape:
        xx = Objective(n,z,a,m,g,v)

    grads = tape.gradient(xx, model.trainable_variables)
    optimizer.apply_gradients(zip(grads,model.trainable_variables))
    grads_norm = tf.linalg.global_norm(grads)

    return xx,grads_norm
def train_me(n, periods, K, threshold):
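    # each of the K iterations simulates the endogenous state v forward for `periods` steps
    # and then runs one training step on the final cross-section of states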
    v = tf.random.uniform(shape=(n,), minval=vmin, maxval=vmax)

    vals = []
    for k in tqdm(range(K)):
        z = tf.random.normal(shape=(n, periods), stddev=σ_e_b)
        a = tf.random.normal(shape=(n, periods), stddev=σ_e_a)
        m = tf.random.normal(shape=(n, periods), stddev=σ_e_m)
        g = tf.random.normal(shape=(n, periods), stddev=σ_e_g)

        for t in range(periods):
            muc, pie, x_1 = dr(z[:, t], a[:, t], m[:, t], g[:, t], v)
            pie = 1.0 + pie / 100.0
            pie_star = ((1 - θ * tf.pow(pie, ϵ - 1)) / (1 - θ))**(1 / (1 - ϵ))

            vnext = θ * tf.pow(pie, ϵ) * v + (1 - θ) * tf.pow(pie_star, -ϵ)
            # transition of endogenous states
            v = vnext

        z_final = z[:,periods-1]
        a_final = a[:,periods-1]
        m_final = m[:,periods-1]
        g_final = g[:,periods-1]
        v_final = v

        initial_weights = model.get_weights()
        val, grads_norm = training_step(n, z_final, a_final, m_final, g_final, v_final)
        vals.append(val)
        final_weights = model.get_weights()
        weight_difference = np.sum([(np.linalg.norm(w - w_initial))**2 for w, w_initial in zip(final_weights, initial_weights)])**0.5

        if grads_norm < threshold or weight_difference < 1e-6 or np.isnan(weight_difference):
            print(f"Stopping training. Gradient Norm ({grads_norm}) is below the threshold.")
            break

    print("L2 Norm between existing and new weights:", weight_difference)
    return vals, z, a, m, g, v

optimizer = tf.keras.optimizers.Adam()
n = 500
periods = 10
threshold = 1e-5

K = 50000
results, z, a, m, g, v = train_me(n,periods,K,threshold)


Can you clarify what that means?

Since this is a large computation, you need a GPU to perform it faster. Please make sure you have enabled GPU support with TensorFlow on your system, or follow this link TF install for the TensorFlow GPU setup for your OS, checking the hardware/software requirements.
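
For example, a quick way to confirm that TensorFlow can actually see a GPU (a minimal check, assuming TF 2.x) is:

import tensorflow as tf
# Lists the GPUs visible to TensorFlow; an empty list means everything runs on the CPU
print(tf.config.list_physical_devices('GPU'))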

Along with that, you can try fewer epochs for model training, the ‘relu’ activation function, and fewer units in the model layers to expedite training.
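
As an illustration only (a sketch, not a tuned specification), a lighter version of the network above could use ‘relu’ and fewer units per layer; the 16-unit width here is just an example, and you would need to check that a smaller network still keeps the residuals acceptably small:

layers = [
    tf.keras.layers.Dense(16, activation='relu', input_dim=5),   # fewer units per hidden layer
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(3, activation='linear')                # same 3 outputs as before
]
model = tf.keras.Sequential(layers)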

I have replicated the mentioned code in Google Colab with a GPU runtime and it completed the computation in around 10 minutes. (Attaching the replicated gist for your reference.)