# Tensorflow Base and Keras Model Different Results

I have been trying to learn how to build an ANN with base TensorFlow (the Core APIs) as opposed to just using the Keras API. TensorFlow has a nice tutorial on getting started with this using the MNIST digits dataset:
"Multilayer perceptrons for digit recognition with Core APIs" (TensorFlow Core). I wanted to change this up a little and modify it to do regression instead of classification.

Basically all the code is as follows

import tensorflow as tf
import numpy as np
from tensorflow import keras
import pdb

def xavier_init(shape):
    """Compute Xavier (Glorot) uniform initial values for a weight matrix.

    Args:
        shape: Pair ``(in_dim, out_dim)`` giving the weight matrix dimensions.

    Returns:
        A tensor of shape ``(in_dim, out_dim)`` sampled uniformly between
        ``-lim`` and ``lim``, where ``lim = sqrt(6) / sqrt(in_dim + out_dim)``.
    """
    in_dim, out_dim = shape
    xavier_lim = tf.sqrt(6.) / tf.sqrt(tf.cast(in_dim + out_dim, tf.float32))
    # The same op-level seed (22) is passed on every call.
    weight_vals = tf.random.uniform(
        shape=(in_dim, out_dim), minval=-xavier_lim, maxval=xavier_lim, seed=22
    )
    return weight_vals

class DenseLayer(tf.Module):
    """Fully connected layer ``activation(x @ w + b)`` with lazily built weights.

    The weight matrix and bias are created on the first call, once the input
    feature dimension is known.
    """

    def __init__(self, out_dim, weight_init=xavier_init, activation=tf.identity):
        """Store the layer configuration; no variables are created yet.

        Args:
            out_dim: Number of output units.
            weight_init: Callable taking ``shape=(in_dim, out_dim)`` and
                returning the initial weight values.
            activation: Callable applied to the affine output.
        """
        super().__init__()
        self.out_dim = out_dim
        self.activation = activation
        self.built = False
        self.weight_init = weight_init

    def __call__(self, x):
        """Apply the layer to ``x`` and return the activated output.

        Args:
            x: Input tensor; assumed to be rank 2 ``(batch, features)`` since
                it is fed to ``tf.matmul`` -- TODO confirm with callers.
        """
        if not self.built:
            # Infer the input feature dimension from the first batch seen.
            self.in_dim = x.shape[1]
            # Use the configured initializer rather than a hard-coded one.
            self.w = tf.Variable(self.weight_init(shape=(self.in_dim, self.out_dim)))
            self.b = tf.Variable(tf.zeros(shape=(self.out_dim,)))
            self.built = True
        # Affine transform followed by the activation.
        z = tf.add(tf.matmul(x, self.w), self.b)
        return self.activation(z)

class LinearLayer(tf.Module):
    """Fully connected layer ``x @ w + b`` with no activation.

    The weight matrix and bias are created on the first call, once the input
    feature dimension is known.
    """

    def __init__(self, out_dim, weight_init=xavier_init):
        """Store the layer configuration; no variables are created yet.

        Args:
            out_dim: Number of output units.
            weight_init: Callable taking ``shape=(in_dim, out_dim)`` and
                returning the initial weight values.
        """
        super().__init__()
        self.out_dim = out_dim
        self.built = False
        self.weight_init = weight_init

    def __call__(self, x):
        """Apply the affine transform to ``x`` and return the result.

        Args:
            x: Input tensor; assumed to be rank 2 ``(batch, features)`` since
                it is fed to ``tf.matmul`` -- TODO confirm with callers.
        """
        if not self.built:
            # Infer the input feature dimension from the first batch seen.
            self.in_dim = x.shape[1]
            # Use the configured initializer rather than a hard-coded one.
            self.w = tf.Variable(self.weight_init(shape=(self.in_dim, self.out_dim)))
            self.b = tf.Variable(tf.zeros(shape=(self.out_dim,)))
            self.built = True
        z = tf.add(tf.matmul(x, self.w), self.b)
        return z

class MLP_REG(tf.Module):
    """Multilayer perceptron for regression: applies its layers in sequence."""

    def __init__(self, layers):
        """Store the ordered list of layer callables.

        Args:
            layers: Iterable of callables, applied in order to the input.
        """
        super().__init__()
        self.layers = layers

    @tf.function
    def __call__(self, x, preds=False):
        """Run ``x`` through every layer and return the final output.

        Args:
            x: Input passed to the first layer.
            preds: Unused; kept so existing call sites remain valid.
        """
        for layer in self.layers:
            x = layer(x)
        return x

def mse_loss(ypred, y):
    """Return the mean of the squared differences between ``ypred`` and ``y``.

    NOTE(review): the subtraction broadcasts. If ``ypred`` is ``(n, 1)`` and
    ``y`` is ``(n,)``, the difference becomes ``(n, n)`` and the result is not
    the per-sample MSE -- confirm that both arguments have the same shape.
    """
    return tf.reduce_mean(tf.square(ypred - y))

def accuracy(ypred, y):
    """Return the sum of squared errors between ``ypred`` and ``y``.

    Despite its name this is an error measure, not an accuracy: it is the
    unnormalized sum of ``(ypred - y) ** 2`` over all elements.
    """
    loss = tf.math.reduce_sum(tf.square(ypred - y))
    return loss

# NOTE(review): the `class` line and the `apply_gradients` signature were absent
# from the pasted code; they are reconstructed here from the names the body
# uses (`grads`, `vars`). Confirm the class name and base class against the
# original script.
class Adam:
    """Adam optimizer that keeps its own first- and second-moment slots."""

    def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):
        """Initialize optimizer parameters and empty variable slots.

        Args:
            learning_rate: Step size applied to each update.
            beta_1: Decay rate of the first-moment (mean) estimate.
            beta_2: Decay rate of the second-moment (squared) estimate.
            ep: Small constant added to the denominator of the update.
        """
        super().__init__()
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.learning_rate = learning_rate
        self.ep = ep
        self.t = 1.
        self.v_dvar, self.s_dvar = [], []
        self.built = False

    def apply_gradients(self, grads, vars):
        """Update each variable in place from its gradient.

        Args:
            grads: Gradients, in the same order as ``vars``.
            vars: Variables to update (the name shadows the builtin; it is
                kept to match the existing body).
        """
        # Initialize the moment slots on the first call.
        if not self.built:
            for var in vars:
                v = tf.Variable(tf.zeros(shape=var.shape))
                s = tf.Variable(tf.zeros(shape=var.shape))
                self.v_dvar.append(v)
                self.s_dvar.append(s)
            self.built = True
        # Update the model variables given their gradients.
        for i, (d_var, var) in enumerate(zip(grads, vars)):
            self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)
            self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))
            # Bias-correct both moment estimates using the step counter t.
            v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))
            s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))
            var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))
        self.t += 1.
        return

def train_step(x_batch, y_batch, loss, acc, model, optimizer):
    """Run one optimization step on a batch and return its loss and metric.

    Args:
        x_batch: Batch of model inputs.
        y_batch: Batch of targets.
        loss: Callable ``loss(y_pred, y)`` returning the value to minimize.
        acc: Callable ``acc(y_pred, y)`` returning the tracked metric.
        model: Callable model exposing ``variables`` (e.g. a ``tf.Module``).
        optimizer: Object exposing ``apply_gradients(grads, vars)``.

    Returns:
        Tuple ``(batch_loss, batch_acc)``.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        y_pred = model(x_batch)
        batch_loss = loss(y_pred, y_batch)
    batch_acc = acc(y_pred, y_batch)
    # Without this gradient step the model parameters would never change.
    grads = tape.gradient(batch_loss, model.variables)
    optimizer.apply_gradients(grads, model.variables)
    return batch_loss, batch_acc

def val_step(x_batch, y_batch, loss, acc, model):
    """Evaluate the model on a batch of validation data.

    Args:
        x_batch: Batch of model inputs.
        y_batch: Batch of targets.
        loss: Callable ``loss(y_pred, y)``.
        acc: Callable ``acc(y_pred, y)``.
        model: Callable producing predictions from ``x_batch``.

    Returns:
        Tuple ``(batch_loss, batch_acc)``; no parameters are updated.
    """
    y_pred = model(x_batch)
    batch_loss = loss(y_pred, y_batch)
    batch_acc = acc(y_pred, y_batch)
    return batch_loss, batch_acc

def train_model(mlp, train_data, val_data, loss, acc, optimizer, epochs):
    """Train ``mlp`` for ``epochs`` epochs, printing per-epoch metrics.

    Args:
        mlp: Model passed to ``train_step`` and ``val_step``.
        train_data: Iterable of ``(x_batch, y_batch)`` training pairs.
        val_data: Iterable of ``(x_batch, y_batch)`` validation pairs.
        loss: Loss callable forwarded to the step functions.
        acc: Metric callable forwarded to the step functions.
        optimizer: Optimizer forwarded to ``train_step``.
        epochs: Number of passes over ``train_data``.

    Returns:
        Tuple ``(train_losses, train_accs, val_losses, val_accs)``, each a
        list with one per-epoch mean.
    """
    # Initialize data structures
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    for epoch in range(epochs):
        batch_losses_train, batch_accs_train = [], []
        batch_losses_val, batch_accs_val = [], []

        # Iterate over the training data
        for x_batch, y_batch in train_data:
            # Compute gradients and update the model's parameters
            batch_loss, batch_acc = train_step(x_batch, y_batch, loss, acc, mlp, optimizer)
            # Keep track of batch-level training performance
            batch_losses_train.append(batch_loss)
            batch_accs_train.append(batch_acc)

        # Iterate over the validation data
        for x_batch, y_batch in val_data:
            batch_loss, batch_acc = val_step(x_batch, y_batch, loss, acc, mlp)
            batch_losses_val.append(batch_loss)
            batch_accs_val.append(batch_acc)

        # Average the batch-level values into epoch-level values
        train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)
        val_loss, val_acc = tf.reduce_mean(batch_losses_val), tf.reduce_mean(batch_accs_val)
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        print(f"Epoch: {epoch}")
        print(f"Training loss: {train_loss:.3f}, Training accuracy: {train_acc:.3f}")
        print(f"Validation loss: {val_loss:.3f}, Validation accuracy: {val_acc:.3f}")
    return train_losses, train_accs, val_losses, val_accs

For a dataset, I used the sklearn diabetes dataset:

import sklearn.datasets as ds

Anyway, I use this to build a model with two hidden layers of 300 and 150 units, both with ReLU activation, using mean squared error as the loss.

I train this for 10 epochs, and the loss on the training data is on the order of 7000.

However, if I use Keras as follows:

from tensorflow import keras
from tensorflow.keras import layers

# Keras counterpart of the hand-written network: two ReLU hidden layers
# (300 and 150 units) followed by a single linear output unit.
model = keras.Sequential([
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(150, activation='relu'),
    keras.layers.Dense(1),
])