Distributing the creation of random variables across GPUs

I am trying to generate independent random variables with TensorFlow distributed code on several GPUs.
I use the split method on a random generator to create n sub-generators for my n GPUs. Then I would like to run some distributed code where each GPU uses its own sub-generator.
This is a task commonly achieved in software using MPI, but I could not find any way to achieve it with TF.
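To be clear about what I mean by sub-generators, the split itself is not the problem; here is a minimal sketch (the names are just illustrative):

import tensorflow as tf

gen = tf.random.Generator.from_seed(1)
subGens = gen.split(2)                       # two independent generators, each with its own state
samples = [g.normal([4]) for g in subGens]   # each stream is independent of the others

What I cannot figure out is how to hand each replica its own sub-generator inside the distributed code.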
Here is an example of code (not working) that tries to distribute the work over 2 GPUs.

import numpy as np
import tensorflow as tf
import time
import sys, os
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--n_gpus', type=int, default=1)
args = parser.parse_args()
n_gpus = args.n_gpus

device_type = "GPU"
devices = tf.config.experimental.list_physical_devices(device_type)
devices_names = [d.name.split('e:')[1] for d in devices]  # "/physical_device:GPU:0" -> "GPU:0"

strategy = tf.distribute.MirroredStrategy(devices=devices_names[:n_gpus])


with strategy.scope():
    optimizerControl = tf.keras.optimizers.Adam(learning_rate=1e-3)
    modelControl = tf.keras.Sequential([tf.keras.layers.Dense(8, activation=tf.nn.relu),
                                        tf.keras.layers.Dense(1)])
    
@tf.function
def cal(locGen, nbSimul, modelControl):
    # draw nbSimul samples from this replica's generator and compute the loss
    x = locGen.normal([nbSimul])
    return tf.reduce_sum(tf.square(modelControl(tf.expand_dims(x, axis=-1))[:, 0] - tf.square(x)))

def train_step(newGen, nbSimul, modelControl, optimizerControl):
    # each replica is meant to pick its own sub-generator from the list
    i = tf.distribute.get_replica_context().replica_id_in_sync_group
    print("Device run", i)
    with tf.GradientTape() as tape:
        loss = cal(newGen[i], nbSimul, modelControl)
    gradients = tape.gradient(loss, modelControl.trainable_variables)
    optimizerControl.apply_gradients(zip(gradients, modelControl.trainable_variables))
    return loss
    

def distributed_train_step(newGen, nbSimul, modelControl, optimizerControl):
    per_replica_losses = strategy.run(train_step, args=(newGen, int(nbSimul / n_gpus), modelControl, optimizerControl))
    return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
                           axis=None) / nbSimul
                         
gen = tf.random.Generator.from_seed(1)
newGen = gen.split(n_gpus)  # one independent sub-generator per GPU
batchSize = 10
for epoch in range(10):
    valTest = distributed_train_step(newGen, batchSize, modelControl, optimizerControl)
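Looking at the tf.distribute API, strategy.experimental_distribute_values_from_function looks like it might be a way to give each replica its own sub-generator, but I am not sure this is the intended pattern. An untested sketch of what I have in mind, reusing the names from the code above:

def value_fn(ctx):
    # pick this replica's sub-generator by its replica id
    return newGen[ctx.replica_id_in_sync_group]

perReplicaGen = strategy.experimental_distribute_values_from_function(value_fn)
# train_step would then receive its own generator directly instead of indexing the list:
# per_replica_losses = strategy.run(train_step, args=(perReplicaGen, batchSize // n_gpus, modelControl, optimizerControl))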

Does someone know if this is possible?
Thank you.