My TensorFlow model's loss is not updating. Can someone help me?

I wrote a YOLO model in TensorFlow, but the loss never updates during training. Could someone check it for me?

My full code is at this GitHub link:

My optimizer update code:
if ni <= nw:
    xi = [0, nw]
    # Hmm, why do we need this?
    # This interpolates linearly between 1 and nbs / total_batch_size at step ni,
    # keeps it at least 1, then rounds to an integer.
    # "Accumulate"? Accumulating what?
    accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
    # What is this doing?
    # for j, x in enumerate(opt):
    # Adjust lr and momentum via _set_hyper
    # optimizer._set_hyper("learning_rate", np.interp(ni, xi, [0.0, hyp['lr0'] * lf(epoch)]))
    optimizer._set_hyper("momentum", np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]))

# Multi-scale: rescale this batch?
if opt.multi_scale:
    sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + gs)) // gs * gs  # pick a size in [0.5, 1.5] * imgsz, rounded to a gs multiple
    sf = sz / max(imgs.shape[2:])  # scale factor
    if sf != 1:
        ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to a gs multiple)
        imgs = tf.image.resize(imgs, ns, tf.image.ResizeMethod.BILINEAR, False)

# Forward
with tf.GradientTape() as gt:
    pred = model(imgs)
    loss, loss_items = compute_loss(pred, targets, model)
    print(loss_items, end='')
    print(loss)
    if rank != -1:
        # Gradients are averaged between devices in DDP mode
        loss *= opt.world_size

    grads = gt.gradient(loss, model.trainable_variables)
    # The optimizer applies the gradients
    optimizer.apply_gradients((grad, var) for grad, var in zip(grads, model.trainable_variables) if grad is not None)
    # optimizer.apply_gradients(zip(grads, model.trainable_variables))  # don't use trainable_variables
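One thing I am unsure about: the learning-rate line in the warmup block is commented out, so only momentum is ever adjusted during warmup. If I re-enable it, I believe np.interp needs its two endpoints passed as a single list, like this (a sketch only, assuming the hyp dict and the lf schedule lambda from the rest of my script):

if ni <= nw:
    xi = [0, nw]
    # warm the learning rate from 0 up to lr0 * lf(epoch) over the first nw batches
    optimizer._set_hyper("learning_rate", np.interp(ni, xi, [0.0, hyp['lr0'] * lf(epoch)]))
    # and momentum from warmup_momentum up to the configured momentum
    optimizer._set_hyper("momentum", np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]))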

My training code:

for epoch in range(start_epoch, epochs):
    # Update image weights
    if opt.image_weights:
        # Generate sampling indices
        if rank in [-1, 0]:
            cw = model.class_weights * (1 - maps) ** 2 / nc  # class weights
            iw = model.class_weights * (1 - maps) ** 2 / nc  # image weights
            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)
        # In DDP mode (distributed training) the indices would need to be broadcast
        # if rank != -1:
        #     indices = np.array(dataset.indices) if rank == 0 else np.zeros(dataset.n, dtype=np.int8)

    mloss = np.zeros(5)
    logger.info(('\n' + '%10s' * 9) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'landmark', 'total', 'targets', 'img_size'))

    # dataset len
    pbar = range(nb)
    if rank in [-1, 0]:
        pbar = tqdm(pbar, total=nb)

    for i in pbar:
        # ni = number of integrated batches since the start of training
        ni = i + nb * epoch

        # Fetch the samples for this batch from the dataset by index
        imgs, targets, paths = [], [], []

        batch_index = 0

        for imgi in range(i * batch_size, i * batch_size + batch_size):
            # Indices start at 0, so stay below the dataset length
            if imgi < dl:
                img, target, path = dataset[imgi]
                # Changing the image shape inside __getitem__ is awkward; do it out here instead
                if opt.format == 'NHWC':
                    img = tf.transpose(img, perm=[1, 2, 0]).numpy()

                imgs.append(img)
                targets.append(target)
                paths.append(path)

        imgs, targets, paths = dataset.collate_fn(imgs, targets, paths)

        imgs = np.array(imgs, dtype=np.float32) / 255.0  # uint8 0-255 -> float32 0.0-1.0

        # Warmup: adjust the optimizer's hyperparameters over the first nw batches
        if ni <= nw:
            xi = [0, nw]
            # Hmm, why do we need this?
            # This interpolates linearly between 1 and nbs / total_batch_size at step ni,
            # keeps it at least 1, then rounds to an integer.
            # "Accumulate"? Accumulating what?
            accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
            # What is this doing?
            # for j, x in enumerate(opt):
            # Adjust lr and momentum via _set_hyper
            # optimizer._set_hyper("learning_rate", np.interp(ni, xi, [0.0, hyp['lr0'] * lf(epoch)]))
            optimizer._set_hyper("momentum", np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]))

        # Multi-scale: rescale this batch?
        if opt.multi_scale:
            sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + gs)) // gs * gs  # pick a size in [0.5, 1.5] * imgsz, rounded to a gs multiple
            sf = sz / max(imgs.shape[2:])  # scale factor
            if sf != 1:
                ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to a gs multiple)
                imgs = tf.image.resize(imgs, ns, tf.image.ResizeMethod.BILINEAR, False)

        # Forward
        with tf.GradientTape() as gt:
            pred = model(imgs)
            loss, loss_items = compute_loss(pred, targets, model)
            print(loss_items, end='')
            print(loss)
            if rank != -1:
                # Gradients are averaged between devices in DDP mode
                loss *= opt.world_size

            grads = gt.gradient(loss, model.trainable_variables)
            # The optimizer applies the gradients
            optimizer.apply_gradients((grad, var) for grad, var in zip(grads, model.trainable_variables) if grad is not None)
            # optimizer.apply_gradients(zip(grads, model.trainable_variables))  # don't use trainable_variables

        # ? What on earth is this
        # TensorFlow mixed-precision training... I'll work it out later, not now

        # scaler.scale(loss).backward()

        # Optimize
        # if ni % accumulate == 0:
        #     scaler.step(optimizer)
        #     scaler.update()
        #     optimizer.zero_grad()
        #     if ema:
        #         ema.update(model)

        # Print
        if rank in [-1, 0]:
            mloss = (mloss * i + loss_items) / (i + 1)  # update the running mean loss

            mem = '%.3G' % (pynvml.nvmlDeviceGetMemoryInfo(handle).used / 1E9 if useGpu else 0)
            s = ('%10s' * 2 + '%10.4g' * 7) % (
                '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1] if opt.format == 'NCHW' else imgs.shape[1])
            pbar.set_description(s)

            # Plot
            if plots and ni < 3:
                f = save_dir / f'train_batch{ni}.jpg'  # filename
                f = ''  # filename immediately overridden
                Thread(target=plot_images, args=(imgs, targets, paths, f, opt.format), daemon=True).start()
                # if tb_writer:
                #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                #     tb_writer.add_graph(model, imgs)  # add the model to TensorBoard
            elif plots and ni == 3 and wandb:
                wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]})

        # Batch done: free imgs, paths and targets
        del imgs, targets, paths
        # Remember to call gc for a deep clean after each pass
        gc.collect()

model.save("mask_detector")
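To narrow down why the loss stays flat, here is the single-batch sanity check I would run first. It is only a sketch: model, imgs, targets, optimizer and compute_loss come from the code above, and training=True assumes the model follows the standard Keras call signature:

import tensorflow as tf

# If every gradient comes back as None (or all zeros), the tape is not
# connected to the loss, e.g. compute_loss drops out of the TF graph by
# converting tensors to numpy, or the forward pass ran outside the tape.
with tf.GradientTape() as tape:
    pred = model(imgs, training=True)  # training=True puts BatchNorm/Dropout in training mode
    loss, loss_items = compute_loss(pred, targets, model)
grads = tape.gradient(loss, model.trainable_variables)
n_none = sum(g is None for g in grads)
print(f'{n_none}/{len(grads)} gradients are None')
if n_none < len(grads):
    print('max |grad| =', max(float(tf.reduce_max(tf.abs(g))) for g in grads if g is not None))

# Also worth confirming the learning rate is not stuck at 0, since the warmup
# line that sets it is commented out (_get_hyper mirrors the _set_hyper used above):
print('lr =', float(optimizer._get_hyper('learning_rate')))

If all the gradients are None, the problem is the connection between the loss and the model variables; if they look reasonable, a zero learning rate would be my next suspect.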