How can I resolve a ResourceExhaustedError when training a CycleGAN on the Fashion MNIST dataset?

ResourceExhaustedError Traceback (most recent call last)
in <cell line: 1>()
1 for epoch in range(1, EPOCHS+1):
2 for image_x, image_y in tf.data.Dataset.zip((tr1, tr2)):
----> 3 train_step(image_x, image_y)
4 generate_images(generator_g, sample_tr1, generator_f, sample_tr2)
5 ckpt_save_path = ckpt_manager.save()

1 frames
/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
50 try:
51 ctx.ensure_initialized()
---> 52 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
53 inputs, attrs, num_outputs)
54 except core._NotOkStatusException as e:

ResourceExhaustedError: Graph execution error:

Detected at node ‘model/concatenate/concat_13’ defined at (most recent call last):
File “/usr/lib/python3.10/runpy.py”, line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File “/usr/lib/python3.10/runpy.py”, line 86, in _run_code
exec(code, run_globals)
File “/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py”, line 16, in
app.launch_new_instance()
File “/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py”, line 992, in launch_instance
app.start()
File “/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py”, line 619, in start
self.io_loop.start()
File “/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py”, line 215, in start
self.asyncio_loop.run_forever()
File “/usr/lib/python3.10/asyncio/base_events.py”, line 603, in run_forever
self._run_once()
File “/usr/lib/python3.10/asyncio/base_events.py”, line 1909, in _run_once
handle._run()
File “/usr/lib/python3.10/asyncio/events.py”, line 80, in _run
self._context.run(self._callback, *self._args)
File “/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py”, line 687, in
lambda f: self._run_callback(functools.partial(callback, future))
File “/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py”, line 740, in _run_callback
ret = callback()
File “/usr/local/lib/python3.10/dist-packages/tornado/gen.py”, line 821, in inner
self.ctx_run(self.run)
File “/usr/local/lib/python3.10/dist-packages/tornado/gen.py”, line 782, in run
yielded = self.gen.send(value)
File “/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py”, line 361, in process_one
yield gen.maybe_future(dispatch(*args))
File “/usr/local/lib/python3.10/dist-packages/tornado/gen.py”, line 234, in wrapper
yielded = ctx_run(next, result)
File “/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py”, line 261, in dispatch_shell
yield gen.maybe_future(handler(stream, idents, msg))
File “/usr/local/lib/python3.10/dist-packages/tornado/gen.py”, line 234, in wrapper
yielded = ctx_run(next, result)
File “/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py”, line 539, in execute_request
self.do_execute(
File “/usr/local/lib/python3.10/dist-packages/tornado/gen.py”, line 234, in wrapper
yielded = ctx_run(next, result)
File “/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py”, line 302, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File “/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py”, line 539, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py”, line 2975, in run_cell
result = self._run_cell(
File “/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py”, line 3030, in _run_cell
return runner(coro)
File “/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py”, line 78, in pseudo_sync_runner
coro.send(None)
File “/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py”, line 3257, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File “/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py”, line 3473, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File “/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py”, line 3553, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File “”, line 3, in <cell line: 1>
train_step(image_x, image_y)
File “”, line 12, in train_step
cycled_y = generator_g(fake_x, training=True) # code to generate images using Generator_g on fake_x
File “/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/engine/training.py”, line 558, in call
return super().call(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/engine/base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/engine/functional.py”, line 512, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File “/usr/local/lib/python3.10/dist-packages/keras/engine/functional.py”, line 669, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/engine/base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “/usr/local/lib/python3.10/dist-packages/keras/layers/merging/base_merge.py”, line 196, in call
return self._merge_function(inputs)
File “/usr/local/lib/python3.10/dist-packages/keras/layers/merging/concatenate.py”, line 134, in _merge_function
return backend.concatenate(inputs, axis=self.axis)
File “/usr/local/lib/python3.10/dist-packages/keras/backend.py”, line 3581, in concatenate
return tf.concat([to_dense(x) for x in tensors], axis)
Node: ‘model/concatenate/concat_13’
OOM when allocating tensor with shape[32,128,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node model/concatenate/concat_13}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn’t available when running in Eager mode.
[Op:__inference_train_step_39138]

@rupak_shah,

Welcome to the Tensorflow Forum!

This error can be caused by many things, such as insufficient RAM, running out of GPU memory, or a bug in the code that causes a memory leak.

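Can you try reducing the batch size to a smaller value that fits in GPU memory? The tensor that failed to allocate has shape [32,128,128,128], which suggests a batch size of 32; as float32, that single tensor alone needs about 256 MiB, so a smaller batch (for example 8, or even 1) should reduce the memory needed per step proportionally.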

Thank you!

1 Like