In my ComfyUI project I frequently run image-to-video and text-to-video tasks, which can easily overflow GPU memory. So when I detect a GPU out-of-memory condition, I call an endpoint exposed by the ComfyUI-Easy-Use extension to clean up GPU memory, but doing so produces an error. Please help me analyze it. The relevant pseudo-code and the full error report are below.
import gc
import urllib.request


class SystemResourceManager:
    # Assumed local ComfyUI server address; adjust to the actual host/port.
    base_url = "http://127.0.0.1:8188"

    @classmethod
    def clear_gpu(cls):
        # Ask the ComfyUI-Easy-Use extension to free GPU memory via its HTTP API.
        try:
            url = f"{cls.base_url}/api/easyuse/cleangpu"
            req = urllib.request.Request(url, method="POST")
            with urllib.request.urlopen(req, timeout=10) as response:
                if response.status == 200:
                    gc.collect()
                    print("cleargpu success!")
                else:
                    print(f"cleargpu failed: {response.status}")
        except Exception as e:
            print(f"cleargpu error: {e}")


# Caller side (pseudo-code): when an out-of-memory condition is detected.
if out_of_memory:
    SystemResourceManager.clear_queue()  # clear_queue() is defined elsewhere in the same class
    SystemResourceManager.clear_gpu()
!!! Exception during processing !!! Allocation on device 0 would exceed allowed memory. (out of memory)
Currently allocated : 21.96 GiB
Requested : 416.62 MiB
Device limit : 23.50 GiB
Free (according to CUDA): 31.69 MiB
PyTorch limit (set by user-supplied memory fraction) : 17179869184.00 GiB
Traceback (most recent call last):
File "/root/ComfyUI/execution.py", line 323, in execute
output_data, output_ui, has_subgraph = get_output_data(obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
File "/root/ComfyUI/execution.py", line 198, in get_output_data
return_values = _map_node_over_list(obj, input_data_all, obj.FUNCTION, allow_interrupt=True, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
File "/root/ComfyUI/execution.py", line 169, in _map_node_over_list
process_inputs(input_dict, i)
File "/root/ComfyUI/execution.py", line 158, in process_inputs
results.append(getattr(obj, func)(**inputs))
File "/root/ComfyUI/custom_nodes/ComfyUI-CogVideoXWrapper/nodes.py", line 844, in process
latents = pipeline["pipe"](
File "/usr/local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/root/ComfyUI/custom_nodes/ComfyUI-CogVideoXWrapper/pipeline_cogvideox.py", line 615, in __call__
noise_pred = self.transformer(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/diffusers/models/transformers/cogvideox_transformer_3d.py", line 429, in forward
hidden_states = self.patch_embed(encoder_hidden_states, hidden_states)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/diffusers/models/embeddings.py", line 440, in forward
embeds = embeds + pos_embedding
torch.cuda.OutOfMemoryError: Allocation on device 0 would exceed allowed memory. (out of memory)
Currently allocated : 21.96 GiB
Requested : 416.62 MiB
Device limit : 23.50 GiB
Free (according to CUDA): 31.69 MiB
PyTorch limit (set by user-supplied memory fraction) : 17179869184.00 GiB
Got an OOM, unloading all loaded models.
Exception in thread Thread-2 (prompt_worker):
Traceback (most recent call last):
File "/root/ComfyUI/execution.py", line 323, in execute
output_data, output_ui, has_subgraph = get_output_data(obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
File "/root/ComfyUI/execution.py", line 198, in get_output_data
return_values = _map_node_over_list(obj, input_data_all, obj.FUNCTION, allow_interrupt=True, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb)
File "/root/ComfyUI/execution.py", line 169, in _map_node_over_list
process_inputs(input_dict, i)
File "/root/ComfyUI/execution.py", line 158, in process_inputs
results.append(getattr(obj, func)(**inputs))
File "/root/ComfyUI/custom_nodes/ComfyUI-CogVideoXWrapper/nodes.py", line 844, in process
latents = pipeline["pipe"](
File "/usr/local/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/root/ComfyUI/custom_nodes/ComfyUI-CogVideoXWrapper/pipeline_cogvideox.py", line 615, in __call__
noise_pred = self.transformer(
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/diffusers/models/transformers/cogvideox_transformer_3d.py", line 429, in forward
hidden_states = self.patch_embed(encoder_hidden_states, hidden_states)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/site-packages/diffusers/models/embeddings.py", line 440, in forward
embeds = embeds + pos_embedding
torch.cuda.OutOfMemoryError: Allocation on device 0 would exceed allowed memory. (out of memory)
Currently allocated : 21.96 GiB
Requested : 416.62 MiB
Device limit : 23.50 GiB
Free (according to CUDA): 31.69 MiB
PyTorch limit (set by user-supplied memory fraction) : 17179869184.00 GiB
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/threading.py", line 1009, in _bootstrap_inner
self.run()
File "/usr/local/lib/python3.10/threading.py", line 946, in run
self._target(*self._args, **self._kwargs)
File "/root/ComfyUI/main.py", line 126, in prompt_worker
e.execute(item[2], prompt_id, item[3], item[4])
File "/root/ComfyUI/execution.py", line 503, in execute
result, error, ex = execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results)
File "/root/ComfyUI/execution.py", line 407, in execute
comfy.model_management.unload_all_models()
File "/root/ComfyUI/comfy/model_management.py", line 1100, in unload_all_models
free_memory(1e30, get_torch_device())
File "/root/ComfyUI/comfy/model_management.py", line 441, in free_memory
if current_loaded_models[i].model_unload(memory_to_free):
File "/root/ComfyUI/comfy/model_management.py", line 347, in model_unload
self.model.unpatch_model(self.model.offload_device, unpatch_weights=unpatch_weights)
File "/root/ComfyUI/comfy/model_patcher.py", line 459, in unpatch_model
self.model.to(device_to)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1160, in to
return self._apply(convert)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 810, in _apply
module._apply(fn)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 810, in _apply
module._apply(fn)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 810, in _apply
module._apply(fn)
[Previous line repeated 2 more times]
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 833, in _apply
param_applied = fn(param)
File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1158, in convert
return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
RuntimeError: CUDA error: invalid argument
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
I don't know whether this is caused by the cleanup function of that extension; I am currently reading through its code as well. The extension's repository address is: .git