lisonallen committed
Commit cdbfba8 · 1 Parent(s): 0f1d758

Fix CUDA initialization in the Stateless GPU environment

Files changed (2):
  1. app.py +73 -24
  2. diffusers_helper/memory.py +53 -14
app.py CHANGED

@@ -30,30 +30,46 @@ from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode
 from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, state_dict_weighted_merge, state_dict_offset_merge, generate_timestamp
 from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
 from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
-from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete
+from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete, IN_HF_SPACE as MEMORY_IN_HF_SPACE
 from diffusers_helper.thread_utils import AsyncStream, async_run
 from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
 from transformers import SiglipImageProcessor, SiglipVisionModel
 from diffusers_helper.clip_vision import hf_clip_vision_encode
 from diffusers_helper.bucket_tools import find_nearest_bucket
 
-# Get the available CUDA memory
-try:
-    if torch.cuda.is_available():
-        free_mem_gb = get_cuda_free_memory_gb(gpu)
-        print(f'Free VRAM {free_mem_gb} GB')
-    else:
+outputs_folder = './outputs/'
+os.makedirs(outputs_folder, exist_ok=True)
+
+# In the Spaces environment, defer all CUDA operations
+if not IN_HF_SPACE:
+    # Only query CUDA memory outside of Spaces
+    try:
+        if torch.cuda.is_available():
+            free_mem_gb = get_cuda_free_memory_gb(gpu)
+            print(f'Free VRAM {free_mem_gb} GB')
+        else:
+            free_mem_gb = 6.0  # default value
+            print("CUDA is not available; using the default memory settings")
+    except Exception as e:
         free_mem_gb = 6.0  # default value
-        print("CUDA is not available; using the default memory settings")
-except Exception as e:
-    free_mem_gb = 6.0  # default value
-    print(f"Error while getting CUDA memory: {e}; using the default memory settings")
+        print(f"Error while getting CUDA memory: {e}; using the default memory settings")
+
+    high_vram = free_mem_gb > 60
+    print(f'High-VRAM Mode: {high_vram}')
+else:
+    # Use defaults in the Spaces environment
+    print("Using the default memory settings in the Spaces environment")
+    free_mem_gb = 60.0  # default to a higher value in Spaces
+    high_vram = True
+    print(f'High-VRAM Mode: {high_vram}')
 
-high_vram = free_mem_gb > 60
-print(f'High-VRAM Mode: {high_vram}')
+# Use the models variable to store global model references
+models = {}
 
 # Function to load the models
 def load_models():
+    global models
+
     print("Loading models...")
 
     # Load the models
@@ -93,7 +109,7 @@ def load_models():
     image_encoder.requires_grad_(False)
     transformer.requires_grad_(False)
 
-    if torch.cuda.is_available() and gpu.type == 'cuda':
+    if torch.cuda.is_available():
         if not high_vram:
             # DynamicSwapInstaller is same as huggingface's enable_sequential_offload but 3x faster
             DynamicSwapInstaller.install_model(transformer, device=gpu)
@@ -105,28 +121,61 @@ def load_models():
         vae.to(gpu)
         transformer.to(gpu)
 
-    return text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer
+    # Save to the global variable
+    models = {
+        'text_encoder': text_encoder,
+        'text_encoder_2': text_encoder_2,
+        'tokenizer': tokenizer,
+        'tokenizer_2': tokenizer_2,
+        'vae': vae,
+        'feature_extractor': feature_extractor,
+        'image_encoder': image_encoder,
+        'transformer': transformer
+    }
+
+    return models
+
 
 # Use the Hugging Face Spaces GPU decorator
 if IN_HF_SPACE and 'spaces' in globals():
     @spaces.GPU
-    def load_models_with_gpu():
+    def initialize_models():
+        """Initialize the models inside the @spaces.GPU decorator"""
         return load_models()
+
+
+# The function below fetches the models lazily
+def get_models():
+    """Fetch the models, loading them if they have not been loaded yet"""
+    global models
 
-    print("Loading models with the @spaces.GPU decorator")
-    text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer = load_models_with_gpu()
-else:
-    print("Loading models directly, without the @spaces.GPU decorator")
-    text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer = load_models()
+    if not models:
+        if IN_HF_SPACE and 'spaces' in globals():
+            print("Loading models with the @spaces.GPU decorator")
+            models = initialize_models()
+        else:
+            print("Loading models directly")
+            load_models()
+
+    return models
 
-stream = AsyncStream()
 
-outputs_folder = './outputs/'
-os.makedirs(outputs_folder, exist_ok=True)
+stream = AsyncStream()
 
 
 @torch.no_grad()
 def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
+    # Fetch the models
+    models = get_models()
+    text_encoder = models['text_encoder']
+    text_encoder_2 = models['text_encoder_2']
+    tokenizer = models['tokenizer']
+    tokenizer_2 = models['tokenizer_2']
+    vae = models['vae']
+    feature_extractor = models['feature_extractor']
+    image_encoder = models['image_encoder']
+    transformer = models['transformer']
+
     total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
     total_latent_sections = int(max(round(total_latent_sections), 1))
 
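Note: the pattern app.py adopts above — a module-level cache plus a lazy getter, so that importing the module performs no CUDA work and the first real load happens inside a @spaces.GPU function — can be sketched on its own. A minimal, self-contained sketch, not the repo's code: load_fake_models stands in for the real checkpoint loading, and the guarded spaces import is an assumption so the snippet also runs outside Spaces:

import os

# Same Spaces detection memory.py uses: the SPACE_ID env var.
IN_HF_SPACE = os.environ.get('SPACE_ID') is not None

try:
    import spaces  # provided by the Hugging Face Spaces runtime
except ImportError:
    spaces = None  # running locally; no decorator available or needed

models = {}  # module-level cache, filled on first use


def load_fake_models():
    # Stand-in for the real loaders: in app.py this is where checkpoints
    # are read and the first CUDA calls happen.
    return {'transformer': 'transformer-weights', 'vae': 'vae-weights'}


if IN_HF_SPACE and spaces is not None:
    # On a Stateless GPU Space, CUDA may only be touched inside a
    # @spaces.GPU-decorated function, which runs with a GPU attached.
    @spaces.GPU
    def initialize_models():
        return load_fake_models()
else:
    def initialize_models():
        return load_fake_models()


def get_models():
    """Load the models on the first call; later calls hit the cache."""
    global models
    if not models:
        models = initialize_models()
    return models


if __name__ == '__main__':
    print(sorted(get_models()))  # ['transformer', 'vae']

Routing every consumer through get_models() is what keeps import time CUDA-free, which is the constraint the Stateless GPU main process imposes.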
 
diffusers_helper/memory.py CHANGED

@@ -10,17 +10,26 @@ IN_HF_SPACE = os.environ.get('SPACE_ID') is not None
 # Set up the CPU device
 cpu = torch.device('cpu')
 
-# Try to set up the GPU device, falling back to the CPU if it is unavailable
-try:
-    if torch.cuda.is_available():
-        gpu = torch.device(f'cuda:{torch.cuda.current_device()}')
-    else:
-        print("CUDA is not available; using the CPU as the default device")
-        gpu = torch.device('cpu')
-except Exception as e:
-    print(f"Error while initializing the CUDA device: {e}")
-    print("Falling back to the CPU device")
-    gpu = torch.device('cpu')
+# In a Stateless GPU environment, do not initialize CUDA in the main process
+def get_gpu_device():
+    if IN_HF_SPACE:
+        # In Spaces, GPU device initialization is deferred
+        return 'cuda'  # return a string instead of actually initializing the device
+
+    # Initialize normally outside of Spaces
+    try:
+        if torch.cuda.is_available():
+            return torch.device(f'cuda:{torch.cuda.current_device()}')
+        else:
+            print("CUDA is not available; using the CPU as the default device")
+            return torch.device('cpu')
+    except Exception as e:
+        print(f"Error while initializing the CUDA device: {e}")
+        print("Falling back to the CPU device")
+        return torch.device('cpu')
+
+# Store a string representation instead of an actual device object
+gpu = get_gpu_device()
 
 gpu_complete_modules = []
 
@@ -73,7 +82,11 @@ class DynamicSwapInstaller:
         return
 
 
-def fake_diffusers_current_device(model: torch.nn.Module, target_device: torch.device):
+def fake_diffusers_current_device(model: torch.nn.Module, target_device):
+    # Convert a string device to torch.device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
     if hasattr(model, 'scale_shift_table'):
         model.scale_shift_table.data = model.scale_shift_table.data.to(target_device)
         return
@@ -88,6 +101,10 @@ def get_cuda_free_memory_gb(device=None):
     if device is None:
         device = gpu
 
+    # If given a string, convert it to a device
+    if isinstance(device, str):
+        device = torch.device(device)
+
     # If it is not a CUDA device, return the default value
     if device.type != 'cuda':
         print("Cannot get memory info for a non-CUDA device; returning the default value")
@@ -109,8 +126,17 @@ def get_cuda_free_memory_gb(device=None):
 def move_model_to_device_with_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Moving {model.__class__.__name__} to {target_device} with preserved memory: {preserved_memory_gb} GB')
 
+    # If given a string, convert it to a device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
+    # If gpu is a string, convert it to a device
+    gpu_device = gpu
+    if isinstance(gpu_device, str):
+        gpu_device = torch.device(gpu_device)
+
     # If the target device is the CPU, or we are already on the CPU, move directly
-    if target_device.type == 'cpu' or gpu.type == 'cpu':
+    if target_device.type == 'cpu' or gpu_device.type == 'cpu':
         model.to(device=target_device)
         torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return
@@ -131,8 +157,17 @@ def move_model_to_device_with_memory_preservation(model, target_device, preserved_memory_gb=0):
 def offload_model_from_device_for_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Offloading {model.__class__.__name__} from {target_device} to preserve memory: {preserved_memory_gb} GB')
 
+    # If given a string, convert it to a device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
+    # If gpu is a string, convert it to a device
+    gpu_device = gpu
+    if isinstance(gpu_device, str):
+        gpu_device = torch.device(gpu_device)
+
     # If the target device is the CPU, or we are already on the CPU, handle it directly
-    if target_device.type == 'cpu' or gpu.type == 'cpu':
+    if target_device.type == 'cpu' or gpu_device.type == 'cpu':
         model.to(device=cpu)
         torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return
@@ -161,6 +196,10 @@ def unload_complete_models(*args):
 
 
 def load_model_as_complete(model, target_device, unload=True):
+    # If given a string, convert it to a device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+
     if unload:
         unload_complete_models()
 
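The memory.py change defers device construction the same way: in Spaces, gpu stays the plain string 'cuda', and each helper converts strings to torch.device only at call time, inside the GPU-attached process. A minimal sketch of that convention, assuming only PyTorch; as_device and move_model are illustrative names rather than the repo's API:

import os
import torch

IN_HF_SPACE = os.environ.get('SPACE_ID') is not None

cpu = torch.device('cpu')


def get_gpu_device():
    """Pick the compute device without touching CUDA in a Spaces main process."""
    if IN_HF_SPACE:
        # Returning a bare string sidesteps torch.cuda.is_available() /
        # torch.cuda.current_device(), which may initialize CUDA in the
        # main process -- the failure this commit works around.
        return 'cuda'
    if torch.cuda.is_available():
        return torch.device(f'cuda:{torch.cuda.current_device()}')
    return cpu


gpu = get_gpu_device()  # a str in Spaces, a torch.device elsewhere


def as_device(device):
    """Illustrative helper: accept either a str or a torch.device."""
    return torch.device(device) if isinstance(device, str) else device


def move_model(model, target_device):
    # Normalize first, so .type checks like the ones in memory.py work
    # whether the caller passed 'cuda', 'cpu', or a real device object.
    target = as_device(target_device)
    if target.type == 'cpu' or as_device(gpu).type == 'cpu':
        model.to(cpu)
        return
    model.to(target)


if __name__ == '__main__':
    layer = torch.nn.Linear(4, 4)
    move_model(layer, 'cpu')   # strings are accepted
    move_model(layer, cpu)     # as are torch.device objects
    print(as_device(gpu))

A single normalization helper like as_device would avoid the repeated isinstance blocks this diff adds to every function; the commit keeps each function self-contained instead, a reasonable trade-off for a targeted fix.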