multimodalart (HF Staff) committed
Commit bf89172
Parent(s): 9cd412c

Add more logic to clip embeds

Files changed (1): app.py (+26 -2)

app.py CHANGED
@@ -59,8 +59,32 @@ make_cutouts = MakeCutouts(clip_model.visual.input_resolution, 16, 1.)
 def run_all(prompt, steps, n_images, weight, clip_guided):
     import random
     seed = int(random.randint(0, 2147483647))
-    target_embed = clip_model.encode_text(clip.tokenize(prompt)).float().cuda()
-    clip_embed = target_embed.repeat([n_images, 1])
+    target_embed = clip_model.encode_text(clip.tokenize(prompt)).float()#.cuda()
+
+    if(clip_guided):
+        prompts = [prompt]
+        def parse_prompt(prompt):
+            if prompt.startswith('http://') or prompt.startswith('https://'):
+                vals = prompt.rsplit(':', 2)
+                vals = [vals[0] + ':' + vals[1], *vals[2:]]
+            else:
+                vals = prompt.rsplit(':', 1)
+            vals = vals + ['', '1'][len(vals):]
+            return vals[0], float(vals[1])
+
+        for prompt in prompts:
+            txt, weight = parse_prompt(prompt)
+            target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
+            weights.append(weight)
+
+        target_embeds = torch.cat(target_embeds)
+        weights = torch.tensor(weights, device=device)
+        if weights.sum().abs() < 1e-3:
+            raise RuntimeError('The weights must not sum to 0.')
+        weights /= weights.sum().abs()
+        clip_embed = F.normalize(target_embeds.mul(weights[:, None]).sum(0, keepdim=True), dim=-1)
+    clip_embed = target_embed.repeat([n_images, 1])
+
     def cfg_model_fn(x, t):
         """The CFG wrapper function."""
         n = x.shape[0]
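
For readers following along, below is a self-contained sketch of the weighted-prompt CLIP embedding logic this commit adds. It is an illustration rather than the app's exact code: the CLIP variant ('ViT-B/16'), the device handling, and the combined_clip_embed wrapper are assumptions, and the helper initializes the target_embeds/weights lists itself since their setup falls outside this hunk.

# Standalone sketch (assumptions: OpenAI CLIP package, ViT-B/16, and that app.py
# initializes target_embeds/weights as empty lists outside the hunk shown above).
import torch
import torch.nn.functional as F
import clip

device = 'cuda' if torch.cuda.is_available() else 'cpu'
clip_model, _ = clip.load('ViT-B/16', device=device)

def parse_prompt(prompt):
    # Split 'text:weight' into (text, weight); URLs keep their 'http(s):' colon,
    # and a missing weight defaults to 1.
    if prompt.startswith('http://') or prompt.startswith('https://'):
        vals = prompt.rsplit(':', 2)
        vals = [vals[0] + ':' + vals[1], *vals[2:]]
    else:
        vals = prompt.rsplit(':', 1)
    vals = vals + ['', '1'][len(vals):]
    return vals[0], float(vals[1])

def combined_clip_embed(prompts, n_images):
    # Encode each prompt with CLIP, blend the embeddings by their normalized
    # weights, L2-normalize the blend, and tile it once per requested image.
    target_embeds, weights = [], []
    for prompt in prompts:
        txt, weight = parse_prompt(prompt)
        target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
        weights.append(weight)
    target_embeds = torch.cat(target_embeds)
    weights = torch.tensor(weights, device=device)
    if weights.sum().abs() < 1e-3:
        raise RuntimeError('The weights must not sum to 0.')
    weights /= weights.sum().abs()
    clip_embed = F.normalize(target_embeds.mul(weights[:, None]).sum(0, keepdim=True), dim=-1)
    return clip_embed.repeat([n_images, 1])

# Usage: weight 'a watercolor landscape' twice as heavily as 'sharp photograph'.
# embed = combined_clip_embed(['a watercolor landscape:2', 'sharp photograph:1'], n_images=4)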