FP-Studio
diff --git a/diffusers_helper/bucket_tools.py b/diffusers_helper/bucket_tools.py
Lines changed: 12 additions & 10 deletions
diff --git a/diffusers_helper/clip_vision.py b/diffusers_helper/clip_vision.py
Lines changed: 3 additions & 1 deletion
diff --git a/diffusers_helper/dit_common.py b/diffusers_helper/dit_common.py
Lines changed: 9 additions & 2 deletions
diff --git a/diffusers_helper/gradio/progress_bar.py b/diffusers_helper/gradio/progress_bar.py
Lines changed: 5 additions & 5 deletions
diff --git a/diffusers_helper/hf_login.py b/diffusers_helper/hf_login.py
Lines changed: 3 additions & 3 deletions
diff --git a/diffusers_helper/hunyuan.py b/diffusers_helper/hunyuan.py
Lines changed: 56 additions & 28 deletions
@@ -63,35 +63,37 @@
 def find_nearest_bucket(h, w, resolution=640):
     # Use the provided resolution or find the closest available bucket size
     # print(f"find_nearest_bucket called with h={h}, w={w}, resolution={resolution}")
-    
+
     # Convert resolution to int if it's not already
     resolution = int(resolution) if not isinstance(resolution, int) else resolution
-    
+
     if resolution not in bucket_options:
         # Find the closest available resolution
         available_resolutions = list(bucket_options.keys())
-        closest_resolution = min(available_resolutions, key=lambda x: abs(x - resolution))
+        closest_resolution = min(
+            available_resolutions, key=lambda x: abs(x - resolution)
+        )
         # print(f"Resolution {resolution} not found in bucket options, using closest available: {closest_resolution}")
         resolution = closest_resolution
     # else:
-        # print(f"Resolution {resolution} found in bucket options")
-    
+    # print(f"Resolution {resolution} found in bucket options")
+
     # Calculate the aspect ratio of the input image
     input_aspect_ratio = w / h if h > 0 else 1.0
     # print(f"Input aspect ratio: {input_aspect_ratio:.4f}")
-    
-    min_diff = float('inf')
+
+    min_diff = float("inf")
     best_bucket = None
-    
+
     # Find the bucket size with the closest aspect ratio to the input image
-    for (bucket_h, bucket_w) in bucket_options[resolution]:
+    for bucket_h, bucket_w in bucket_options[resolution]:
         bucket_aspect_ratio = bucket_w / bucket_h if bucket_h > 0 else 1.0
         # Calculate the difference in aspect ratios
         diff = abs(bucket_aspect_ratio - input_aspect_ratio)
         if diff < min_diff:
             min_diff = diff
             best_bucket = (bucket_h, bucket_w)
         # print(f"  Checking bucket ({bucket_h}, {bucket_w}), aspect ratio={bucket_aspect_ratio:.4f}, diff={diff:.4f}, current best={best_bucket}")
-    
+
     # print(f"Using resolution {resolution}, selected bucket: {best_bucket}")
     return best_bucket
@@ -6,7 +6,9 @@ def hf_clip_vision_encode(image, feature_extractor, image_encoder):
     assert image.ndim == 3 and image.shape[2] == 3
     assert image.dtype == np.uint8
 
-    preprocessed = feature_extractor.preprocess(images=image, return_tensors="pt").to(device=image_encoder.device, dtype=image_encoder.dtype)
+    preprocessed = feature_extractor.preprocess(images=image, return_tensors="pt").to(
+        device=image_encoder.device, dtype=image_encoder.dtype
+    )
     image_encoder_output = image_encoder(**preprocessed)
 
     return image_encoder_output
@@ -1,14 +1,21 @@
 import torch
 import accelerate.accelerator
 
-from diffusers.models.normalization import RMSNorm, LayerNorm, FP32LayerNorm, AdaLayerNormContinuous
+from diffusers.models.normalization import (
+    RMSNorm,
+    LayerNorm,
+    FP32LayerNorm,
+    AdaLayerNormContinuous,
+)
 
 
 accelerate.accelerator.convert_outputs_to_fp32 = lambda x: x
 
 
 def LayerNorm_forward(self, x):
-    return torch.nn.functional.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps).to(x)
+    return torch.nn.functional.layer_norm(
+        x, self.normalized_shape, self.weight, self.bias, self.eps
+    ).to(x)
 
 
 LayerNorm.forward = LayerNorm_forward
 
@@ -1,14 +1,14 @@
-progress_html = '''
+progress_html = """
 <div class="loader-container">
   <div class="loader"></div>
   <div class="progress-container">
     <progress value="*number*" max="100"></progress>
   </div>
   <span>*text*</span>
 </div>
-'''
+"""
 
-css = '''
+css = """
 .loader-container {
   display: flex; /* Use flex to align items horizontally */
   align-items: center; /* Center items vertically within the container */
@@ -75,11 +75,11 @@
   display: none !important;
 }
 
-'''
+"""
 
 
 def make_progress_bar_html(number, text):
-    return progress_html.replace('*number*', str(number)).replace('*text*', text)
+    return progress_html.replace("*number*", str(number)).replace("*text*", text)
 
 
 def make_progress_bar_css():
 
@@ -8,14 +8,14 @@ def login(token):
     while True:
         try:
             login(token)
-            print('HF login ok.')
+            print("HF login ok.")
             break
         except Exception as e:
-            print(f'HF login failed: {e}. Retrying')
+            print(f"HF login failed: {e}. Retrying")
             time.sleep(0.5)
 
 
-hf_token = os.environ.get('HF_TOKEN', None)
+hf_token = os.environ.get("HF_TOKEN", None)
 
 if hf_token is not None:
     login(hf_token)
@@ -1,69 +1,87 @@
 import torch
 
-from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import DEFAULT_PROMPT_TEMPLATE
-from diffusers_helper.utils import crop_or_pad_yield_mask
+from diffusers.pipelines.hunyuan_video.pipeline_hunyuan_video import (
+    DEFAULT_PROMPT_TEMPLATE,
+)
 
 
 @torch.no_grad()
-def encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2, max_length=256):
+def encode_prompt_conds(
+    prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2, max_length=256
+):
     assert isinstance(prompt, str)
 
     prompt = [prompt]
 
     # LLAMA
-    
+
     # Check if there's a custom system prompt template in settings
     custom_template = None
     try:
         from modules.settings import Settings
+
         settings = Settings()
         override_system_prompt = settings.get("override_system_prompt", False)
         custom_template_str = settings.get("system_prompt_template")
-        
+
         if override_system_prompt and custom_template_str:
             try:
                 # Convert the string representation to a dictionary
                 # Extract template and crop_start directly from the string using regex
                 import re
-                
+
                 # Try to extract the template value
-                template_match = re.search(r"['\"]template['\"]\s*:\s*['\"](.+?)['\"](?=\s*,|\s*})", custom_template_str, re.DOTALL)
-                crop_start_match = re.search(r"['\"]crop_start['\"]\s*:\s*(\d+)", custom_template_str)
-                
+                template_match = re.search(
+                    r"['\"]template['\"]\s*:\s*['\"](.+?)['\"](?=\s*,|\s*})",
+                    custom_template_str,
+                    re.DOTALL,
+                )
+                crop_start_match = re.search(
+                    r"['\"]crop_start['\"]\s*:\s*(\d+)", custom_template_str
+                )
+
                 if template_match and crop_start_match:
                     template_value = template_match.group(1)
                     crop_start_value = int(crop_start_match.group(1))
-                    
+
                     # Unescape any escaped characters in the template
-                    template_value = template_value.replace("\\n", "\n").replace("\\\"", "\"").replace("\\'", "'")
-                    
+                    template_value = (
+                        template_value.replace("\\n", "\n")
+                        .replace('\\"', '"')
+                        .replace("\\'", "'")
+                    )
+
                     custom_template = {
                         "template": template_value,
-                        "crop_start": crop_start_value
+                        "crop_start": crop_start_value,
                     }
-                    print(f"Using custom system prompt template from settings: {custom_template}")
+                    print(
+                        f"Using custom system prompt template from settings: {custom_template}"
+                    )
                 else:
-                    print(f"Could not extract template or crop_start from system prompt template string")
-                    print(f"Falling back to default template")
+                    print(
+                        "Could not extract template or crop_start from system prompt template string"
+                    )
+                    print("Falling back to default template")
                     custom_template = None
             except Exception as e:
                 print(f"Error parsing custom system prompt template: {e}")
-                print(f"Falling back to default template")
+                print("Falling back to default template")
                 custom_template = None
         else:
             if not override_system_prompt:
-                print(f"Override system prompt is disabled, using default template")
+                print("Override system prompt is disabled, using default template")
             elif not custom_template_str:
-                print(f"No custom system prompt template found in settings")
+                print("No custom system prompt template found in settings")
             custom_template = None
     except Exception as e:
         print(f"Error loading settings: {e}")
-        print(f"Falling back to default template")
+        print("Falling back to default template")
         custom_template = None
-    
+
     # Use custom template if available, otherwise use default
     template = custom_template if custom_template else DEFAULT_PROMPT_TEMPLATE
-    
+
     prompt_llama = [template["template"].format(p) for p in prompt]
     crop_start = template["crop_start"]
 
@@ -105,7 +123,9 @@ def encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokeniz
         return_length=False,
         return_tensors="pt",
     ).input_ids
-    clip_l_pooler = text_encoder_2(clip_l_input_ids.to(text_encoder_2.device), output_hidden_states=False).pooler_output
+    clip_l_pooler = text_encoder_2(
+        clip_l_input_ids.to(text_encoder_2.device), output_hidden_states=False
+    ).pooler_output
 
     return llama_vec, clip_l_pooler
 
@@ -128,15 +148,21 @@ def vae_decode_fake(latents):
         [-0.2315, -0.1920, -0.1355],
         [-0.0270, 0.0401, -0.0821],
         [-0.0616, -0.0997, -0.0727],
-        [0.0249, -0.0469, -0.1703]
+        [0.0249, -0.0469, -0.1703],
     ]  # From comfyui
 
     latent_rgb_factors_bias = [0.0259, -0.0192, -0.0761]
 
-    weight = torch.tensor(latent_rgb_factors, device=latents.device, dtype=latents.dtype).transpose(0, 1)[:, :, None, None, None]
-    bias = torch.tensor(latent_rgb_factors_bias, device=latents.device, dtype=latents.dtype)
+    weight = torch.tensor(
+        latent_rgb_factors, device=latents.device, dtype=latents.dtype
+    ).transpose(0, 1)[:, :, None, None, None]
+    bias = torch.tensor(
+        latent_rgb_factors_bias, device=latents.device, dtype=latents.dtype
+    )
 
-    images = torch.nn.functional.conv3d(latents, weight, bias=bias, stride=1, padding=0, dilation=1, groups=1)
+    images = torch.nn.functional.conv3d(
+        latents, weight, bias=bias, stride=1, padding=0, dilation=1, groups=1
+    )
     images = images.clamp(0.0, 1.0)
 
     return images
@@ -158,6 +184,8 @@ def vae_decode(latents, vae, image_mode=False):
 
 @torch.no_grad()
 def vae_encode(image, vae):
-    latents = vae.encode(image.to(device=vae.device, dtype=vae.dtype)).latent_dist.sample()
+    latents = vae.encode(
+        image.to(device=vae.device, dtype=vae.dtype)
+    ).latent_dist.sample()
     latents = latents * vae.config.scaling_factor
     return latents