Support period in lora name #76

Merged · 7 commits · May 8, 2025
14 changes: 12 additions & 2 deletions diffusers_helper/lora_utils.py
@@ -1,4 +1,4 @@
-from pathlib import Path
+from pathlib import Path, PurePath
 from typing import Dict, List, Optional, Union
 from diffusers.loaders.lora_pipeline import _fetch_state_dict
 from diffusers.loaders.lora_conversion_utils import _convert_hunyuan_video_lora_to_diffusers
@@ -33,7 +33,17 @@ def load_lora(transformer, lora_path: Path, weight_name: Optional[str] = "pytorc

     state_dict = _convert_hunyuan_video_lora_to_diffusers(state_dict)

-    adapter_name = weight_name.split(".")[0]
+    # should weight_name even be Optional[str] or just str?
+    # For now, we assume it is never None
+    # The module name in the state_dict must not include a . in the name
+    # See https://github.com/pytorch/pytorch/pull/6639/files#diff-4be56271f7bfe650e3521c81fd363da58f109cd23ee80d243156d2d6ccda6263R133-R134
+    adapter_name = PurePath(str(weight_name).replace('_DOT_', '.')).stem.replace('.', '_DOT_')
+    if '_DOT_' in adapter_name:
+        print(
+            f"LoRA file '{weight_name}' contains a '.' in the name. " +
+            'This may cause issues. Consider renaming the file.' +
+            f" Using '{adapter_name}' as the adapter name to be safe."
+        )

     # Check if adapter already exists and delete it if it does
     if hasattr(transformer, 'peft_config') and adapter_name in transformer.peft_config:
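
Editor's note: the new adapter-name logic round-trips cleanly — unescape any prior `_DOT_` escaping, drop the file extension, then re-escape remaining dots so the name is safe as a PyTorch module name. A minimal sketch of the behavior (`adapter_name_for` and the filenames below are hypothetical illustrations, not part of this PR):

```python
from pathlib import PurePath

def adapter_name_for(weight_name: str) -> str:
    # Unescape first so an already-escaped name round-trips, strip the
    # extension with .stem, then escape any remaining dots: PyTorch rejects
    # '.' in module names because it is the submodule separator.
    return PurePath(weight_name.replace('_DOT_', '.')).stem.replace('.', '_DOT_')

assert adapter_name_for("style.safetensors") == "style"
assert adapter_name_for("my.style.v2.safetensors") == "my_DOT_style_DOT_v2"
```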
126 changes: 105 additions & 21 deletions modules/interface.py
@@ -2,6 +2,7 @@
 import time
 import datetime
 import random
+import json
 import os
 from typing import List, Dict, Any, Optional
 from PIL import Image
@@ -125,7 +126,8 @@ def create_interface(
             height=420,
             elem_classes="contain-image"
         )



         with gr.Accordion("Latent Image Options", open=False):
             latent_type = gr.Dropdown(
                 ["Black", "White", "Noise", "Green Screen"], label="Latent Image", value="Black", info="Used as a starting point if no image is provided"
@@ -142,11 +144,21 @@
         with gr.Row():
             steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
             total_second_length = gr.Slider(label="Video Length (Seconds)", minimum=1, maximum=120, value=6, step=0.1)
-        with gr.Row():
-            resolution = gr.Slider(
-                label="Output Resolution (Width)", minimum=128, maximum=768, value=640,
-                step=32, info="Nearest valid bucket size will be used. Height will be adjusted automatically."
+        with gr.Row("Resolution"):
+            resolutionW = gr.Slider(
+                label="Width", minimum=128, maximum=768, value=640, step=32,
+                info="Nearest valid width will be used."
             )
+            resolutionH = gr.Slider(
+                label="Height", minimum=128, maximum=768, value=640, step=32,
+                info="Nearest valid height will be used."
+            )
+        def on_input_image_change(img):
+            if img is not None:
+                return gr.update(info="Nearest valid bucket size will be used. Height will be adjusted automatically."), gr.update(visible=False)
+            else:
+                return gr.update(info="Nearest valid width will be used."), gr.update(visible=True)
+        input_image.change(fn=on_input_image_change, inputs=[input_image], outputs=[resolutionW, resolutionH])
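
Editor's note: both sliders advertise "nearest valid" snapping, but the actual bucket selection lives in the generation backend, not in this diff. As a hedged sketch of what snapping a requested dimension to the slider grid could look like (`snap_resolution` is hypothetical, not the repo's bucket code):

```python
def snap_resolution(value: int, step: int = 32, lo: int = 128, hi: int = 768) -> int:
    # Round to the nearest multiple of `step`, then clamp to the slider range.
    return min(hi, max(lo, round(value / step) * step))

assert snap_resolution(650) == 640   # 650 is not a multiple of 32
assert snap_resolution(100) == 128   # clamped up to the slider minimum
```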
with gr.Row("LoRAs"):
lora_selector = gr.Dropdown(
choices=lora_names,
@@ -155,7 +167,7 @@
                 value=[],
                 info="Select one or more LoRAs to use for this job"
             )
-
+            lora_names_states = gr.State(lora_names)
             lora_sliders = {}
             for lora in lora_names:
                 lora_sliders[lora] = gr.Slider(
@@ -173,20 +185,18 @@
             save_metadata = gr.Checkbox(label="Save Metadata", value=True, info="Save to JSON file")
         with gr.Row("TeaCache"):
             use_teacache = gr.Checkbox(label='Use TeaCache', value=True, info='Faster speed, but often makes hands and fingers slightly worse.')

         n_prompt = gr.Textbox(label="Negative Prompt", value="", visible=False)  # Not used

         with gr.Row():
             seed = gr.Number(label="Seed", value=31337, precision=0)
             randomize_seed = gr.Checkbox(label="Randomize", value=False, info="Generate a new random seed for each job")


         with gr.Accordion("Advanced Parameters", open=False):
             latent_window_size = gr.Slider(label="Latent Window Size", minimum=1, maximum=33, value=9, step=1, visible=True, info='Change at your own risk, very experimental')  # Should not change
             cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=1.0, step=0.01, visible=False)  # Should not change
             gs = gr.Slider(label="Distilled CFG Scale", minimum=1.0, maximum=32.0, value=10.0, step=0.01)
             rs = gr.Slider(label="CFG Re-Scale", minimum=0.0, maximum=1.0, value=0.0, step=0.01, visible=False)  # Should not change
-            gpu_memory_preservation = gr.Slider(label="GPU Inference Preserved Memory (GB) (larger means slower)", minimum=6, maximum=128, value=6, step=0.1, info="Set this number to a larger value if you encounter OOM. Larger value causes slower speed.")
+            gpu_memory_preservation = gr.Slider(label="GPU Inference Preserved Memory (GB) (larger means slower)", minimum=1, maximum=128, value=6, step=0.1, info="Set this number to a larger value if you encounter OOM. Larger value causes slower speed.")
         with gr.Accordion("Output Parameters", open=False):
             mp4_crf = gr.Slider(label="MP4 Compression", minimum=0, maximum=100, value=16, step=1, info="Lower means better quality. 0 is uncompressed. Change to 16 if you get black outputs.")
             clean_up_videos = gr.Checkbox(
@@ -217,6 +227,7 @@ def create_interface(
             elem_classes="contain-image"
         )


         with gr.Accordion("Latent Image Options", open=False):
             f1_latent_type = gr.Dropdown(
                 ["Black", "White", "Noise", "Green Screen"], label="Latent Image", value="Black", info="Used as a starting point if no image is provided"
@@ -233,11 +244,21 @@
         with gr.Row():
             f1_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
             f1_total_second_length = gr.Slider(label="Video Length (Seconds)", minimum=1, maximum=120, value=5, step=0.1)
-        with gr.Row():
-            f1_resolution = gr.Slider(
-                label="Output Resolution (Width)", minimum=128, maximum=768, value=640,
-                step=32, info="Nearest valid bucket size will be used. Height will be adjusted automatically."
+        with gr.Row("Resolution"):
+            f1_resolutionW = gr.Slider(
+                label="Width", minimum=128, maximum=768, value=640, step=32,
+                info="Nearest valid width will be used."
             )
+            f1_resolutionH = gr.Slider(
+                label="Height", minimum=128, maximum=768, value=640, step=32,
+                info="Nearest valid height will be used."
+            )
+        def f1_on_input_image_change(img):
+            if img is not None:
+                return gr.update(info="Nearest valid bucket size will be used. Height will be adjusted automatically."), gr.update(visible=False)
+            else:
+                return gr.update(info="Nearest valid width will be used."), gr.update(visible=True)
+        f1_input_image.change(fn=f1_on_input_image_change, inputs=[f1_input_image], outputs=[f1_resolutionW, f1_resolutionH])
         with gr.Row("LoRAs"):
             f1_lora_selector = gr.Dropdown(
                 choices=lora_names,
@@ -246,7 +267,7 @@
                 value=[],
                 info="Select one or more LoRAs to use for this job"
             )
-
+            f1_lora_names_states = gr.State(lora_names)
             f1_lora_sliders = {}
             for lora in lora_names:
                 f1_lora_sliders[lora] = gr.Slider(
@@ -275,7 +296,7 @@
             f1_cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=32.0, value=1.0, step=0.01, visible=False)
             f1_gs = gr.Slider(label="Distilled CFG Scale", minimum=1.0, maximum=32.0, value=10.0, step=0.01)
             f1_rs = gr.Slider(label="CFG Re-Scale", minimum=0.0, maximum=1.0, value=0.0, step=0.01, visible=False)
-            f1_gpu_memory_preservation = gr.Slider(label="GPU Inference Preserved Memory (GB) (larger means slower)", minimum=6, maximum=128, value=6, step=0.1, info="Set this number to a larger value if you encounter OOM. Larger value causes slower speed.")
+            f1_gpu_memory_preservation = gr.Slider(label="GPU Inference Preserved Memory (GB) (larger means slower)", minimum=1, maximum=128, value=6, step=0.1, info="Set this number to a larger value if you encounter OOM. Larger value causes slower speed.")
         with gr.Accordion("Output Parameters", open=False):
             f1_mp4_crf = gr.Slider(label="MP4 Compression", minimum=0, maximum=100, value=16, step=1, info="Lower means better quality. 0 is uncompressed. Change to 16 if you get black outputs.")
             f1_clean_up_videos = gr.Checkbox(
@@ -339,6 +360,67 @@ def create_interface(
         object-fit: cover;
     }
     """
+        # with gr.TabItem("Outputs"):
+        #     outputDirectory = settings.get("output_dir", settings.default_settings['output_dir'])
+        #     def get_gallery_items():
+        #         items = []
+        #         for f in os.listdir(outputDirectory):
+        #             if f.endswith(".png"):
+        #                 prefix = os.path.splitext(f)[0]
+        #                 latest_video = get_latest_video_version(prefix)
+        #                 if latest_video:
+        #                     video_path = os.path.join(outputDirectory, latest_video)
+        #                     mtime = os.path.getmtime(video_path)
+        #                     preview_path = os.path.join(outputDirectory, f)
+        #                     items.append((preview_path, prefix, mtime))
+        #         items.sort(key=lambda x: x[2], reverse=True)
+        #         return [(i[0], i[1]) for i in items]
+        #     def get_latest_video_version(prefix):
+        #         max_number = -1
+        #         selected_file = None
+        #         for f in os.listdir(outputDirectory):
+        #             if f.startswith(prefix + "_") and f.endswith(".mp4"):
+        #                 num = int(f.replace(prefix + "_", '').replace(".mp4", ''))
+        #                 if num > max_number:
+        #                     max_number = num
+        #                     selected_file = f
+        #         return selected_file
+        #     def load_video_and_info_from_prefix(prefix):
+        #         video_file = get_latest_video_version(prefix)
+        #         if not video_file:
+        #             return None, "JSON not found."
+        #         video_path = os.path.join(outputDirectory, video_file)
+        #         json_path = os.path.join(outputDirectory, prefix) + ".json"
+        #         info = {"description": "no info"}
+        #         if os.path.exists(json_path):
+        #             with open(json_path, "r", encoding="utf-8") as f:
+        #                 info = json.load(f)
+        #         return video_path, json.dumps(info, indent=2, ensure_ascii=False)
+        #     gallery_items_state = gr.State(get_gallery_items())
+        #     with gr.Row():
+        #         with gr.Column(scale=2):
+        #             thumbs = gr.Gallery(
+        #                 # value=[i[0] for i in get_gallery_items()],
+        #                 columns=[4],
+        #                 allow_preview=False,
+        #                 object_fit="cover",
+        #                 height="auto"
+        #             )
+        #             refresh_button = gr.Button("Update")
+        #         with gr.Column(scale=5):
+        #             video_out = gr.Video(sources=[], autoplay=True, loop=True, visible=False)
+        #         with gr.Column(scale=1):
+        #             info_out = gr.Textbox(label="Generation info", visible=False)
+        #     def refresh_gallery():
+        #         new_items = get_gallery_items()
+        #         return gr.update(value=[i[0] for i in new_items]), new_items
+        #     refresh_button.click(fn=refresh_gallery, outputs=[thumbs, gallery_items_state])
+
+        #     def on_select(evt: gr.SelectData, gallery_items):
+        #         prefix = gallery_items[evt.index][1]
+        #         video, info = load_video_and_info_from_prefix(prefix)
+        #         return gr.update(value=video, visible=True), gr.update(value=info, visible=True)
+        #     thumbs.select(fn=on_select, inputs=[gallery_items_state], outputs=[video_out, info_out])
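
Editor's note: one caveat in the commented-out helper above — `get_latest_video_version` calls `int()` on whatever follows the prefix, so a file like `prefix_final.mp4` would raise a `ValueError`. If this tab is ever revived, a defensive variant might look like this (a sketch; `output_dir` stands in for the `outputDirectory` setting):

```python
import os
import re

def get_latest_video_version(output_dir: str, prefix: str):
    # Only accept files named exactly "<prefix>_<number>.mp4".
    pattern = re.compile(re.escape(prefix) + r"_(\d+)\.mp4$")
    best_num, best_file = -1, None
    for f in os.listdir(output_dir):
        m = pattern.match(f)
        if m and int(m.group(1)) > best_num:
            best_num, best_file = int(m.group(1)), f
    return best_file
```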

with gr.Tab("Settings"):
with gr.Row():
@@ -430,7 +512,7 @@ def cleanup_temp_files():
     # Connect the main process function (wrapper for adding to queue)
     def process_with_queue_update(model_type, *args):
         # Extract all arguments (ensure order matches inputs lists)
-        input_image, prompt_text, n_prompt, seed_value, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, randomize_seed_checked, save_metadata_checked, blend_sections, latent_type, clean_up_videos, selected_loras, resolution, *lora_args = args
+        input_image, prompt_text, n_prompt, seed_value, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, randomize_seed_checked, save_metadata_checked, blend_sections, latent_type, clean_up_videos, selected_loras, resolutionW, resolutionH, *lora_args = args

         # DO NOT parse the prompt here. Parsing happens once in the worker.

@@ -439,7 +521,7 @@ def process_with_queue_update(model_type, *args):
         # Pass the model_type and the ORIGINAL prompt_text string to the backend process function
         result = process_fn(model_type, input_image, prompt_text, n_prompt, seed_value, total_second_length, # Pass original prompt_text string
                             latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation,
-                            use_teacache, mp4_crf, save_metadata_checked, blend_sections, latent_type, clean_up_videos, selected_loras, resolution, *lora_args)
+                            use_teacache, mp4_crf, save_metadata_checked, blend_sections, latent_type, clean_up_videos, selected_loras, resolutionW, resolutionH, *lora_args)

         # If randomize_seed is checked, generate a new random seed for the next job
         new_seed_value = None
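
Editor's note: Gradio passes component values to a click handler positionally, which is why the unpacking at the top of process_with_queue_update must mirror the ips list exactly, including the new resolutionW/resolutionH pair. A minimal illustration of that contract (all names here are hypothetical):

```python
def handler(*args):
    # Order must match the inputs= list wired to the click event, one-to-one.
    prompt, width, height, *lora_weights = args
    return f"{width}x{height}: {prompt} ({len(lora_weights)} LoRA weights)"

# Wiring (sketch): btn.click(fn=handler,
#                            inputs=[prompt_box, resolutionW, resolutionH, *slider_list],
#                            outputs=[result_box])
```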
@@ -492,7 +574,9 @@ def end_process_with_update():
         latent_type,
         clean_up_videos,
         lora_selector,
-        resolution
+        resolutionW,
+        resolutionH,
+        lora_names_states
     ]
     # Add LoRA sliders to the input list
     ips.extend([lora_sliders[lora] for lora in lora_names])
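
Editor's note on the new lora_names_states entry: click handlers only receive the values of the components (and States) listed in inputs, so the list of LoRA names fixed at interface build time is carried in a gr.State and passed alongside the sliders, letting the worker map slider values back to names. A minimal sketch of the pattern (component names here are illustrative, not from this PR):

```python
import gradio as gr

with gr.Blocks() as demo:
    names_state = gr.State(["lora_a", "lora_b"])  # captured at build time
    weight_a = gr.Slider(0, 2, value=1.0, label="lora_a weight")
    weight_b = gr.Slider(0, 2, value=1.0, label="lora_b weight")
    out = gr.Textbox()

    def start_job(names, *weights):
        # Pair each known name with its slider value, mirroring *lora_args.
        return ", ".join(f"{n}={w}" for n, w in zip(names, weights))

    gr.Button("Run").click(start_job, inputs=[names_state, weight_a, weight_b], outputs=out)
```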
@@ -518,7 +602,9 @@ def end_process_with_update():
         f1_latent_type,
         f1_clean_up_videos,
         f1_lora_selector,
-        f1_resolution
+        f1_resolutionW,
+        f1_resolutionH,
+        f1_lora_names_states
     ]
     # Add F1 LoRA sliders to the input list
     f1_ips.extend([f1_lora_sliders[lora] for lora in lora_names])
@@ -626,8 +712,6 @@ def load_metadata_from_json(json_path):
         return [gr.update()] * (2 + num_orig_sliders)

     try:
-        import json
-
         with open(json_path, 'r') as f:
             metadata = json.load(f)
