diff --git a/README.md b/README.md index 36178cb8..0b14f1f9 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # FramePack Studio -FramePack Studio is an enhanced version of the FramePack demo script, designed to create intricate video scenes with improved prompt adherence. This is very much a work in progress, expect some bugs and broken features. +FramePack Studio is an enhanced version of the FramePack demo script, designed to create intricate video scenes with improved prompt adherence. This is very much a work in progress, expect some bugs and broken features. ![screencapture-127-0-0-1-7860-2025-05-04-20_13_58](https://github.com/user-attachments/assets/8fcb90af-8c3f-47ca-8f23-61d9b59438ae) - ## Current Features - **F1 and Original FramePack Models**: Run both in a single queue +- **Dynamic Prompt Timeline**: Intuitive interface for creating time-based prompt changes - **Timestamped Prompts**: Define different prompts for specific time segments in your video - **Prompt Blending**: Define the blending time between timestamped prompts - **Basic LoRA Support**: Works with most (all?) hunyuan LoRAs @@ -15,7 +15,6 @@ FramePack Studio is an enhanced version of the FramePack demo script, designed t - **I2V and T2V**: Works with or without an input image to allow for more flexibility when working with standard LoRAs - **Latent Image Options**: When using T2V you can generate based on a black, white, green screen or pure noise image - ## Fresh Installation ### Prerequisites @@ -28,6 +27,7 @@ FramePack Studio is an enhanced version of the FramePack demo script, designed t Install via the Pinokio community script "FP-Studio" or: 1. Clone the repository: + ```bash git clone https://github.com/colinurbs/FramePack-Studio.git cd FramePack-Studio @@ -47,6 +47,7 @@ python studio.py ``` Additional command line options: + - `--share`: Create a public Gradio link to share your interface - `--server`: Specify the server address (default: 0.0.0.0) - `--port`: Specify a custom port @@ -58,27 +59,39 @@ Add LoRAs to the /loras/ folder at the root of the installation. Select the LoRA NOTE: slow lora loading is a known issue -## Working with Timestamped Prompts +## Working with the Prompt Timeline + +The new dynamic prompt timeline interface makes it easy to create videos with changing prompts over time: + +1. **Add Prompt Segments**: Click the "+ Add Prompt Segment" button to add new timeline segments +2. **Set Timing**: For each segment, enter the start time in seconds +3. **Enter Prompts**: Type your prompt for each time segment +4. **Remove Segments**: Click the ❌ button to remove unwanted segments (minimum one segment required) +5. **Automatic Sorting**: Segments are automatically sorted by start time +6. **Live Preview**: The timeline updates in real-time as you make changes -You can create videos with changing prompts over time using the following syntax: +The system will smoothly transition between prompts for a cohesive video. The interface automatically handles the conversion to the internal format required by the generation engine. 
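For reference, the conversion the interface performs can be reproduced with the `format_prompt_segments` helper this PR adds in `modules/prompt_handler.py`. A minimal sketch, assuming the module is importable from the repository root; the segment values are only illustrative:

```python
from modules.prompt_handler import format_prompt_segments

# Hypothetical segments as the timeline UI stores them (start_time in seconds).
segments = [
    {"start_time": 0, "prompt": "A serene forest with sunlight filtering through the trees"},
    {"start_time": 5, "prompt": "A deer appears in the clearing"},
    {"start_time": 10, "prompt": "The deer drinks from a small stream"},
]

# The interface serializes the segments into the timestamp string passed to the worker.
print(format_prompt_segments(segments))
# Produces the same timestamp string shown under "Legacy Format" below:
# [0s: A serene forest with sunlight filtering through the trees] [5s: A deer appears in the clearing] [10s: The deer drinks from a small stream]
```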
+ +### Legacy Format + +You can still use the traditional timestamp format directly if preferred: ``` -[0s: A serene forest with sunlight filtering through the trees ] -[5s: A deer appears in the clearing ] -[10s: The deer drinks from a small stream ] +[0s: A serene forest with sunlight filtering through the trees] +[5s: A deer appears in the clearing] +[10s: The deer drinks from a small stream] ``` -Each timestamp defines when that prompt should start influencing the generation. The system will (hopefully) smoothly transition between prompts for a cohesive video. +Each timestamp defines when that prompt should start influencing the generation. ## Credits + Many thanks to [Lvmin Zhang](https://github.com/lllyasviel) for the absolutely amazing work on the original [FramePack](https://github.com/lllyasviel/FramePack) code! Thanks to [Rickard Edén](https://github.com/neph1) for the LoRA code and their general contributions to this growing FramePack scene! Thanks to everyone who has joined the Discord, reported a bug, sumbitted a PR or helped with testing! - - @article{zhang2025framepack, title={Packing Input Frame Contexts in Next-Frame Prediction Models for Video Generation}, author={Lvmin Zhang and Maneesh Agrawala}, diff --git a/modules/interface.py b/modules/interface.py index e7c2d636..75c5c3cf 100644 --- a/modules/interface.py +++ b/modules/interface.py @@ -9,12 +9,192 @@ import numpy as np import base64 import io +import json from modules.video_queue import JobStatus, Job -from modules.prompt_handler import get_section_boundaries, get_quick_prompts, parse_timestamped_prompt +from modules.prompt_handler import get_section_boundaries, get_quick_prompts, parse_timestamped_prompt, format_prompt_segments, parse_prompt_segments from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html from diffusers_helper.bucket_tools import find_nearest_bucket +def create_prompt_interface(default_prompt="[1s: The person waves hello] [3s: The person jumps up and down] [5s: The person does a dance]", max_segments=10): + """Create a reusable prompt interface component""" + + # Container for the interface + interface = {} + + # Parse initial prompt + initial_segments = parse_prompt_segments(default_prompt) + + # Hidden state to store segments + interface['prompt_segments_state'] = gr.State(initial_segments) + + # Main UI container + with gr.Column(): + gr.Markdown("### Prompt Timeline") + + # Create rows for each segment + interface['segment_rows'] = [] + interface['segment_visibility'] = [] + interface['segment_time_inputs'] = [] + interface['segment_prompt_inputs'] = [] + interface['segment_delete_buttons'] = [] + + for i in range(max_segments): + visible = (i < len(initial_segments)) + + with gr.Row(visible=visible) as row: + with gr.Column(scale=10): + with gr.Row(): + time_input = gr.Number( + label=f"Segment {i + 1} - Start Time (seconds)", + value=initial_segments[i].get('start_time', 0) if i < len(initial_segments) else 0, + minimum=0, + maximum=120, + step=0.1 + ) + prompt_input = gr.Textbox( + label="Prompt", + value=initial_segments[i].get('prompt', '') if i < len(initial_segments) else '', + placeholder="Enter your prompt for this segment" + ) + with gr.Column(scale=1): + delete_btn = gr.Button("❌", variant="stop", size="sm") + + interface['segment_rows'].append(row) + interface['segment_time_inputs'].append(time_input) + interface['segment_prompt_inputs'].append(prompt_input) + 
interface['segment_delete_buttons'].append(delete_btn) + + # Hidden components for state management + interface['hidden_prompt'] = gr.Textbox(value=default_prompt, visible=False) + interface['segment_count'] = gr.Number(value=len(initial_segments), visible=False) + + # Add segment button + with gr.Row(): + interface['add_segment_button'] = gr.Button("+ Add Prompt Segment", variant="primary") + + return interface + + +def connect_prompt_interface_events(interface, max_segments=10): + """Connect event handlers for the prompt interface""" + + # Helper functions for event handling + def update_segments(segment_count, *inputs): + """Update segments when time or prompt changes""" + segments = [] + + for i in range(0, len(inputs), 2): + if i < segment_count * 2: + time_val = inputs[i] + prompt_val = inputs[i + 1] + if prompt_val: # Only include segments with content + segments.append({"start_time": time_val, "prompt": prompt_val}) + + segments.sort(key=lambda x: x['start_time']) + formatted_prompt = format_prompt_segments(segments) + + return segments, formatted_prompt + + def add_segment(segment_count): + """Add a new segment""" + new_count = min(segment_count + 1, max_segments) + updates = [] + + # Update visibility of rows + for i in range(max_segments): + updates.append(gr.update(visible=(i < new_count))) + + return [new_count] + updates + + def delete_segment(segment_index, segment_count, *inputs): + """Delete a segment""" + if segment_count <= 1: # Keep at least one segment + return [gr.update()] * (max_segments * 3 + 1) + + segments = [] + + # Collect all segments except the deleted one + for i in range(0, len(inputs), 2): + if i < segment_count * 2 and i // 2 != segment_index: + time_val = inputs[i] + prompt_val = inputs[i + 1] + if prompt_val: + segments.append({"start_time": time_val, "prompt": prompt_val}) + + segments.sort(key=lambda x: x['start_time']) + new_count = len(segments) + + # Prepare updates for all components + updates = [] + + # Update segment count + updates.append(new_count) + + # Update row visibility + for i in range(max_segments): + updates.append(gr.update(visible=(i < new_count))) + + # Update time inputs + for i in range(max_segments): + if i < new_count: + updates.append(gr.update(value=segments[i]['start_time'])) + else: + updates.append(gr.update(value=0)) + + # Update prompt inputs + for i in range(max_segments): + if i < new_count: + updates.append(gr.update(value=segments[i]['prompt'])) + else: + updates.append(gr.update(value='')) + + return updates + + # Get all inputs for the update function + all_inputs = [] + for i in range(max_segments): + all_inputs.extend([ + interface['segment_time_inputs'][i], + interface['segment_prompt_inputs'][i] + ]) + + # Connect change handlers for all time and prompt inputs + for i in range(max_segments): + # Time input changes + interface['segment_time_inputs'][i].change( + fn=update_segments, + inputs=[interface['segment_count']] + all_inputs, + outputs=[interface['prompt_segments_state'], interface['hidden_prompt']] + ) + + # Prompt input changes + interface['segment_prompt_inputs'][i].change( + fn=update_segments, + inputs=[interface['segment_count']] + all_inputs, + outputs=[interface['prompt_segments_state'], interface['hidden_prompt']] + ) + + # Delete button clicks + interface['segment_delete_buttons'][i].click( + fn=delete_segment, + inputs=[gr.Number(i, visible=False), interface['segment_count']] + all_inputs, + outputs=[interface['segment_count']] + + interface['segment_rows'] + + interface['segment_time_inputs'] 
+ + interface['segment_prompt_inputs'] + ) + + # Add segment button click + interface['add_segment_button'].click( + fn=add_segment, + inputs=[interface['segment_count']], + outputs=[interface['segment_count']] + interface['segment_rows'] + ) + + return interface + + def create_interface( process_fn, monitor_fn, @@ -54,9 +234,26 @@ def create_interface( height: 100% !important; background: #222; } - """ - - css += """ + + .prompt-segment { + border: 1px solid #444; + border-radius: 8px; + padding: 10px; + margin-bottom: 10px; + background: #1a1a1a; + } + + .segment-controls { + display: flex; + gap: 10px; + align-items: center; + margin-top: 10px; + } + + .time-input { + width: 100px !important; + } + #fixed-toolbar { position: fixed; top: 0; @@ -79,23 +276,44 @@ def create_interface( min-width: 80px !important; } - - .gr-button-primary{ color:white; } body, .gradio-container { padding-top: 40px !important; } - """ - - css += """ .narrow-button { min-width: 40px !important; width: 40px !important; padding: 0 !important; margin: 0 !important; } + .thumbnail-container { + display: flex; + flex-wrap: wrap; + gap: 10px; + padding: 10px; + } + .thumbnail-item { + width: 100px; + height: 100px; + border: 1px solid #444; + border-radius: 4px; + overflow: hidden; + } + .thumbnail-item img { + width: 100%; + height: 100%; + object-fit: cover; + } + #footer { + margin-top: 20px; + padding: 20px; + border-top: 1px solid #eee; + } + #footer a:hover { + color: #4f46e5 !important; + } """ # Get the theme from settings @@ -138,8 +356,15 @@ def create_interface( ["Black", "White", "Noise", "Green Screen"], label="Latent Image", value="Black", info="Used as a starting point if no image is provided" ) - prompt = gr.Textbox(label="Prompt", value=default_prompt) - + # Create prompt interface for Original model + prompt_interface = create_prompt_interface(default_prompt) + prompt_segments_state = prompt_interface['prompt_segments_state'] + hidden_prompt = prompt_interface['hidden_prompt'] + segment_count = prompt_interface['segment_count'] + + # Connect events + connect_prompt_interface_events(prompt_interface) + with gr.Accordion("Prompt Parameters", open=False): blend_sections = gr.Slider( minimum=0, maximum=10, value=4, step=1, @@ -256,27 +481,6 @@ def on_resolution_change(img, resolutionW, resolutionH): thumbnail_container = gr.Column() thumbnail_container.elem_classes = ["thumbnail-container"] - # Add CSS for thumbnails - css += """ - .thumbnail-container { - display: flex; - flex-wrap: wrap; - gap: 10px; - padding: 10px; - } - .thumbnail-item { - width: 100px; - height: 100px; - border: 1px solid #444; - border-radius: 4px; - overflow: hidden; - } - .thumbnail-item img { - width: 100%; - height: 100%; - object-fit: cover; - } - """ with gr.TabItem("Outputs"): outputDirectory_video = settings.get("output_dir", settings.default_settings['output_dir']) outputDirectory_metadata = settings.get("metadata_dir", settings.default_settings['metadata_dir']) @@ -428,14 +632,13 @@ def cleanup_temp_files(): # Connect the main process function (wrapper for adding to queue) def process_with_queue_update(model_type, *args): # Extract all arguments (ensure order matches inputs lists) - input_image, prompt_text, n_prompt, seed_value, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, randomize_seed_checked, save_metadata_checked, blend_sections, latent_type, clean_up_videos, selected_loras, resolutionW, resolutionH, *lora_args = args + input_image, prompt_segments, 
hidden_prompt_text, n_prompt, seed_value, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, randomize_seed_checked, save_metadata_checked, blend_sections, latent_type, clean_up_videos, selected_loras, resolutionW, resolutionH, *lora_args = args - # DO NOT parse the prompt here. Parsing happens once in the worker. + # Use the formatted prompt text + prompt_text = hidden_prompt_text - # Use the current seed value as is for this job # Call the process function with all arguments - # Pass the model_type and the ORIGINAL prompt_text string to the backend process function - result = process_fn(model_type, input_image, prompt_text, n_prompt, seed_value, total_second_length, # Pass original prompt_text string + result = process_fn(model_type, input_image, prompt_text, n_prompt, seed_value, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, save_metadata_checked, blend_sections, latent_type, clean_up_videos, selected_loras, resolutionW, resolutionH, *lora_args) @@ -472,7 +675,8 @@ def end_process_with_update(): # --- Inputs for Original Model --- ips = [ input_image, - prompt, + prompt_segments_state, + hidden_prompt, n_prompt, seed, total_second_length, @@ -561,12 +765,9 @@ def update_lora_sliders(selected_loras): # --- Connect Metadata Loading --- - # Function to load metadata from JSON file - def load_metadata_from_json(json_path): + def load_metadata_from_json(json_path, max_segments=10): if not json_path: - # Return updates for all potentially affected components - num_orig_sliders = len(lora_sliders) - return [gr.update()] * (2 + num_orig_sliders) + return [gr.update()] * (3 + len(lora_names) + max_segments * 3 + 1) try: with open(json_path, 'r') as f: @@ -575,43 +776,74 @@ def load_metadata_from_json(json_path): prompt_val = metadata.get('prompt') seed_val = metadata.get('seed') + # Parse the prompt into segments + segments = parse_prompt_segments(prompt_val) if prompt_val else [{"start_time": 0, "prompt": ""}] + segment_count = len(segments) + # Check for LoRA values in metadata - lora_weights = metadata.get('loras', {}) # Changed key to 'loras' based on studio.py worker + lora_weights = metadata.get('loras', {}) print(f"Loaded metadata from JSON: {json_path}") print(f"Prompt: {prompt_val}, Seed: {seed_val}") # Update the UI components - updates = [ - gr.update(value=prompt_val) if prompt_val else gr.update(), - gr.update(value=seed_val) if seed_val is not None else gr.update() - ] - - # Update LoRA sliders if they exist in metadata + updates = [] + + # prompt_segments_state + updates.append(segments) + + # hidden_prompt + updates.append(gr.update(value=prompt_val) if prompt_val else gr.update()) + + # seed + updates.append(gr.update(value=seed_val) if seed_val is not None else gr.update()) + + # LoRA sliders for lora in lora_names: if lora in lora_weights: updates.append(gr.update(value=lora_weights[lora])) else: - updates.append(gr.update()) # No change if LoRA not in metadata + updates.append(gr.update()) + + # segment_count + updates.append(segment_count) + + # Update visibility of rows + for i in range(max_segments): + updates.append(gr.update(visible=(i < segment_count))) + + # Update time inputs + for i in range(max_segments): + if i < segment_count: + updates.append(gr.update(value=segments[i]['start_time'])) + else: + updates.append(gr.update(value=0)) + + # Update prompt inputs + for i in range(max_segments): + if i < segment_count: + 
updates.append(gr.update(value=segments[i]['prompt'])) + else: + updates.append(gr.update(value='')) - # Ensure the number of updates matches the number of outputs - num_orig_sliders = len(lora_sliders) - return updates[:2 + num_orig_sliders] # Return updates for prompt, seed, and sliders + return updates except Exception as e: print(f"Error loading metadata: {e}") - num_orig_sliders = len(lora_sliders) - return [gr.update()] * (2 + num_orig_sliders) - + return [gr.update()] * (3 + len(lora_names) + max_segments * 3 + 1) # Connect JSON metadata loader for Original tab json_upload.change( fn=load_metadata_from_json, inputs=[json_upload], - outputs=[prompt, seed] + [lora_sliders[lora] for lora in lora_names] + outputs=[prompt_segments_state, hidden_prompt, seed] + + [lora_sliders[lora] for lora in lora_names] + + [segment_count] + + prompt_interface['segment_rows'] + + prompt_interface['segment_time_inputs'] + + prompt_interface['segment_prompt_inputs'] ) - # --- Helper Functions (defined within create_interface scope if needed by handlers) --- # Function to get queue statistics def get_queue_stats(): @@ -662,18 +894,6 @@ def get_queue_stats(): """) - # Add CSS for footer - css += """ - #footer { - margin-top: 20px; - padding: 20px; - border-top: 1px solid #eee; - } - #footer a:hover { - color: #4f46e5 !important; - } - """ - return block @@ -717,33 +937,4 @@ def format_queue_status(jobs): elapsed_time # Removed thumbnail from row data ]) - return rows - -# Create the queue status update function (wrapper around format_queue_status) -def update_queue_status_with_thumbnails(): # Function name is now slightly misleading, but keep for now to avoid breaking clicks - # This function is likely called by the refresh button and potentially the timer - # It needs access to the job_queue object - # Assuming job_queue is accessible globally or passed appropriately - # For now, let's assume it's globally accessible as defined in studio.py - # If not, this needs adjustment based on how job_queue is managed. - try: - # Need access to the global job_queue instance from studio.py - # This might require restructuring or passing job_queue differently. - # For now, assuming it's accessible (this might fail if run standalone) - from __main__ import job_queue # Attempt to import from main script scope - - jobs = job_queue.get_all_jobs() - for job in jobs: - if job.status == JobStatus.PENDING: - job.queue_position = job_queue.get_queue_position(job.id) - - if job_queue.current_job: - job_queue.current_job.status = JobStatus.RUNNING - - return format_queue_status(jobs) - except ImportError: - print("Error: Could not import job_queue. 
Queue status update might fail.") - return [] # Return empty list on error - except Exception as e: - print(f"Error updating queue status: {e}") - return [] + return rows \ No newline at end of file diff --git a/modules/prompt_handler.py b/modules/prompt_handler.py index 51008600..473b75c9 100644 --- a/modules/prompt_handler.py +++ b/modules/prompt_handler.py @@ -1,6 +1,6 @@ import re from dataclasses import dataclass -from typing import List, Optional +from typing import List, Optional, Dict @dataclass @@ -162,3 +162,90 @@ def get_quick_prompts() -> List[List[str]]: '[0s: Person looks surprised] [1.1s: Person raises arms above head] [2.2s-3.3s: Person puts hands on hips]' ] return [[x] for x in prompts] + + +def parse_prompt_segments(prompt_text: str) -> List[Dict[str, float | str]]: + """ + Parse existing prompt text to segments for editing in the UI + + Args: + prompt_text: The formatted prompt text with timestamps + + Returns: + List of dictionaries containing start_time and prompt for each segment + """ + if not prompt_text or "[" not in prompt_text: + return [{"start_time": 0, "prompt": prompt_text}] + + segments = [] + pattern = r'\[(\d+(?:\.\d+)?s)(?:-(\d+(?:\.\d+)?s))?\s*:\s*(.*?)\]' + + for match in re.finditer(pattern, prompt_text): + start_time_str = match.group(1) + section_text = match.group(3).strip() + start_time = float(start_time_str.rstrip('s')) + segments.append({"start_time": start_time, "prompt": section_text}) + + # Sort by start time + segments.sort(key=lambda x: x['start_time']) + return segments if segments else [{"start_time": 0, "prompt": ""}] + + +def format_prompt_segments(segments: List[Dict[str, float | str]]) -> str: + """ + Convert prompt segments from UI format to the format expected by the backend + + Args: + segments: List of segment dictionaries with start_time and prompt + + Returns: + Formatted prompt string with timestamp notation + """ + formatted_parts = [] + for segment in segments: + start_time = segment.get('start_time', 0) + prompt = segment.get('prompt', '') + if prompt: + formatted_parts.append(f"[{start_time}s: {prompt}]") + return " ".join(formatted_parts) + + +def validate_segments(segments: List[Dict[str, float | str]], total_duration: float) -> List[str]: + """ + Validate prompt segments for potential issues + + Args: + segments: List of segment dictionaries + total_duration: Total video duration in seconds + + Returns: + List of validation error messages (empty if valid) + """ + errors = [] + + if not segments: + errors.append("At least one prompt segment is required") + return errors + + # Check for empty prompts + for i, segment in enumerate(segments): + if not segment.get('prompt', '').strip(): + errors.append(f"Segment {i + 1} has an empty prompt") + + # Check for out-of-range times + for i, segment in enumerate(segments): + start_time = segment.get('start_time', 0) + if start_time < 0: + errors.append(f"Segment {i + 1} has negative start time") + elif start_time > total_duration: + errors.append(f"Segment {i + 1} starts after video ends") + + # Check for overlapping segments (optional - could be intentional for blending) + sorted_segments = sorted(segments, key=lambda x: x.get('start_time', 0)) + for i in range(len(sorted_segments) - 1): + current_end = sorted_segments[i].get('start_time', 0) + 0.1 # Minimal duration + next_start = sorted_segments[i + 1].get('start_time', 0) + if current_end > next_start: + errors.append(f"Segments {i + 1} and {i + 2} have overlapping times") + + return errors \ No newline at end of file