#!/usr/bin/env python3
"""
Replace the first frame of a video with a cover frame that has a title overlay.

Usage:
    python3 generate_cover.py <video_path> --title "Video title"
    python3 generate_cover.py <video_path> --transcript <transcript.json>

If --title is not provided but --transcript is, the transcript text is printed
for the AI agent to summarize from the audience's perspective; the script is
then expected to be called again with --title.

The script replaces the video's first frame with a title-overlaid version and
writes a new video file. The original video is not modified.

Output: <video_name>_with_cover<ext> next to the input video, unless --output
is given.
"""

import argparse
import json
import math
import os
import subprocess
import sys
import tempfile

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from utils import (
    find_chinese_font as _find_font,
    sanitize_title,
    detect_gpu,
    get_ffmpeg_encode_args,
    escape_ffmpeg_path,
)

# Frame rate assumed when ffprobe reports none or an unusable one.
_DEFAULT_FPS = 30.0

# Characters a title line may be broken at: ASCII and full-width punctuation.
_BREAK_CHARS = " ,，。、;；:：!！?？·"


def _to_float(value, default=0.0):
    """Convert *value* to a finite float, or return *default*.

    ffprobe can report missing numeric fields as "N/A" (or omit them), which
    a bare ``float()`` call would turn into an exception.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    return number if math.isfinite(number) else default


def _parse_frame_rate(rate, default=_DEFAULT_FPS):
    """Parse an ffprobe frame rate such as "30/1" or "29.97".

    Returns *default* when the value is malformed or not strictly positive,
    so callers can safely compute ``1 / fps``.
    """
    numerator_text, slash, denominator_text = str(rate).partition("/")
    numerator = _to_float(numerator_text)
    denominator = _to_float(denominator_text) if slash else 1.0
    if numerator <= 0 or denominator <= 0:
        return default
    return numerator / denominator


def get_video_info(video_path):
    """Get video duration, width, height, and frame rate via ffprobe.

    Args:
        video_path: Path to the video file to probe.

    Returns:
        A ``(duration, width, height, fps)`` tuple. The stream duration is
        preferred and the container duration is the fallback. Width and
        height default to 1080x1920 and fps to 30.0 when ffprobe does not
        report usable values.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with an error.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries",
        "stream=width,height,r_frame_rate,duration:format=duration",
        "-of", "json",
        video_path,
    ]
    info = json.loads(subprocess.check_output(cmd, text=True))

    streams = info.get("streams") or [{}]
    stream = streams[0]
    width = stream.get("width", 1080)
    height = stream.get("height", 1920)
    fps = _parse_frame_rate(stream.get("r_frame_rate", "30/1"))

    stream_duration = _to_float(stream.get("duration"))
    format_duration = _to_float((info.get("format") or {}).get("duration"))
    duration = stream_duration if stream_duration > 0 else format_duration
    return duration, width, height, fps


def extract_first_frame(video_path, output_path):
    """Extract the first frame of a video as a still image.

    The image format follows the extension of *output_path* (the script uses
    ``.png``, which is lossless). ``-q:v 1`` only matters for lossy formats.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-frames:v", "1",
        "-q:v", "1",
        output_path,
    ]
    subprocess.run(cmd, check=True, capture_output=True, text=True)


def _find_break(text, cut):
    """Return the index at which to split *text*, searching near *cut*.

    Looks up to 3 characters either side of *cut* for a break character,
    nearest first. Punctuation stays on the current line; a space is dropped.
    Falls back to a hard split at *cut*.
    """
    for offset in range(min(4, cut)):
        for pos in (cut - offset, cut + offset):
            if 0 < pos < len(text) and text[pos] in _BREAK_CHARS:
                return pos if text[pos] == " " else pos + 1
    return cut


def _wrap_title(title, font_size=60, img_width=1080):
    """Split title into lines that fit the image width.

    Each character is assumed to be about ``font_size`` pixels wide (true for
    full-width CJK glyphs) and 80% of the image width is considered usable.

    Returns:
        A list of lines; a title that already fits is returned unchanged as
        a single-element list.
    """
    usable = img_width * 0.80
    chars_per_line = max(4, int(usable / font_size))
    if len(title) <= chars_per_line:
        return [title]

    lines = []
    remaining = title
    while remaining:
        if len(remaining) <= chars_per_line:
            lines.append(remaining)
            break
        split_at = _find_break(remaining, chars_per_line)
        segment = remaining[:split_at].strip()
        if segment:  # never emit an empty line for a run of whitespace
            lines.append(segment)
        remaining = remaining[split_at:].strip()
    return lines


def build_drawtext_filters(title, font_path, width, height):
    """Build the ffmpeg drawtext filter chain for the title overlay.

    Args:
        title: Title text; it is wrapped into several lines when too long.
        font_path: Font file to use, or a falsy value to use ffmpeg's default.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        A comma-separated chain with one ``drawtext`` filter per line of
        text. The text block is centered horizontally and vertically centered
        around 40% of the frame height.
    """
    short_side = min(width, height)
    font_size = max(36, min(int(short_side * 0.06), 120))
    lines = _wrap_title(title, font_size=font_size, img_width=width)
    line_height = int(font_size * 1.5)
    start_y = int(height * 0.40) - (len(lines) * line_height) // 2
    # escape_ffmpeg_path comes from utils; presumably it escapes the path for
    # use inside a filter option -- confirm against utils.
    escaped_font = escape_ffmpeg_path(font_path) if font_path else ""

    filters = []
    for index, line in enumerate(lines):
        # The value is wrapped in single quotes, so only the option-level
        # specials need escaping: backslash and ':'. A single quote cannot
        # appear inside a quoted value, so it is swapped for a typographic
        # apostrophe.
        escaped_text = (
            line.replace("\\", "\\\\")
            .replace("'", "\u2019")
            .replace(":", "\\:")
        )
        y = start_y + index * line_height
        # expansion=none makes drawtext print the text verbatim; with the
        # default expansion mode a '%' in the title aborts the filter and
        # backslashes are swallowed.
        drawtext = (
            f"drawtext=text='{escaped_text}'"
            f":expansion=none"
            f":fontsize={font_size}"
            f":fontcolor=white"
            f":borderw=4:bordercolor=black@0.8"
            f":shadowcolor=black@0.5:shadowx=3:shadowy=3"
            f":x=(w-text_w)/2:y={y}"
        )
        if escaped_font:
            drawtext += f":fontfile='{escaped_font}'"
        filters.append(drawtext)
    return ",".join(filters)


def replace_first_frame(video_path, cover_image_path, output_path, fps):
    """Replace the first frame of the video with the cover image.

    Strategy:
    1. Create a short video clip from the cover image (duration = 1 frame).
    2. Re-encode the original video starting from the 2nd frame (video only).
    3. Concatenate cover_clip + rest_of_video and attach the audio of the
       ORIGINAL file, so the audio is not shifted by one frame and is encoded
       only once.

    Args:
        video_path: Source video; it is only read.
        cover_image_path: Image that becomes the new first frame.
        output_path: Where the resulting video is written (overwritten).
        fps: Frame rate of the source video; must be positive.

    Raises:
        ValueError: If *fps* is not a positive number.
        subprocess.CalledProcessError: If any ffmpeg step fails.
    """
    if not fps or fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")
    frame_duration = 1.0 / fps

    # Encoder arguments come from utils (GPU-accelerated when available,
    # judging by the helper's origin -- confirm in utils).
    encode_args = get_ffmpeg_encode_args()

    # The temporary directory and everything in it is removed on exit, even
    # when an ffmpeg step fails.
    with tempfile.TemporaryDirectory(prefix="cover_") as tmp_dir:
        cover_clip = os.path.join(tmp_dir, "cover_clip.mp4")
        rest_clip = os.path.join(tmp_dir, "rest.mp4")

        # Step 1: Create a 1-frame video from the cover image
        cmd_cover = [
            "ffmpeg", "-y",
            "-loop", "1",
            "-i", cover_image_path,
            "-t", str(frame_duration),
            "-r", str(fps),
        ] + encode_args + [
            "-pix_fmt", "yuv420p",
            "-an",
            cover_clip,
        ]
        subprocess.run(cmd_cover, check=True, capture_output=True, text=True)

        # Step 2: Trim the original video, skip the first frame (no audio:
        # the audio is taken from the original file in step 3)
        cmd_rest = [
            "ffmpeg", "-y",
            "-ss", str(frame_duration),
            "-i", video_path,
        ] + encode_args + [
            "-an",
            rest_clip,
        ]
        subprocess.run(cmd_rest, check=True, capture_output=True, text=True)

        # Step 3: Concatenate cover_clip + rest_clip; "2:a?" maps the
        # original audio if the source has any
        cmd_final = [
            "ffmpeg", "-y",
            "-i", cover_clip,
            "-i", rest_clip,
            "-i", video_path,
            "-filter_complex", "[0:v][1:v]concat=n=2:v=1:a=0[outv]",
            "-map", "[outv]",
            "-map", "2:a?",
        ] + encode_args + [
            "-c:a", "aac", "-b:a", "192k",
            output_path,
        ]
        subprocess.run(cmd_final, check=True, capture_output=True, text=True)


def collect_transcript_text(transcript_path):
    """Read transcript JSON and return the combined text of all segments.

    The file is expected to hold an object with a ``segments`` list whose
    items carry a ``text`` field. Segments whose text is missing, null, or
    blank are skipped; the rest are stripped and joined with single spaces.
    """
    with open(transcript_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    texts = ((seg.get("text") or "").strip() for seg in data.get("segments", []))
    return " ".join(text for text in texts if text)


def main():
    """Command-line entry point: parse arguments and build the cover video.

    Exit status is 0 on success (including the transcript-printing mode) and
    1 on any error.
    """
    parser = argparse.ArgumentParser(
        description="Replace the first frame of a video with a titled cover frame")
    parser.add_argument("video_path", help="Path to the video file")
    parser.add_argument("--title", default=None,
                        help="Cover title text. If omitted, prints transcript for AI to summarize.")
    parser.add_argument("--transcript", default=None,
                        help="Path to transcript JSON (for auto title generation)")
    parser.add_argument("--font-path", default=None,
                        help="Custom font file path")
    parser.add_argument("--output", default=None,
                        help="Output video path (default: _with_cover.mp4)")
    args = parser.parse_args()

    video_path = os.path.abspath(args.video_path)
    if not os.path.isfile(video_path):
        print(f"Error: Video file not found: {video_path}", file=sys.stderr)
        sys.exit(1)

    # If no title given, output transcript text for the AI agent to summarize
    if not args.title:
        if not args.transcript:
            print("Error: Either --title or --transcript must be provided.",
                  file=sys.stderr)
            sys.exit(1)
        if not os.path.isfile(args.transcript):
            print(f"Error: Transcript file not found: {args.transcript}",
                  file=sys.stderr)
            sys.exit(1)
        full_text = collect_transcript_text(args.transcript)
        print("TRANSCRIPT_FOR_TITLE_GENERATION:")
        print(full_text)
        print("\n(Please provide a --title based on the transcript above)")
        sys.exit(0)

    title = sanitize_title(args.title)
    if not title:
        print("Error: Title is empty after sanitization.", file=sys.stderr)
        sys.exit(1)

    # Determine output path
    if args.output:
        output_path = os.path.abspath(args.output)
    else:
        base, ext = os.path.splitext(video_path)
        output_path = base + "_with_cover" + ext

    # ffmpeg runs with -y, so writing onto the input would destroy it while
    # it is still being read.
    if os.path.realpath(output_path) == os.path.realpath(video_path):
        print("Error: Output path must differ from the input video.",
              file=sys.stderr)
        sys.exit(1)

    try:
        # Get video info
        duration, width, height, fps = get_video_info(video_path)
        print(f"Video: {width}x{height}, {fps:.2f}fps, {duration:.1f}s")

        # Intermediate images live in a directory that is removed on exit.
        with tempfile.TemporaryDirectory(prefix="cover_frame_") as work_dir:
            frame_path = os.path.join(work_dir, "frame.png")
            cover_path = os.path.join(work_dir, "titled.png")

            print("Extracting first frame...")
            extract_first_frame(video_path, frame_path)

            # Find font
            font_path, _font_name = _find_font(args.font_path)
            if not font_path:
                print("WARNING: No Chinese font found, title may not render correctly.",
                      file=sys.stderr)

            # Overlay title on the first frame
            print(f"Overlaying title: {title}")
            drawtext = build_drawtext_filters(title, font_path, width, height)
            cmd = [
                "ffmpeg", "-y",
                "-i", frame_path,
                "-vf", drawtext,
                cover_path,
            ]
            subprocess.run(cmd, check=True, capture_output=True, text=True)

            # Replace first frame in video
            print("Replacing first frame with cover...")
            replace_first_frame(video_path, cover_path, output_path, fps)
    except subprocess.CalledProcessError as err:
        # stderr was captured, so surface it instead of a bare traceback.
        print(f"Error: command failed with exit code {err.returncode}: "
              f"{err.cmd[0]}", file=sys.stderr)
        if err.stderr:
            print(err.stderr.strip(), file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError as err:
        # Raised when the ffmpeg/ffprobe executable itself cannot be found.
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)

    print(f"Done: {output_path}")


if __name__ == "__main__":
    main()