#!/usr/bin/env python3
"""
Douyin ranking HTML report generator.

Usage:
    python generate_report.py --data result.json --output report.html
    python generate_report.py --data result.json   # output file is auto-named
"""

import argparse
import html
import json
import os
import platform
import subprocess
import sys
import time
from pathlib import Path

# ─────────────────────────────────────────────────────────
# Category inference: guess a content vertical from the account name.
# Rules are evaluated in order and the first keyword hit wins.
# NOTE(review): several keywords are very short ("MC", "LOL", "舞", "猫",
# "狗") and are matched as case-insensitive substrings, so false positives
# are likely — confirm whether that is acceptable.
# ─────────────────────────────────────────────────────────
CATEGORY_INFER_RULES = [
    # Games
    {"keywords": ["王者荣耀", "英雄联盟", "原神", "崩坏", "蛋仔", "阴阳师", "明日方舟", "光遇", "鸣潮"], "category": "游戏"},
    {"keywords": ["吃鸡", "和平精英", "PUBG", "LOL", "无畏契约", "CSGO", "DOTA", "Valorant"], "category": "游戏"},
    {"keywords": ["迷你世界", "我的世界", "MC", "Minecraft"], "category": "游戏"},
    # Celebrities / entertainment
    {"keywords": ["official", "工作室"], "category": "官方账号"},
    {"keywords": ["爱奇艺", "优酷", "腾讯视频", "芒果TV", "Bilibili", "B站"], "category": "视频平台"},
    {"keywords": ["微博", "新浪", "凤凰网", "澎湃"], "category": "新闻媒体"},
    {"keywords": ["明星", "演员", "歌手", "偶像"], "category": "明星娱乐"},
    # Food
    {"keywords": ["美食", "吃货", "探店", "烹饪"], "category": "美食"},
    {"keywords": ["烘焙", "蛋糕", "面包", "甜品"], "category": "烘焙甜点"},
    # Travel / outdoors
    {"keywords": ["旅行", "旅游", "自驾", "露营"], "category": "旅行"},
    # Pets
    {"keywords": ["宠物", "猫", "狗", "萌宠", "铲屎"], "category": "动物"},
    # Fashion / outfits
    {"keywords": ["穿搭", "OOTD", "衣橱"], "category": "潮流风尚"},
    # Fitness / sports
    {"keywords": ["健身", "瑜伽", "减脂", "增肌", "运动"], "category": "身体锻炼"},
    # Learning / knowledge
    {"keywords": ["教育", "学习", "干货", "职场", "知识"], "category": "学习教育"},
    # Tech / reviews
    {"keywords": ["数码", "手机", "电脑", "测评", "评测", "科技"], "category": "数码科技"},
    # Parenting
    {"keywords": ["母婴", "育儿", "宝宝", "辣妈", "萌娃", "亲子"], "category": "亲子"},
    # Home / renovation
    {"keywords": ["家居", "装修", "软装", "设计", "收纳"], "category": "居家装修"},
    # Relationships / psychology
    {"keywords": ["星座", "情感", "恋爱", "心理", "塔罗"], "category": "情感"},
    # Music
    {"keywords": ["音乐", "唱歌", "弹琴", "乐器"], "category": "音乐"},
    # Film / TV
    {"keywords": ["影视", "娱乐", "综艺", "剧集", "电影"], "category": "影视"},
    # Dance
    {"keywords": ["舞蹈", "跳舞", "舞"], "category": "舞蹈才艺"},
    # Daily life / vlog
    {"keywords": ["日记", "vlog", "plog", "日常", "记录", "生活"], "category": "生活vlog"},
    # ACG
    {"keywords": ["二次元", "动漫", "动画", "ACG"], "category": "二次元"},
    # Health / medicine
    {"keywords": ["健康", "养生", "医疗", "医学"], "category": "健康医学"},
]


def infer_category(account_name: str) -> str:
    """Infer a category label from an account name.

    Matching is a case-insensitive substring search. A short list of
    well-known game titles is checked first, then ``CATEGORY_INFER_RULES``
    in declaration order.

    Args:
        account_name: Display name of the account. ``None`` or an empty
            string is treated as "no information".

    Returns:
        The category of the first matching rule, or ``"全部"`` when nothing
        matches.
    """
    haystack = (account_name or "").lower()
    if not haystack:
        return "全部"

    # Well-known game titles, checked before the general rules. This is a
    # substring test, not an exact-name comparison.
    priority_names = {
        "王者荣耀": "游戏",
        "原神": "游戏",
        "和平精英": "游戏",
        "英雄联盟": "游戏",
        "迷你世界": "游戏",
    }
    for name, cat in priority_names.items():
        if name.lower() in haystack:
            return cat

    for rule in CATEGORY_INFER_RULES:
        if any(kw.lower() in haystack for kw in rule["keywords"]):
            return rule["category"]
    return "全部"


def parse_num(val) -> int:
    """Parse a count that may be a number or a string such as ``"1.5w"``.

    The unit is taken from an explicit suffix: ``w``/``W``/``万`` multiplies
    by 10,000 and ``亿`` by 100,000,000. A trailing ``+`` and thousands
    separators are ignored. Values without a suffix are taken literally, so
    ``"10w"`` is 100000 and ``"10"`` is 10.

    Args:
        val: ``None``, ``"-"``, an int/float, or a numeric string.

    Returns:
        The parsed integer, or 0 when the value is missing or unparseable.
    """
    if val is None or val == "-":
        return 0
    try:
        # Real numbers carry no unit; never scale them.
        if isinstance(val, (int, float)):
            return int(val)

        text = str(val).replace("+", "").replace(",", "").strip()
        multiplier = 1
        if text[-1:] in ("w", "W", "万"):
            multiplier = 10_000
            text = text[:-1]
        elif text.endswith("亿"):
            multiplier = 100_000_000
            text = text[:-1]
        # round() avoids off-by-one truncation from binary float error.
        return int(round(float(text) * multiplier))
    except (ValueError, TypeError, OverflowError):
        return 0


def format_interaction(num: int) -> str:
    """Format an interaction total, e.g. 123456 -> ``"12w+"``, 15000 -> ``"1.5w+"``."""
    if num >= 100_000:
        return f"{num // 10_000}w+"
    if num >= 10_000:
        return f"{num / 10_000:.1f}w+"
    return str(num)


def format_followers(num: int) -> str:
    """Format a follower-style count using ``亿`` (1e8) or ``w`` (1e4) units."""
    if num >= 100_000_000:
        return f"{num / 100_000_000:.1f}亿"
    if num >= 10_000:
        return f"{num / 10_000:.1f}w"
    return str(num)


PERIOD_UPDATE_RULES = {
    "day": "每日17:30",
    "week": "每周一17:30",
    "month": "每月2号9点",
}

PERIOD_LABELS = {"day": "日榜", "week": "周榜", "month": "月榜"}

# Per-period texts: (update rule, supported look-back window, latest period).
_PERIOD_TEXTS = {
    "day": ("每日17:30更新", "过去7天", "昨日"),
    "week": ("每周一17:30更新", "过去3周", "上周"),
    "month": ("每月2号9点更新", "过去3月", "上月"),
}

# Category key -> label shown in the report heading.
_CAT_DISPLAY = {
    "全部": "全品类",
    "化妆美容": "美妆类",
    "美食": "美食类",
    "旅行": "旅行类",
    "数码科技": "科技类",
    "游戏": "游戏类",
    "健康医学": "健康类",
    "亲子": "亲子类",
    "身体锻炼": "运动类",
    "学习教育": "教育类",
    "动物": "动物类",
    "潮流风尚": "时尚类",
    "居家装修": "家居类",
    "影视": "影视类",
    "音乐": "音乐类",
    "舞蹈才艺": "舞蹈类",
    "明星娱乐": "娱乐类",
    "体育": "体育类",
    "情感": "情感类",
    "财富理财": "理财类",
    "二次元": "二次元类",
    "小剧场": "小剧场类",
    "汽车": "汽车类",
    "三农": "三农类",
    "人文": "人文类",
    "颜值造型": "颜值类",
    "个人才艺": "才艺类",
    "生活vlog": "生活类",
}

# NOTE(review): the template text below contains no HTML tags, yet the
# formatting calls pass values such as rank_class, profile_url and
# total_hidden_class that no visible placeholder consumes — the markup
# appears to be missing from this copy of the file; confirm against the
# original template before shipping.
HTML_TEMPLATE = """ 抖音{period_label} · {date}

抖音{period_label} · {category_label}

数据日期:{date}  |  {display_hint}
💡 榜单说明:{update_rule},与实时数据存在差异。
📐 综合评分(满分100):综合评分根据达人在抖音的 总粉丝数、周期内的 粉丝增量点赞增量分享增量 以及 评论增量 加权计算所得(满分100)。
{displayed_count}
展示条数
{top_interaction}
最高互动
{category_th} {rows}
排名 账号名综合评分 总粉丝数 新增粉丝 新增点赞 新增评论 新增分享

📬 订阅服务

1️⃣ 是否需要订阅每日/周/月的抖音账号最新排名?
2️⃣ 是否需要订阅具体赛道的账号表现?我们支持:
个人才艺 生活vlog 财富理财 二次元 居家装修 学习教育 小剧场 数码科技 旅行 美食 化妆美容 动物 亲子 汽车 情感 三农 健康医学 潮流风尚 舞蹈才艺 颜值造型 人文 音乐 影视 身体锻炼 体育 明星娱乐 游戏
"""

# Row template for the all-category ranking (includes the category column).
ROW_TEMPLATE_CAT = """ {rank} {account_name} {category} {score} {followers} {new_fans} {new_likes} {new_comments} {new_shares} """

# Row template for a single-category ranking (no category column).
ROW_TEMPLATE = """ {rank} {account_name} {score} {followers} {new_fans} {new_likes} {new_comments} {new_shares} """


def _fmt_num(val) -> str:
    """Format a table cell value; missing or zero values render as ``"-"``.

    Non-blank strings are passed through unchanged; numbers are formatted
    with ``format_followers``.
    """
    if val is None or val == "-" or val == "":
        return "-"
    if isinstance(val, str):
        return val if val.strip() else "-"
    try:
        number = int(val)
    except (TypeError, ValueError, OverflowError):
        return str(val) if val else "-"
    return "-" if number == 0 else format_followers(number)


def _pick(item: dict, primary: str, fallback: str):
    """Return ``item[primary]`` when truthy, otherwise ``item.get(fallback)``."""
    return item.get(primary) or item.get(fallback)


def _esc(value) -> str:
    """HTML-escape a value (including quotes) for safe interpolation."""
    return html.escape(str(value), quote=True)


def generate_html(data: dict, output_path: str):
    """Render the ranking data as an HTML report and write it to disk.

    Args:
        data: Parsed ranking payload. Keys read here: ``list``, ``period``,
            ``dateStart``/``date``, ``dateEnd``, ``category``, ``total`` and
            ``status`` (``"future"`` / ``"too_early"`` add an apology notice
            to the ranking description).
        output_path: Destination file path; written as UTF-8.

    Raises:
        OSError: If the output file cannot be written.
    """
    items = data.get("list") or []
    period = data.get("period", "day")
    date_start = data.get("dateStart", data.get("date", ""))
    date_end = data.get("dateEnd", date_start)
    category = data.get("category", "全部")
    total = data.get("total", len(items))
    displayed_count = len(items)  # rows actually rendered

    if date_start == date_end:
        date_display = date_start
    else:
        date_display = f"{date_start} 至 {date_end}"

    period_label = PERIOD_LABELS.get(period, "日榜")
    cat_display = _CAT_DISPLAY.get(category, category + "类")
    is_all_category = category == "全部"

    # The category header cell is only shown for the all-category ranking.
    category_th = '赛道' if is_all_category else ''

    # Any period other than day/week uses the monthly wording.
    update_rule, window_human, latest_desc = _PERIOD_TEXTS.get(
        period, _PERIOD_TEXTS["month"]
    )

    # Notice shown when the requested date is outside the available range.
    status = data.get("status")
    warning_text = ""
    if status == "future":
        warning_text = f'非常抱歉🙏,我们最新的是{latest_desc}的数据,将为您提供最接近您需求的{latest_desc}热榜。'
    elif status == "too_early":
        warning_text = f'非常抱歉🙏,目前榜单最多支持回溯「{window_human}」,我将为您查询最接近您需求的时间范围~'
    update_rule_full = f'{warning_text}{update_rule}' if warning_text else update_rule

    # Highest likes + comments + shares across the rendered rows.
    max_interaction = max(
        [0]
        + [
            parse_num(_pick(item, 'newLikes', 'likedGrowth'))
            + parse_num(_pick(item, 'newComments', 'commentsGrowth'))
            + parse_num(_pick(item, 'newShares', 'sharedGrowth'))
            for item in items
        ]
    )

    rows = []
    for item in items:
        rank = item.get('rank', '-')
        score = item.get('comprehensiveScore', '-')
        if isinstance(score, (int, float)) and score > 0:
            score = int(score)

        cells = {
            "rank_class": f"rank-{rank}" if rank in [1, 2, 3] else "rank-other",
            "rank": _esc(rank),
            "account_name": _esc(item.get('accountName', '')),
            "profile_url": _esc(item.get('profileUrl', '') or '#'),
            "score": _esc(score),
            "followers": _esc(_fmt_num(_pick(item, 'followers', 'fansCount'))),
            "new_fans": _esc(_fmt_num(_pick(item, 'newFans', 'fansGrowth'))),
            "new_likes": _esc(_fmt_num(_pick(item, 'newLikes', 'likedGrowth'))),
            "new_comments": _esc(_fmt_num(_pick(item, 'newComments', 'commentsGrowth'))),
            "new_shares": _esc(_fmt_num(_pick(item, 'newShares', 'sharedGrowth'))),
        }
        if is_all_category:
            cells["category"] = _esc(item.get('category', '-'))
            rows.append(ROW_TEMPLATE_CAT.format(**cells))
        else:
            rows.append(ROW_TEMPLATE.format(**cells))

    # Summary hint, plus the flags for the "more rows hidden" state.
    if displayed_count >= total:
        display_hint = f"共 {total} 个账号上榜"
        total_hidden_class = ""
        more_hint_class = ""
        remaining_count = 0
    else:
        display_hint = f"共 {total} 个账号上榜(展示 TOP {displayed_count} 条)"
        total_hidden_class = "hidden"
        more_hint_class = "show"
        remaining_count = total - displayed_count

    page = HTML_TEMPLATE.format(
        period_label=period_label,
        category_label=_esc(cat_display),
        date=_esc(date_display.replace(' ', '_')),
        displayed_count=displayed_count,
        total_count=total,
        total_hidden_class=total_hidden_class,
        remaining_count=remaining_count,
        more_hint_class=more_hint_class,
        display_hint=display_hint,
        # Pass the combined text so the out-of-range notice is rendered.
        update_rule=update_rule_full,
        top_interaction=format_interaction(max_interaction),
        category_th=category_th,
        rows="\n".join(rows),
    )

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(page)

    print(f"[INFO] HTML 报告已生成:{output_path}")


def _open_in_default_app(path: Path) -> None:
    """Open *path* with the platform's default handler.

    Raises:
        OSError, subprocess.SubprocessError: If the opener is missing or fails.
    """
    system = platform.system()
    if system == "Darwin":  # macOS
        subprocess.run(["open", str(path)], check=True)
    elif system == "Windows":
        # os.startfile avoids routing a data-derived path through the shell.
        os.startfile(str(path))
    else:  # Linux and other Unix-likes
        subprocess.run(["xdg-open", str(path)], check=True)


def main():
    """CLI entry point: load the JSON data, render the report, then open it."""
    parser = argparse.ArgumentParser(description="抖音榜单 HTML 报告生成")
    parser.add_argument("--data", "-d", required=True, help="JSON 数据文件路径")
    parser.add_argument("--output", "-o", default="", help="HTML 输出文件路径")
    parser.add_argument("--limit", type=int, default=20, help="限制展示条数,0 表示全部展示")
    args = parser.parse_args()

    with open(args.data, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Apply the row limit (0 means "show everything").
    if args.limit > 0 and isinstance(data.get("list"), list):
        data["list"] = data["list"][:args.limit]

    # Default file name: category + period + date + timestamp.
    if args.output:
        output_path = Path(args.output)
    else:
        date_start = data.get("dateStart", data.get("date", ""))
        category = data.get("category", "全部")
        period_name = PERIOD_LABELS.get(data.get("period", "day"), "榜")
        date_str = date_start.replace("-", "")
        timestamp = int(time.time())
        output_path = Path(f"{category}{period_name}{date_str}_{timestamp}.html")

    generate_html(data, str(output_path))

    # Opening the report is best-effort: on failure only report its location.
    abs_path = output_path.resolve()
    try:
        _open_in_default_app(abs_path)
    except (OSError, subprocess.SubprocessError):
        print(f"\n✓ HTML 报告已生成: {abs_path}", file=sys.stderr)
    else:
        print(f"\n✓ HTML 报告已自动打开: {abs_path}", file=sys.stderr)


if __name__ == "__main__":
    main()