#!/usr/bin/env python3
"""
抖音榜单 HTML 报告生成脚本
用法:
python generate_report.py --data result.json --output report.html
python generate_report.py --data result.json # 自动命名输出文件
"""
import argparse
import json
import sys
from pathlib import Path
# ─────────────────────────────────────────────────────────
# Track inference: guess an account's niche track from its name
# ─────────────────────────────────────────────────────────
# Ordered rule table scanned by infer_category(). Rules are tried from top to
# bottom and the first rule owning a keyword that occurs in the lower-cased
# account name wins, so earlier entries take priority over later ones.
CATEGORY_INFER_RULES = [
    # Games
    {"keywords": ["王者荣耀", "英雄联盟", "原神", "崩坏", "蛋仔", "阴阳师", "明日方舟", "光遇", "鸣潮"], "category": "游戏"},
    {"keywords": ["吃鸡", "和平精英", "PUBG", "LOL", "无畏契约", "CSGO", "DOTA", "Valorant"], "category": "游戏"},
    {"keywords": ["迷你世界", "我的世界", "MC", "Minecraft"], "category": "游戏"},
    # Celebrities / entertainment
    {"keywords": ["official", "工作室"], "category": "官方账号"},
    {"keywords": ["爱奇艺", "优酷", "腾讯视频", "芒果TV", "Bilibili", "B站"], "category": "视频平台"},
    {"keywords": ["微博", "新浪", "凤凰网", "澎湃"], "category": "新闻媒体"},
    {"keywords": ["明星", "演员", "歌手", "偶像"], "category": "明星娱乐"},
    # Food (with a baking sub-track)
    {"keywords": ["美食", "吃货", "探店", "烹饪"], "category": "美食"},
    {"keywords": ["烘焙", "蛋糕", "面包", "甜品"], "category": "烘焙甜点"},
    # Travel / outdoors
    {"keywords": ["旅行", "旅游", "自驾", "露营"], "category": "旅行"},
    # Pets
    {"keywords": ["宠物", "猫", "狗", "萌宠", "铲屎"], "category": "动物"},
    # Fashion / outfits
    {"keywords": ["穿搭", "OOTD", "衣橱"], "category": "潮流风尚"},
    # Fitness / sports
    {"keywords": ["健身", "瑜伽", "减脂", "增肌", "运动"], "category": "身体锻炼"},
    # Learning / knowledge
    {"keywords": ["教育", "学习", "干货", "职场", "知识"], "category": "学习教育"},
    # Tech / reviews
    {"keywords": ["数码", "手机", "电脑", "测评", "评测", "科技"], "category": "数码科技"},
    # Parenting / mother-and-baby
    {"keywords": ["母婴", "育儿", "宝宝", "辣妈", "萌娃", "亲子"], "category": "亲子"},
    # Home / renovation
    {"keywords": ["家居", "装修", "软装", "设计", "收纳"], "category": "居家装修"},
    # Relationships / psychology
    {"keywords": ["星座", "情感", "恋爱", "心理", "塔罗"], "category": "情感"},
    # Music
    {"keywords": ["音乐", "唱歌", "弹琴", "乐器"], "category": "音乐"},
    # Film / TV
    {"keywords": ["影视", "娱乐", "综艺", "剧集", "电影"], "category": "影视"},
    # Dance
    {"keywords": ["舞蹈", "跳舞", "舞"], "category": "舞蹈才艺"},
    # Daily life / vlog
    {"keywords": ["日记", "vlog", "plog", "日常", "记录", "生活"], "category": "生活vlog"},
    # Anime / ACG
    {"keywords": ["二次元", "动漫", "动画", "ACG"], "category": "二次元"},
    # Health / medicine
    {"keywords": ["健康", "养生", "医疗", "医学"], "category": "健康医学"},
]
def infer_category(account_name: str) -> str:
    """Infer an account's content track from its display name.

    Matching is a case-insensitive substring search. A short list of
    well-known game titles is checked first; after that
    ``CATEGORY_INFER_RULES`` is scanned in order and the first rule owning a
    keyword found in the name wins.

    Args:
        account_name: Account display name. A missing value (``None`` or an
            empty string, e.g. a JSON ``null``) is tolerated, and non-string
            values are converted with ``str()``.

    Returns:
        The inferred category label, or ``"全部"`` when no name was supplied
        or nothing matched.
    """
    # Guard first: calling .lower() on None would raise AttributeError.
    if not account_name:
        return "全部"
    lowered = str(account_name).lower()

    # Well-known game titles take priority over the generic rule table.
    priority_titles = {
        "王者荣耀": "游戏",
        "原神": "游戏",
        "和平精英": "游戏",
        "英雄联盟": "游戏",
        "迷你世界": "游戏",
    }
    for title, track in priority_titles.items():
        if title.lower() in lowered:
            return track

    # Generic rules: the first rule with any matching keyword wins.
    for rule in CATEGORY_INFER_RULES:
        if any(keyword.lower() in lowered for keyword in rule["keywords"]):
            return rule["category"]
    return "全部"
def parse_num(val) -> int:
    """Parse a possibly abbreviated count such as ``"1.5w"`` or ``"12w+"``.

    The multiplier is taken from the unit suffix itself (``w``/``W``/``万``
    mean 10,000 and ``亿`` means 100,000,000), not from whether the text
    happens to contain a decimal point. ``"12w+"`` therefore parses as
    120000, and a plain float such as ``123.0`` stays 123.

    Args:
        val: A number, a numeric string (optionally with a trailing ``+``,
            thousands separators or a unit suffix), ``None`` or ``"-"``.

    Returns:
        The value as an integer, or 0 when it is missing or unparseable.
    """
    if val is None or val == "-":
        return 0

    text = str(val).replace("+", "").replace(",", "").strip()

    # Detect and strip a trailing unit; the unit alone decides the scale.
    multiplier = 1
    for suffix, factor in (("亿", 100_000_000), ("万", 10_000), ("w", 10_000), ("W", 10_000)):
        if text.endswith(suffix):
            multiplier = factor
            text = text[: -len(suffix)].strip()
            break

    try:
        number = float(text)
        if multiplier == 1:
            return int(number)
        # round() avoids float artefacts such as 1.15 * 10000 == 11499.999...
        return round(number * multiplier)
    except (ValueError, TypeError, OverflowError):
        # Non-numeric text, NaN or infinity: treat as "no data".
        return 0
def format_interaction(num: int) -> str:
    """Abbreviate an interaction count using the "w" (10,000) unit.

    Values below 10,000 are returned unchanged as text. Values from 10,000
    up to 99,999 keep one decimal place (``"1.5w+"``). Larger values are
    floored to a whole number of "w" (``"12w+"``).
    """
    uses_unit = num >= 10_000
    if not uses_unit:
        return str(num)

    is_large = num >= 100_000
    amount = str(num // 10_000) if is_large else format(num / 10_000, ".1f")
    return amount + "w+"
def format_followers(num: int) -> str:
    """Abbreviate a follower count with the "亿" or "w" unit.

    Counts of at least 100,000,000 are shown in "亿" and counts of at least
    10,000 in "w", both with one decimal place. Smaller counts are returned
    unchanged as text.
    """
    # Largest unit first so the biggest applicable unit is chosen.
    for threshold, unit in ((100_000_000, "亿"), (10_000, "w")):
        if num >= threshold:
            return f"{num / threshold:.1f}{unit}"
    return str(num)
# Refresh schedule text for each ranking period key ("day" / "week" / "month").
# NOTE(review): not referenced by any code visible in this file — confirm
# whether it is still needed.
PERIOD_UPDATE_RULES = {
    "day": "每日17:30",
    "week": "每周一17:30",
    "month": "每月2号9点",
}
# Display label for each ranking period key; generate_html() looks the
# period up here.
PERIOD_LABELS = {"day": "日榜", "week": "周榜", "month": "月榜"}
# Page template filled in by generate_html() through str.format().
# Placeholders present in this text: {period_label}, {date}, {update_rule},
# {category_th} (extra header cell for the all-category view) and {rows}.
# NOTE(review): the text as it stands contains no HTML tags — confirm
# whether markup was lost from this template.
HTML_TEMPLATE = """
抖音{period_label} · {date}
💡 榜单说明:{update_rule},与实时数据存在差异。
📐 综合评分(满分100):综合评分根据达人在抖音的 总粉丝数、周期内的 粉丝增量、点赞增量、分享增量 以及 评论增量 加权计算所得(满分100)。
| 排名 |
账号名 |
{category_th}
综合评分 |
总粉丝数 |
新增粉丝 |
新增点赞 |
新增评论 |
新增分享 |
{rows}
📬 订阅服务
1️⃣ 是否需要订阅每日/周/月的抖音账号最新排名?
2️⃣ 是否需要订阅具体赛道的账号表现?我们支持:
个人才艺
生活vlog
财富理财
二次元
居家装修
学习教育
小剧场
数码科技
旅行
美食
化妆美容
动物
亲子
汽车
情感
三农
健康医学
潮流风尚
舞蹈才艺
颜值造型
人文
音乐
影视
身体锻炼
体育
明星娱乐
游戏
"""
# Row template for the all-category view (includes the track column).
# Filled in by generate_html() through str.format().
ROW_TEMPLATE_CAT = """
| {rank} |
{account_name} |
{category} |
{score} |
{followers} |
{new_fans} |
{new_likes} |
{new_comments} |
{new_shares} |
"""
# Row template for a single-category view (no track column).
ROW_TEMPLATE = """
| {rank} |
{account_name} |
{score} |
{followers} |
{new_fans} |
{new_likes} |
{new_comments} |
{new_shares} |
"""
def _fmt_num(val) -> str:
"""格式化数字"""
if val is None or val == "-" or val == "":
return "-"
if isinstance(val, str):
return val if val.strip() else "-"
try:
n = int(val)
if n == 0:
return "-"
return format_followers(n)
except (TypeError, ValueError):
return str(val) if val else "-"
def generate_html(data: dict, output_path: str):
    """Render a ranking payload as an HTML report and write it to disk.

    Args:
        data: Parsed ranking JSON. Keys read here: ``list`` (row dicts),
            ``period`` (``"day"``, ``"week"`` or anything else, which is
            treated as monthly; default ``"day"``), ``dateStart`` /
            ``dateEnd`` (falling back to ``date``), ``category`` (default
            ``"全部"``), ``total`` (default: number of rows) and ``status``
            (``"future"`` / ``"too_early"`` add a notice to the ranking
            note).
        output_path: Destination file; written as UTF-8.

    Side effects:
        Writes ``output_path`` and prints a confirmation line to stdout.
    """
    # Function-scope import, in the same style main() uses for its helpers.
    from html import escape

    def cell(value) -> str:
        """Return *value* as text that is safe to embed in HTML."""
        return escape(str(value))

    # ``list`` / ``total`` may be JSON null; treat that like "absent".
    items = data.get("list") or []
    period = data.get("period", "day")
    date_start = data.get("dateStart", data.get("date", ""))
    date_end = data.get("dateEnd", date_start)
    category = data.get("category", "全部")
    total = data.get("total")
    if total is None:
        total = len(items)
    # Number of rows actually rendered.
    displayed_count = len(items)

    if date_start == date_end:
        date_display = date_start
    else:
        date_display = f"{date_start} 至 {date_end}"
    period_label = PERIOD_LABELS.get(period, "日榜")

    category_labels = {
        "全部": "全品类", "化妆美容": "美妆类", "美食": "美食类",
        "旅行": "旅行类", "数码科技": "科技类", "游戏": "游戏类",
        "健康医学": "健康类", "亲子": "亲子类", "身体锻炼": "运动类",
        "学习教育": "教育类", "动物": "动物类", "潮流风尚": "时尚类",
        "居家装修": "家居类", "影视": "影视类", "音乐": "音乐类",
        "舞蹈才艺": "舞蹈类", "明星娱乐": "娱乐类", "体育": "体育类",
        "情感": "情感类", "财富理财": "理财类", "二次元": "二次元类",
        "小剧场": "小剧场类", "汽车": "汽车类", "三农": "三农类",
        "人文": "人文类", "颜值造型": "颜值类", "个人才艺": "才艺类",
        "生活vlog": "生活类",
    }
    cat_display = category_labels.get(category, category + "类")
    is_all_category = (category == "全部")

    # Header: the all-category view gets an extra track column.
    category_th = '赛道 | ' if is_all_category else ''

    # Period-specific wording; any period other than day/week is monthly.
    if period == "day":
        update_rule, window_human, latest_desc = "每日17:30更新", "过去7天", "昨日"
    elif period == "week":
        update_rule, window_human, latest_desc = "每周一17:30更新", "过去3周", "上周"
    else:
        update_rule, window_human, latest_desc = "每月2号9点更新", "过去3月", "上月"

    # Notice shown when the requested range is outside the available data.
    warning_text = ""
    status = data.get("status")
    if status == "future":
        warning_text = f'非常抱歉🙏,我们最新的是{latest_desc}的数据,将为您提供最接近您需求的{latest_desc}热榜。'
    elif status == "too_early":
        warning_text = f'非常抱歉🙏,目前榜单最多支持回溯「{window_human}」,我将为您查询最接近您需求的时间范围~'

    # Merge the notice into the ranking note; this merged text is what the
    # template must receive, otherwise the notice is never displayed.
    update_rule_full = f'{warning_text}{update_rule}' if warning_text else update_rule

    # Highest combined likes + comments + shares across the rendered rows.
    max_interaction = max(
        (
            parse_num(item.get('newLikes') or item.get('likedGrowth'))
            + parse_num(item.get('newComments') or item.get('commentsGrowth'))
            + parse_num(item.get('newShares') or item.get('sharedGrowth'))
            for item in items
        ),
        default=0,
    )

    # Build the table rows. str.format ignores keyword arguments a template
    # does not use, so both templates can be fed the same set.
    row_template = ROW_TEMPLATE_CAT if is_all_category else ROW_TEMPLATE
    rows = []
    for item in items:
        rank = item.get('rank', '-')
        rank_class = f"rank-{rank}" if rank in (1, 2, 3) else "rank-other"
        score = item.get('comprehensiveScore', '-')
        if isinstance(score, (int, float)) and score > 0:
            score = int(score)
        # Every value below comes from the input data, so escape it before
        # it is embedded in the HTML document.
        rows.append(row_template.format(
            rank_class=rank_class,
            rank=cell(rank),
            account_name=cell(item.get('accountName', '')),
            profile_url=cell(item.get('profileUrl', '') or '#'),
            category=cell(item.get('category', '-')),
            score=cell(score),
            followers=cell(_fmt_num(item.get('followers') or item.get('fansCount'))),
            new_fans=cell(_fmt_num(item.get('newFans') or item.get('fansGrowth'))),
            new_likes=cell(_fmt_num(item.get('newLikes') or item.get('likedGrowth'))),
            new_comments=cell(_fmt_num(item.get('newComments') or item.get('commentsGrowth'))),
            new_shares=cell(_fmt_num(item.get('newShares') or item.get('sharedGrowth'))),
        ))

    # Summary hint: mention truncation only when rows were left out.
    if displayed_count >= total:
        display_hint = f"共 {total} 个账号上榜"
        total_hidden_class = ""
        more_hint_class = ""
        remaining_count = 0
    else:
        display_hint = f"共 {total} 个账号上榜(展示 TOP {displayed_count} 条)"
        total_hidden_class = "hidden"
        more_hint_class = "show"
        remaining_count = total - displayed_count

    document = HTML_TEMPLATE.format(
        period_label=period_label,
        category_label=cell(cat_display),
        date=cell(date_display.replace(' ', '_')),
        displayed_count=displayed_count,
        total_count=total,
        total_hidden_class=total_hidden_class,
        remaining_count=remaining_count,
        more_hint_class=more_hint_class,
        display_hint=display_hint,
        update_rule=update_rule_full,
        top_interaction=format_interaction(max_interaction),
        category_th=category_th,
        rows="\n".join(rows),
    )
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(document)
    print(f"[INFO] HTML 报告已生成:{output_path}")
def main():
    """Command-line entry point: load the JSON, render the report, open it.

    Options:
        --data / -d: path of the ranking JSON file (required).
        --output / -o: path of the HTML file; when omitted a name is built
            from category, period, start date and the current Unix timestamp.
        --limit: maximum number of rows to render (default 20; 0 renders
            every row).

    After writing the report, a best-effort attempt is made to open it with
    the platform's default handler. Failing to open it is not an error.
    """
    import platform
    import subprocess
    import time

    parser = argparse.ArgumentParser(description="抖音榜单 HTML 报告生成")
    parser.add_argument("--data", "-d", required=True, help="JSON 数据文件路径")
    parser.add_argument("--output", "-o", default="", help="HTML 输出文件路径")
    parser.add_argument("--limit", type=int, default=20, help="限制展示条数,0 表示全部展示")
    args = parser.parse_args()

    with open(args.data, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Apply the row limit.
    entries = data.get("list")
    if args.limit > 0 and isinstance(entries, list):
        # Record the pre-truncation size first: generate_html() falls back
        # to the list length when ``total`` is absent, which would otherwise
        # report the truncated count as the total.
        if data.get("total") is None:
            data["total"] = len(entries)
        data["list"] = entries[:args.limit]

    # Output name: category + period + date + timestamp.
    if args.output:
        output_path = Path(args.output)
    else:
        # ``or ""`` keeps a JSON null date from crashing the rename below.
        date_start = data.get("dateStart", data.get("date", "")) or ""
        category = data.get("category", "全部")
        period = data.get("period", "day")
        period_name = {"day": "日榜", "week": "周榜", "month": "月榜"}.get(period, "榜")
        date_str = str(date_start).replace("-", "")
        timestamp = int(time.time())
        output_path = Path(f"{category}{period_name}{date_str}_{timestamp}.html")

    generate_html(data, str(output_path))

    # Open the report automatically (best effort).
    abs_path = output_path.resolve()
    system = platform.system()
    if system == "Darwin":  # macOS
        command, use_shell = ["open", str(abs_path)], False
    elif system == "Windows":
        # ``start`` is a cmd.exe built-in, hence the shell.
        command, use_shell = ["start", "", str(abs_path)], True
    else:  # Linux and other Unix-likes
        command, use_shell = ["xdg-open", str(abs_path)], False
    try:
        subprocess.run(command, shell=use_shell, check=True)
    except (OSError, subprocess.SubprocessError):
        # A missing launcher or non-zero exit: the file still exists, so
        # only tell the user where it is.
        print(f"\n✓ HTML 报告已生成: {abs_path}", file=sys.stderr)
    else:
        print(f"\n✓ HTML 报告已自动打开: {abs_path}", file=sys.stderr)


if __name__ == "__main__":
    main()