#!/usr/bin/env python3 """ 小红书榜单 HTML 报告生成脚本 用法: python generate_report.py --data result.json --output report.html python generate_report.py --data result.json # 自动命名输出文件 """ import argparse import json import sys import html as html_utils from datetime import datetime from pathlib import Path # ───────────────────────────────────────────────────────── # 赛道识别:基于账号名 + 主页链接,识别更精准的细分赛道 # ───────────────────────────────────────────────────────── # 细分赛道识别规则(按优先级排序) CATEGORY_INFER_RULES = [ # 游戏类(最高优先级) {"keywords": ["王者荣耀", "第五人格", "崩坏", "蛋仔", "原神", "鸣潮", "阴阳师", "明日方舟", "光遇", "恋与深空"], "category": "游戏"}, {"keywords": ["吃鸡", "和平精英", "PUBG", "英雄联盟", "LOL", "无畏契约", "Valorant", "CSGO", "DOTA"], "category": "游戏"}, {"keywords": ["迷你世界", "我的世界", "MC", "Minecraft"], "category": "游戏"}, {"keywords": ["超自然行动组"], "category": "游戏"}, # 品牌/官方类 {"keywords": ["official", "工作室"], "category": "官方账号"}, {"keywords": ["爱奇艺", "优酷", "腾讯视频", "芒果TV", "Bilibili", "B站"], "category": "视频平台"}, {"keywords": ["微博", "新浪", "凤凰网", "澎湃", "界面新闻"], "category": "新闻媒体"}, {"keywords": ["茶百道", "喜茶", "奈雪", "一点点", "蜜雪冰城", "霸王茶姬"], "category": "奶茶饮品"}, {"keywords": ["德施曼", "凯迪仕", "飞利浦智能锁"], "category": "智能家居"}, {"keywords": ["天猫", "京东", "淘宝", "拼多多"], "category": "电商平台"}, # 明星/娱乐类 {"keywords": ["赵露思", "白鹿", "虞书欣", "张凌赫", "田曦薇", "万妮达", "李维嘉"], "category": "明星"}, {"keywords": ["煎饼果仔"], "category": "短剧"}, {"keywords": ["小胡同学", "神仙藤井树"], "category": "美妆博主"}, {"keywords": ["papi酱"], "category": "搞笑博主"}, # 美食类细分 {"keywords": ["刘雨鑫", "茄猫", "小镇上的猪精", "阿晨吃饱了"], "category": "美食博主"}, {"keywords": ["美食", "吃货"], "category": "美食博主"}, {"keywords": ["烘焙", "蛋糕", "面包", "甜品"], "category": "烘焙甜点"}, {"keywords": ["探店", "奶茶", "咖啡"], "category": "探店美食"}, # 旅行/户外类 {"keywords": ["程前朋友圈"], "category": "商业观察"}, {"keywords": ["柯子又胖了", "旅行", "旅游", "自驾", "露营"], "category": "旅行博主"}, {"keywords": ["Linksphotograph", "行缘", "旅行摄影"], "category": "旅行摄影"}, # 宠物类 {"keywords": ["扣肉有脾气", "宠物", "猫", "狗", "萌宠", "铲屎"], "category": "宠物博主"}, # 时尚/穿搭类 {"keywords": ["穿搭", "OOTD", "衣橱"], "category": "穿搭博主"}, {"keywords": ["康康和爷爷"], "category": "时尚博主"}, {"keywords": ["白昼小熊"], "category": "潮流博主"}, # 健身/运动类 {"keywords": ["帕梅拉", "欧阳春晓"], "category": "健身博主"}, {"keywords": ["健身", "瑜伽", "减脂", "增肌", "运动"], "category": "健身运动"}, # 学习/知识类 {"keywords": ["周小闹", "Prof.Alan"], "category": "知识博主"}, {"keywords": ["教育", "学习", "干货", "职场"], "category": "知识教育"}, # 科技/测评类 {"keywords": ["小狮日记", "数码", "手机", "电脑", "测评", "评测", "搞机", "极客", "科技"], "category": "科技数码"}, {"keywords": ["AI", "人工智能", "ChatGPT", "GPT"], "category": "AI科技"}, # 亲子/母婴类 {"keywords": ["一只静猪"], "category": "亲子博主"}, {"keywords": ["母婴", "育儿", "宝宝", "辣妈", "萌娃"], "category": "母婴育儿"}, # 家居/装修类 {"keywords": ["家居", "装修", "软装", "设计", "收纳"], "category": "家居博主"}, # 情感/心理类 {"keywords": ["星座", "情感", "恋爱", "心理", "塔罗", "MBTI"], "category": "情感博主"}, # 日常生活/Vlog {"keywords": ["日记", "vlog", "plog", "日常", "记录", "生活"], "category": "日常Vlog"}, # 科学/探索类 {"keywords": ["亿点点不一样", "科普", "科学", "探索", "实验"], "category": "科学探索"}, ] # 兜底关键词 FALLBACK_KEYWORDS = { "新闻媒体": ["吃瓜", "资讯", "热点", "日报"], "影视娱乐": ["影视", "娱乐", "综艺", "剧集", "电影", "演员"], "休闲爱好": ["爱好", "手工", "DIY", "绘画", "乐器"], "科学探索": ["天文", "地理", "宇宙", "太空"], } def infer_category(account_name: str, profile_url: str = "") -> str: """ 根据账号名和主页链接推断赛道分类。 优先级:精确账号匹配 > 分类规则 > 兜底 """ combined = (account_name + " " + profile_url).lower() # 1. 精确账号名匹配(最高优先级) exact_account_map = { "王者荣耀": "游戏", "第五人格": "游戏", "崩坏:星穹铁道": "游戏", "网易蛋仔派对": "游戏", "超自然行动组": "游戏", "恋与深空": "游戏", "茶百道ChaPanda": "奶茶饮品", "德施曼": "智能家居", "爱奇艺": "视频平台", "亿点点不一样": "科学探索", "朱铁雄": "短剧", "一只静猪": "亲子博主", "扣肉有脾气": "宠物博主", "王冰汝": "新闻媒体", "神仙藤井树": "美妆博主", "小胡同学呀": "美妆博主", "康康和爷爷": "时尚博主", "帕梅拉Pamela Reif": "健身博主", "煎饼果仔(张问初)": "短剧", "Prof.Alan Macfarlane": "知识博主", "周小闹": "知识博主", "刘雨鑫JASON": "美食博主", "茄猫的罐头": "美食博主", "程前朋友圈": "商业观察", "Linksphotograph": "旅行摄影", "李蠕蠕": "搞笑博主", "吃瓜吗喽": "新闻媒体", } for exact_name, cat in exact_account_map.items(): if exact_name.lower() in combined: return cat # 2. 规则遍历匹配 for rule in CATEGORY_INFER_RULES: for kw in rule["keywords"]: if kw.lower() in combined: return rule["category"] # 3. 兜底关键词 for category, keywords in FALLBACK_KEYWORDS.items(): for kw in keywords: if kw.lower() in combined: return category return "日常Vlog" def parse_num(val) -> int: """解析数字字符串,如 '24.64w' -> 246400, '6919' -> 6919""" if val is None or val == "-": return 0 s = str(val).replace("w", "").replace("W", "").replace("+", "") try: if "." in s: return int(float(s) * 10000) return int(float(s)) except (ValueError, TypeError): return 0 def format_interaction(num: int) -> str: if num >= 100_000: return f"{num // 10_000}w+" elif num >= 10_000: return f"{num / 10_000:.1f}w+" return str(num) def format_followers(num: int) -> str: if num >= 100_000_000: return f"{num / 100_000_000:.1f}亿" elif num >= 10_000: return f"{num / 10_000:.1f}万" return str(num) PERIOD_UPDATE_RULES = { "day": "每日 19:00 更新", "week": "每周一 15:00 更新", "month": "每月 2号 9:00 更新", } PERIOD_LABELS = {"day": "日榜", "week": "周榜", "month": "月榜"} HTML_TEMPLATE = """
| 排名 | 账号名 | 综合评分 | 总粉丝数 | 新增笔记数 | 新增粉丝 | 新增点赞 | 新增评论 | 新增收藏 | 新增分享 |
|---|