#!/usr/bin/env python3
"""
HTML报告生成脚本
从structured_report.json读取智能体已分析好的结构化热点数据,填充HTML模板生成报告
【核心原则】
本脚本只负责模板渲染,不进行任何数据分析或事件识别:
- 不识别热点事件(由智能体完成)
- 不计算热度值(由智能体完成)
- 不生成趋势预测(由智能体完成)
- 只读取JSON数据,填充模板,输出HTML
这确保HTML报告与对话输出完全一致,因为数据来源相同。
"""
import argparse
import json
import os
from datetime import datetime
from typing import Dict, List
# Platform display name -> CSS class name. Built once at import time instead
# of on every call.
_PLATFORM_CSS_CLASSES = {
    "微博": "weibo",
    "抖音": "douyin",
    "知乎": "zhihu",
    "B站": "bilibili",
    "快手": "kuaishou",
    "头条": "toutiao",
    "百度": "baidu",
}


def get_platform_class(platform_name: str) -> str:
    """Return the CSS class name for a platform.

    Args:
        platform_name: Platform display name, e.g. "微博" or "B站".

    Returns:
        The mapped CSS class name. Names that are not in the table fall
        back to "weibo".
    """
    return _PLATFORM_CSS_CLASSES.get(platform_name, "weibo")
# Platform display name -> emoji shown next to it. Built once at import time
# instead of on every call.
_PLATFORM_EMOJIS = {
    "微博": "🌐",
    "抖音": "🎵",
    "知乎": "📚",
    "B站": "📺",
    "快手": "🎬",
    "头条": "📰",
    "百度": "🔍",
}


def get_platform_emoji(platform_name: str) -> str:
    """Return the emoji associated with a platform.

    Args:
        platform_name: Platform display name, e.g. "微博" or "B站".

    Returns:
        The mapped emoji. Names that are not in the table fall back to "📍".
    """
    return _PLATFORM_EMOJIS.get(platform_name, "📍")
def get_hot_score_value(hot_score_str: str) -> int:
    """Convert a hot-score string such as '938万' into an integer value.

    Supported forms:
      * a number followed by "万" (x 10,000), e.g. "938万" or "93.8万";
      * a number followed by "亿" (x 100,000,000), e.g. "1.2亿";
      * a bare number, which is returned as-is (no unit is assumed).

    Surrounding whitespace and thousands separators (",") are ignored.

    Args:
        hot_score_str: The hot-score text. Non-string values are converted
            with ``str()`` before parsing.

    Returns:
        The numeric hot score rounded to the nearest integer, or 0 when the
        input cannot be parsed.
    """
    try:
        text = str(hot_score_str).strip().replace(",", "")
        multiplier = 1
        for suffix, factor in (("亿", 100_000_000), ("万", 10_000)):
            if text.endswith(suffix):
                text = text[: -len(suffix)]
                multiplier = factor
                break
        # float() accepts decimals such as "93.8"; round() absorbs the binary
        # floating-point error introduced by the multiplication.
        return round(float(text) * multiplier)
    except (ValueError, OverflowError):
        # ValueError: not a number (including "nan"); OverflowError: "inf".
        return 0
def get_rank_header_class(rank: int, hot_score_str: str) -> str:
    """Return the CSS class for a card header based on rank and hot score.

    Ranks 1-3 always get their dedicated "rank-N-header" class. Lower ranks
    are styled by hot score: at least 10,000,000 is "rank-hot", at least
    5,000,000 is "rank-medium", anything else is "rank-normal".

    Args:
        rank: Position of the hotspot in the list.
        hot_score_str: Hot-score text (e.g. '938万'); only consulted when the
            rank is not 1, 2 or 3.

    Returns:
        The CSS class name for the card header.
    """
    for position in (1, 2, 3):
        if rank == position:
            return f"rank-{position}-header"

    # The score is parsed only here, because the top three ranks never use it.
    hot_score = get_hot_score_value(hot_score_str)
    if hot_score >= 10_000_000:
        return "rank-hot"
    if hot_score >= 5_000_000:
        return "rank-medium"
    return "rank-normal"
def get_rank_badge_class(rank: int) -> str:
    """Return the CSS class for a rank badge.

    Args:
        rank: Position of the hotspot in the list.

    Returns:
        "rank-1", "rank-2" or "rank-3" for the top three positions, and
        "rank-other" for every other value.
    """
    for position in (1, 2, 3):
        if rank == position:
            return f"rank-{position}"
    return "rank-other"
def generate_table_row(hotspot: Dict, rank: int) -> str:
    """Render one row of the TOP10 table.

    Missing fields render as empty text instead of raising KeyError, which
    matches how generate_hotspot_card reads the same records with ``.get``.

    Args:
        hotspot: Hotspot record. Reads "title", "hot_score",
            "platform_count" and "duration"; when "platform_count" is
            absent, the length of "platforms" is used instead.
        rank: Rank number shown in the first cell.

    Returns:
        The rendered row text.
    """
    title = hotspot.get("title", "")
    hot_score = hotspot.get("hot_score", "")
    duration = hotspot.get("duration", "")

    platform_count = hotspot.get("platform_count")
    if platform_count is None:
        platform_count = len(hotspot.get("platforms") or [])

    # NOTE(review): values are interpolated verbatim, with no HTML escaping.
    # Confirm the JSON fields are trusted or already escaped upstream.
    return (
        f"\n| {rank} |\n"
        f"{title} |\n"
        f"{hot_score} |\n"
        f"{platform_count}个 |\n"
        f"{duration} |\n"
    )
def _summarize_discussion(discussion) -> str:
    """Return the one-line summary for a platform's discussion entry.

    Combines the entry's "focus" text with up to three example topic titles
    ("focus,如「a」、「b」"). Either part is omitted when empty, so no dangling
    ",如" or empty "「」" is produced. Returns "" when there is nothing to say.
    """
    if not discussion:
        return ""
    focus = discussion.get("focus", "")
    # NOTE(review): the original also read each topic's "url" but emitted the
    # bare title in both branches; add link markup here if links are intended.
    titles = [
        topic.get("title", "")
        for topic in (discussion.get("topics") or [])[:3]
    ]
    topics_str = "、".join(f"「{title}」" for title in titles if title)
    return ",如".join(part for part in (focus, topics_str) if part)


def generate_hotspot_card(hotspot: Dict, rank: int) -> str:
    """Render the card text for one hotspot.

    The card contains one discussion item for every platform listed in
    ``hotspot["platforms"]`` followed by the overall prediction. A platform
    without usable discussion data gets a generic fallback line so that
    every listed platform is covered.

    Args:
        hotspot: Hotspot record. Reads "platforms", "discussions" (dicts
            with "platform", "focus" and "topics"), "prediction" and
            "prediction_emoji" (defaults to "🔥"). ``None`` values for the
            list fields are treated as empty.
        rank: Rank of the hotspot. Kept for interface compatibility; the
            rendered text does not use it.

    Returns:
        The rendered card text.
    """
    platforms = hotspot.get("platforms") or []
    discussions = hotspot.get("discussions") or []
    prediction = hotspot.get("prediction", "")
    prediction_emoji = hotspot.get("prediction_emoji", "🔥")

    # NOTE(review): the original also built per-platform tag strings and CSS
    # class names that were never interpolated into the returned text, so they
    # are omitted here. Restore them if the card is meant to show platform
    # tags; confirm against the HTML template.

    # Index discussions by platform for direct lookup. Malformed entries
    # (non-dict or without a "platform" key) are skipped instead of raising.
    discussion_by_platform = {
        entry["platform"]: entry
        for entry in discussions
        if isinstance(entry, dict) and "platform" in entry
    }

    items = []
    for plat in platforms:
        summary = _summarize_discussion(discussion_by_platform.get(plat))
        if not summary:
            # Listed platform with no usable discussion data: fallback line.
            summary = f"{plat}用户关注该事件"
        items.append(f"\n{get_platform_emoji(plat)} {plat}\n{summary}\n\n")
    discussion_items = "".join(items)

    return (
        "\n跨平台讨论差异\n"
        f"{discussion_items}\n"
        f"{prediction_emoji} 综合预测\n"
        f"{prediction}\n"
    )
def generate_html_report(data: Dict, template_path: str) -> str:
"""生成完整的HTML报告"""
# 读取模板
with open(template_path, "r", encoding="utf-8") as f:
template = f.read()
# 获取数据
query_range = data.get("query_range", {})
start_date = query_range.get("start_date", "")
end_date = query_range.get("end_date", "")
hotspots = data.get("hotspots", [])
if not hotspots:
return "无热点数据
"
# 生成TOP10表格内容
table_rows = ""
for i, hotspot in enumerate(hotspots[:10], 1):
table_rows += generate_table_row(hotspot, i) + "\n"
# 生成热点卡片
cards = ""
for i, hotspot in enumerate(hotspots[:10], 1):
cards += generate_hotspot_card(hotspot, i) + "\n"
# 替换模板中的占位符
html = template.replace("{start_date}", start_date)
html = html.replace("{end_date}", end_date)
# 替换表格tbody
html = html.replace(
'''