#!/usr/bin/env python3
"""
HTML report generation script.

Reads the structured hotspot data that the agent has already analysed from
structured_report.json and fills the HTML template to produce the report.

Core principle
--------------
This script is responsible for template rendering only; it performs no data
analysis or event detection:
- it does not identify hotspot events (done by the agent)
- it does not compute heat scores (done by the agent)
- it does not generate trend predictions (done by the agent)
- it only reads the JSON data, fills the template and outputs HTML

This keeps the HTML report consistent with the conversation output, because
both are produced from the same data.
"""

import argparse
import json
import os
from datetime import datetime
from typing import Dict, List


def get_platform_class(platform_name: str) -> str:
    """Return the CSS class name for a platform.

    Unknown platform names fall back to ``"weibo"``.
    """
    platform_map = {
        "微博": "weibo",
        "抖音": "douyin",
        "知乎": "zhihu",
        "B站": "bilibili",
        "快手": "kuaishou",
        "头条": "toutiao",
        "百度": "baidu",
    }
    return platform_map.get(platform_name, "weibo")


def get_platform_emoji(platform_name: str) -> str:
    """Return the emoji for a platform.

    Unknown platform names fall back to ``"📍"``.
    """
    emoji_map = {
        "微博": "🌐",
        "抖音": "🎵",
        "知乎": "📚",
        "B站": "📺",
        "快手": "🎬",
        "头条": "📰",
        "百度": "🔍",
    }
    return emoji_map.get(platform_name, "📍")


def get_hot_score_value(hot_score_str: str) -> int:
    """Convert a heat string such as ``'938万'`` into a number.

    Supported forms:
    - ``'938万'`` / ``'93.8万'``: value times 10,000
    - ``'1.2亿'``: value times 100,000,000
    - a bare number such as ``'938'``: treated as a count of 万 (times
      10,000), which is what this function has always done for such input

    Returns 0 when the argument is not a string or cannot be parsed.
    """
    if not isinstance(hot_score_str, str):
        return 0

    text = hot_score_str.strip()
    if text.endswith("亿"):
        multiplier = 100000000
        text = text[:-1]
    else:
        multiplier = 10000
        text = text.replace("万", "")

    # Integer path first so whole-number inputs stay exact.
    try:
        return int(text) * multiplier
    except ValueError:
        pass

    # Fractional values ("93.8万") used to be rejected and reported as 0,
    # which made the caller pick the wrong header style.
    try:
        return round(float(text) * multiplier)
    except (ValueError, OverflowError):
        # ValueError: unparsable text or NaN; OverflowError: infinity.
        return 0


def get_rank_header_class(rank: int, hot_score_str: str) -> str:
    """Return the card-header CSS class for a rank and heat string.

    Ranks 1-3 get dedicated classes; every other rank is styled by heat:
    at least 10,000,000 is ``rank-hot``, at least 5,000,000 is
    ``rank-medium``, anything lower is ``rank-normal``.
    """
    hot_score = get_hot_score_value(hot_score_str)
    if rank == 1:
        return "rank-1-header"
    elif rank == 2:
        return "rank-2-header"
    elif rank == 3:
        return "rank-3-header"
    elif hot_score >= 10000000:
        return "rank-hot"
    elif hot_score >= 5000000:
        return "rank-medium"
    else:
        return "rank-normal"


def get_rank_badge_class(rank: int) -> str:
    """Return the rank-badge CSS class (``rank-other`` beyond the top 3)."""
    if rank == 1:
        return "rank-1"
    elif rank == 2:
        return "rank-2"
    elif rank == 3:
        return "rank-3"
    else:
        return "rank-other"


def generate_table_row(hotspot: Dict, rank: int) -> str:
    """Generate one row of the TOP10 table.

    Reads ``title``, ``hot_score``, ``platform_count`` and ``duration`` from
    ``hotspot``; a missing key raises ``KeyError``.
    """
    # NOTE(review): the literal below carries no table markup as it appears
    # in this file, and the values are interpolated without HTML escaping --
    # confirm both against the intended template.
    return f''' {rank} {hotspot["title"]} {hotspot["hot_score"]} {hotspot["platform_count"]}个 {hotspot["duration"]} '''


def generate_hotspot_card(hotspot: Dict, rank: int) -> str:
    """Generate the HTML for one hotspot card."""
    platforms = hotspot.get("platforms", [])
    discussions = hotspot.get("discussions", [])
    prediction = hotspot.get("prediction", "")
    prediction_emoji = hotspot.get("prediction_emoji", "🔥")

    # Build the platform tags
    platform_tags
= "" for plat in platforms: plat_class = get_platform_class(plat) platform_tags += f''' {plat}\n''' # 构建discussions的平台索引,用于快速查找 disc_map = {d["platform"]: d for d in discussions} # 生成讨论差异:必须覆盖platforms中所有在榜平台 discussion_items = "" for plat in platforms: plat_class = get_platform_class(plat) if plat in disc_map: # 该平台有讨论数据,使用原始数据 disc = disc_map[plat] focus = disc.get("focus", "") topics = disc.get("topics", []) # 构建话题链接 topic_links = [] for t in topics[:3]: title = t.get("title", "") url = t.get("url", "") if url: topic_links.append(f'{title}') else: topic_links.append(title) topics_str = "、".join([f"「{t}」" for t in topic_links]) discussion_items += f'''
{get_platform_emoji(plat)} {plat}
{focus},如{topics_str}
\n''' else: # 该平台在榜但discussions中缺失,兜底补全 discussion_items += f'''
{get_platform_emoji(plat)} {plat}
{plat}用户关注该事件
\n''' return f'''
{rank}

{hotspot["title"]}

热度:{hotspot["hot_score"]}
上榜平台:{hotspot["platform_count"]}个
持续时长:{hotspot["duration"]}
最高排名:TOP{hotspot["max_position"]}
上榜平台
{platform_tags}
跨平台讨论差异
{discussion_items}
{prediction_emoji} 综合预测
{prediction}
''' def generate_html_report(data: Dict, template_path: str) -> str: """生成完整的HTML报告""" # 读取模板 with open(template_path, "r", encoding="utf-8") as f: template = f.read() # 获取数据 query_range = data.get("query_range", {}) start_date = query_range.get("start_date", "") end_date = query_range.get("end_date", "") hotspots = data.get("hotspots", []) if not hotspots: return "

无热点数据

" # 生成TOP10表格内容 table_rows = "" for i, hotspot in enumerate(hotspots[:10], 1): table_rows += generate_table_row(hotspot, i) + "\n" # 生成热点卡片 cards = "" for i, hotspot in enumerate(hotspots[:10], 1): cards += generate_hotspot_card(hotspot, i) + "\n" # 替换模板中的占位符 html = template.replace("{start_date}", start_date) html = html.replace("{end_date}", end_date) # 替换表格tbody html = html.replace( '''
\n
\n{cards}