#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
公众号原创爆款文章HTML生成脚本
功能:
1. 读取API数据
2. 生成公众号风格的HTML页面
3. 支持PDF导出
使用方法:
python generate_hot_html.py --temp_file temp_articles.json --output ranking.html
python generate_hot_html.py --temp_file temp_articles.json --output ranking.html --display_count 10
"""
import argparse
import html
import json
import os
from datetime import datetime
def get_rank_display(rank: int) -> str:
    """Return the label displayed for a ranking position.

    Positions 1, 2 and 3 are shown as the gold, silver and bronze medal
    emoji; every other position is shown as its string form.
    """
    podium = ((1, "🥇"), (2, "🥈"), (3, "🥉"))
    for position, medal in podium:
        if rank == position:
            return medal
    return str(rank)
def get_article_html(article: dict, rank: int, is_top: bool = False) -> str:
    """Render one article as a fragment of the ranking page.

    Every value taken from ``article`` is HTML-escaped before it is placed
    in the fragment, so markup characters in the data are shown literally.

    Args:
        article: Article record. The keys read are ``title``, ``userName``
            (falling back to ``accountId``), ``clicksCount`` and
            ``publicTime``. Missing, ``None`` or empty values fall back to
            a placeholder (or to an empty date).
        rank: Position in the ranking, rendered via ``get_rank_display``.
        is_top: Accepted for interface compatibility; it does not change
            the rendered fragment.

    Returns:
        The fragment text, or an empty string when ``article`` is not a
        dict (such entries are skipped rather than raising).
    """
    if not isinstance(article, dict):
        return ""

    # ``or`` instead of a .get() default so that explicit nulls in the JSON
    # data fall back to the placeholder instead of rendering as "None".
    title = article.get("title") or "未知标题"
    account = article.get("userName") or article.get("accountId") or "未知账号"
    reads = article.get("clicksCount") or "0"

    # Only the leading 10 characters of the publish time are displayed.
    public_time = article.get("publicTime")
    date = str(public_time)[:10] if public_time else ""

    rank_display = get_rank_display(rank)
    safe_title = html.escape(str(title))
    safe_account = html.escape(str(account))
    safe_reads = html.escape(str(reads))
    safe_date = html.escape(date)

    return (
        f"\n{rank_display}"
        f"\n{safe_title}"
        f"\n👤{safe_account}"
        f"\n📖 阅读 {safe_reads}"
        f"\n📅 {safe_date}\n"
    )
def generate_html(keyword: str, articles: list, insights: dict = None, top_n: int = 10) -> str:
    """Build the text of the ranking page for a keyword.

    The page consists of the header lines followed by one fragment per
    rendered article (produced by ``get_article_html``).

    Args:
        keyword: Topic shown in the two heading lines; HTML-escaped before
            use.
        articles: Article records. At most the first ``top_n`` are
            rendered; ``None`` or an empty list yields the header only.
        insights: Not used by this function; kept so existing callers keep
            working.
        top_n: Maximum number of articles to render.

    Returns:
        The page text.
    """
    safe_keyword = html.escape(str(keyword))

    fragments = []
    for position, article in enumerate((articles or [])[:top_n], 1):
        try:
            # The first three positions are flagged as top entries.
            fragments.append(get_article_html(article, position, position <= 3))
        except Exception:
            # Best effort: one malformed record must not abort the page.
            continue

    header = (
        f"\n{safe_keyword} · 公众号原创爆款文章"
        f"\n{safe_keyword} · 原创爆款文章"
        "\n每日推送公众号原创爆款内容,解析流量密码\n"
    )
    # The rendered articles are appended to the header; without this the
    # page would contain no article entries at all.
    return header + "".join(fragments)
def main():
    """Command-line entry point: read the article JSON and write the page.

    Reads the JSON file given by ``--temp_file`` (an object with the keys
    ``keyword`` and ``articles``), renders the first ``--display_count``
    articles (all of them when the option is omitted) with
    ``generate_html`` and writes the result to ``--output``.

    Returns:
        0 on success, 1 on any failure. Failures are reported on stderr.
    """
    # Imported locally so that main() also works when this module is
    # imported and main() is called directly, not only when run as a script.
    import sys

    parser = argparse.ArgumentParser(description="生成公众号原创爆款文章HTML")
    parser.add_argument("--temp_file", default="temp_articles.json", help="临时JSON文件路径")
    parser.add_argument("--output", default="ranking.html", help="输出文件路径")
    parser.add_argument(
        "--display_count",
        type=int,
        default=None,
        help="要展示的文章数量(如果不指定,则展示所有文章)",
    )
    args = parser.parse_args()

    try:
        # Load keyword and articles from the temporary JSON file.
        try:
            with open(args.temp_file, "r", encoding="utf-8") as f:
                temp_data = json.load(f)
            keyword = temp_data.get("keyword", "热门文章")
            articles = temp_data.get("articles", [])
            if not isinstance(articles, list):
                articles = []
        except FileNotFoundError:
            print(f"错误: 临时文件不存在 - {args.temp_file}", file=sys.stderr)
            return 1
        except json.JSONDecodeError:
            print(f"错误: 临时文件格式错误 - {args.temp_file}", file=sys.stderr)
            return 1
        except Exception as e:
            print(f"错误: 读取临时文件失败 - {e}", file=sys.stderr)
            return 1

        if not articles:
            print("错误: 临时文件中没有文章数据", file=sys.stderr)
            return 1

        # Without --display_count every article is shown.
        display_count = args.display_count if args.display_count is not None else len(articles)
        display_articles = articles[:display_count]
        print(f"✅ 从临时文件读取到 {len(articles)} 条文章数据,本次展示 {len(display_articles)} 条")

        # Render only the selected articles, not the full list.
        try:
            html_content = generate_html(keyword, display_articles, None, len(display_articles))
        except Exception as e:
            print(f"错误: 生成HTML失败 - {e}", file=sys.stderr)
            return 1

        try:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(html_content)
        except Exception as e:
            print(f"错误: 写入文件失败 - {e}", file=sys.stderr)
            return 1

        print(f"✅ HTML文件已生成: {args.output}")
        return 0
    except Exception as e:
        # Top-level boundary: anything unexpected becomes exit status 1.
        print(f"错误: {e}", file=sys.stderr)
        return 1
if __name__ == "__main__":
    # Script entry: `sys` is imported here, inside the guard, and becomes a
    # module-level name once this line has run.
    import sys

    # Use main()'s return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)