#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""HTML generator for the WeChat Official Account composite-strength ranking.

Fetches the ranking from the API and renders a page that can be opened on
its own.

Usage:
    python gen_gzh_html.py [--rank_type day|week|month]
                           [--rank_date YYYY-MM-DD] [--category NAME]
                           [--keyword TEXT] [--top N] [--output PATH]

Output:
    ``--output`` if given, otherwise
    ``公众号综合实力<日|周|月>_<category>_<YYYYmmddHHMMSS>.html`` in the same
    directory as this script.

Style features of the page (as described by the original author):
    - Official Account look (green theme #07c160)
    - Card layout
    - Account avatar + name + metrics
    - TOP3 medal badges with a highlighted left border
    - Export to PDF / image
    - Maximum page width 750px
"""

import argparse
import calendar
import http.client
import json
import os
import socket
import ssl
import sys
from datetime import datetime, timedelta
from html import escape
from urllib.parse import quote

# ===== Constants =====
API_HOST = "onetotenvip.com"
API_PATH = "/story/cozeSkill/getGzhCozeSkillDataIndex"
SOURCE = "公众号综合实力账号榜-ClawHub"

DATE_FORMAT = "%Y-%m-%d"
DEFAULT_CATEGORY = "人文资讯"
RANK_TYPE_LABELS = {"day": "日榜", "week": "周榜", "month": "月榜"}

CATEGORIES = [
    "总排名", "乐活生活", "人文资讯", "企业品牌", "体育娱乐", "健康养生",
    "创投商业", "学术研究", "情感心理", "房产楼市", "搞笑幽默", "教育考试",
    "文摘精选", "旅游出行", "时尚潮流", "民生资讯", "汽车交通", "知识百科",
    "科技数码", "美容美体", "美食餐饮", "职场发展", "财富理财"
]

# Keyword -> category lookup. Dict order matters: the first category with a
# matching keyword wins (e.g. "投资" resolves to "创投商业", not "财富理财").
CATEGORY_KEYWORDS = {
    "总排名": ["总排名", "综合", "全部", "热门", "推荐", "随便", "总榜", "整体"],
    "乐活生活": ["乐活", "生活", "日常", "生活方式", "生活日常", "好物推荐"],
    "人文资讯": ["人文", "资讯", "文化", "历史", "哲学", "人文社科"],
    "企业品牌": ["企业", "品牌", "公司", "商业品牌", "品牌营销"],
    "体育娱乐": ["体育", "娱乐", "运动", "健身", "篮球", "足球", "综艺", "明星"],
    "健康养生": ["健康", "养生", "保健", "中医", "调理", "减肥", "瘦身"],
    "创投商业": ["创投", "商业", "投资", "创业", "融资", "商业模式"],
    "学术研究": ["学术", "研究", "论文", "科研", "学报"],
    "情感心理": ["情感", "心理", "恋爱", "婚姻", "情绪", "心理咨询"],
    "房产楼市": ["房产", "楼市", "买房", "房价", "地产", "租房", "装修"],
    "搞笑幽默": ["搞笑", "幽默", "段子", "吐槽", "沙雕", "表情包"],
    "教育考试": ["教育", "考试", "培训", "考研", "考公", "留学", "英语", "学习"],
    "文摘精选": ["文摘", "精选", "美文", "散文", "故事", "文章精选"],
    "旅游出行": ["旅游", "出行", "旅行", "攻略", "景点", "酒店", "度假"],
    "时尚潮流": ["时尚", "潮流", "穿搭", "服饰", "OOTD", "ootd", "搭配"],
    "民生资讯": ["民生", "社会", "热点", "时事", "政策", "民生新闻"],
    "汽车交通": ["汽车", "交通", "车", "新能源", "电动车", "买车"],
    "知识百科": ["知识", "百科", "科普", "常识", "冷知识"],
    "科技数码": ["科技", "数码", "手机", "电脑", "智能", "AI", "互联网", "软件", "硬件"],
    "美容美体": ["美容", "美体", "护肤", "化妆", "美妆", "彩妆"],
    "美食餐饮": ["美食", "餐饮", "做饭", "烹饪", "餐厅", "探店", "食谱"],
    "职场发展": ["职场", "工作", "求职", "面试", "跳槽", "升职", "加薪", "简历"],
    "财富理财": ["财富", "理财", "投资", "基金", "股票", "保险", "财务", "赚钱"],
}

# Ranking publication schedule.
DAY_UPDATE_HOUR = 17
DAY_UPDATE_MINUTE = 30
MONTH_UPDATE_DAY = 3
MONTH_UPDATE_HOUR = 23
MONTH_UPDATE_MINUTE = 0

# How far back each ranking type can be queried.
DAY_MAX_DAYS_BACK = 7
WEEK_MAX_WEEKS_BACK = 3
MONTH_MAX_MONTHS_BACK = 3


# ===== Date calculation =====
def _shift_month(year, month, delta):
    """Return ``(year, month)`` moved by ``delta`` months (may be negative)."""
    index = year * 12 + (month - 1) + delta
    return index // 12, index % 12 + 1


def _is_after_day_update(now=None):
    """Return True if ``now`` is at or past today's 17:30 daily update time."""
    if now is None:
        now = datetime.now()
    cutoff = now.replace(hour=DAY_UPDATE_HOUR, minute=DAY_UPDATE_MINUTE,
                         second=0, microsecond=0)
    return now >= cutoff


def _is_after_month_update(now=None):
    """Return True if ``now`` is at or past this month's update (3rd, 23:00)."""
    if now is None:
        now = datetime.now()
    # Day 3 exists in every month, so ``replace`` cannot fail here.
    cutoff = now.replace(day=MONTH_UPDATE_DAY, hour=MONTH_UPDATE_HOUR,
                         minute=MONTH_UPDATE_MINUTE, second=0, microsecond=0)
    return now >= cutoff


def get_latest_query_date(rank_type="day", now=None):
    """Return the most recent rank date that can already be queried.

    The rank date is the first day of the data period:

    - ``day``: yesterday once today's 17:30 update has passed, otherwise the
      day before yesterday.
    - ``week``: the Monday of the last *completed* week. A week is published
      on the following Monday at 17:30 (see ``_get_update_time_label``), so
      only on a Monday before 17:30 does the answer fall back one more week.
    - ``month``: the 1st of last month once this month's update (3rd, 23:00)
      has passed, otherwise the 1st of the month before that.

    Args:
        rank_type: "day", "week" or "month". Any other value yields today.
        now: Reference time; defaults to ``datetime.now()``.

    Returns:
        The date as a ``YYYY-MM-DD`` string.
    """
    if now is None:
        now = datetime.now()

    if rank_type == "day":
        days_back = 1 if _is_after_day_update(now) else 2
        return (now - timedelta(days=days_back)).strftime(DATE_FORMAT)

    if rank_type == "week":
        this_monday = now - timedelta(days=now.weekday())
        # Last week's ranking is released this Monday at 17:30. Comparing
        # against *today's* 17:30 would flip the answer every day and could
        # return the current, still unfinished week.
        release = this_monday.replace(hour=DAY_UPDATE_HOUR,
                                      minute=DAY_UPDATE_MINUTE,
                                      second=0, microsecond=0)
        weeks_back = 1 if now >= release else 2
        return (this_monday - timedelta(weeks=weeks_back)).strftime(DATE_FORMAT)

    if rank_type == "month":
        months_back = 1 if _is_after_month_update(now) else 2
        year, month = _shift_month(now.year, now.month, -months_back)
        return datetime(year, month, 1).strftime(DATE_FORMAT)

    return now.strftime(DATE_FORMAT)


def get_earliest_query_date(rank_type="day", now=None):
    """Return the earliest rank date that can still be queried.

    Args:
        rank_type: "day", "week" or "month". Any other value yields today.
        now: Reference time; defaults to ``datetime.now()``.

    Returns:
        The date as a ``YYYY-MM-DD`` string.
    """
    if now is None:
        now = datetime.now()

    if rank_type == "day":
        return (now - timedelta(days=DAY_MAX_DAYS_BACK)).strftime(DATE_FORMAT)

    if rank_type == "week":
        this_monday = now - timedelta(days=now.weekday())
        earliest_monday = this_monday - timedelta(weeks=WEEK_MAX_WEEKS_BACK)
        return earliest_monday.strftime(DATE_FORMAT)

    if rank_type == "month":
        year, month = _shift_month(now.year, now.month, -MONTH_MAX_MONTHS_BACK)
        return datetime(year, month, 1).strftime(DATE_FORMAT)

    return now.strftime(DATE_FORMAT)


def validate_and_adjust_date(rank_type, user_date_str, now=None):
    """Clamp a user-supplied date into the queryable range.

    Args:
        rank_type: "day", "week" or "month".
        user_date_str: Requested date, ``YYYY-MM-DD``.
        now: Reference time; defaults to ``datetime.now()``.

    Returns:
        A dict with ``original_date``, ``adjusted_date``, ``is_adjusted`` and
        ``reminder`` (a user-facing message, empty when nothing changed).

    Raises:
        ValueError: If ``user_date_str`` is not a valid ``YYYY-MM-DD`` date.
    """
    if now is None:
        now = datetime.now()
    user_date = datetime.strptime(user_date_str, DATE_FORMAT)
    latest = datetime.strptime(get_latest_query_date(rank_type, now), DATE_FORMAT)
    earliest = datetime.strptime(get_earliest_query_date(rank_type, now), DATE_FORMAT)

    if earliest <= user_date <= latest:
        return {
            "original_date": user_date_str,
            "adjusted_date": user_date_str,
            "is_adjusted": False,
            "reminder": "",
        }

    nearest = latest if user_date > latest else earliest
    rank_label = RANK_TYPE_LABELS.get(rank_type, "日榜")
    reminder = (
        '非常抱歉🙏,目前公众号榜单最多支持回溯「近7天的日榜/近3周的周榜/近3个月的月榜」,'
        '我将为您查询最接近您需求时间的{}数据⭐~'
    ).format(rank_label)
    return {
        "original_date": user_date_str,
        "adjusted_date": nearest.strftime(DATE_FORMAT),
        "is_adjusted": True,
        "reminder": reminder,
    }


def get_query_date(rank_type="day", user_date=None, now=None):
    """Decide which rank date to query.

    Args:
        rank_type: "day", "week" or "month".
        user_date: Date requested by the user (``YYYY-MM-DD``), or None to
            use the latest available ranking.
        now: Reference time; defaults to ``datetime.now()``.

    Returns:
        A tuple ``(rank_date, is_auto, reminder)``: the date string to query,
        whether it was inferred automatically, and a user-facing reminder
        (possibly empty).

    Raises:
        ValueError: If ``user_date`` is given but is not ``YYYY-MM-DD``.
    """
    if now is None:
        now = datetime.now()

    if user_date:
        validation = validate_and_adjust_date(rank_type, user_date, now)
        return validation["adjusted_date"], False, validation["reminder"]

    rank_date = get_latest_query_date(rank_type, now)

    reminder = ""
    if rank_type == "day" and not _is_after_day_update(now):
        # Today's daily ranking has not been published yet; tell the user.
        yesterday = (now - timedelta(days=1)).strftime(DATE_FORMAT)
        reminder = (
            '日榜数据暂未更新,将为您查询最接近您需求日期的榜单数据⭐~\n'
            f'推荐查询昨日更新的最新榜单({yesterday}日榜)'
        )
    return rank_date, True, reminder


# ===== Category matching =====
def match_category(user_input):
    """Map free-form user input to one of ``CATEGORIES``.

    Surrounding whitespace is ignored. An exact category name wins;
    otherwise the first category owning a keyword contained in the input
    (case-insensitive) is returned. Falls back to ``DEFAULT_CATEGORY``.
    """
    if not user_input:
        return DEFAULT_CATEGORY

    # Strip before the exact-match test, otherwise " 民生资讯 " would fall
    # through to keyword matching and hit "资讯" -> "人文资讯".
    text = user_input.strip()
    if text in CATEGORIES:
        return text

    lowered = text.lower()
    for category, keywords in CATEGORY_KEYWORDS.items():
        if any(keyword.lower() in lowered for keyword in keywords):
            return category
    return DEFAULT_CATEGORY


# ===== HTTP request =====
def _error_result(rank_type, rank_date, category):
    """Build the empty result returned when the API response is unusable."""
    return {
        "fetch_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "rank_type": rank_type,
        "rank_date": rank_date,
        "category": category,
        "account_list": [],
    }


def _to_score(value):
    """Convert the API composite score to a float rounded to 1 decimal.

    Missing, falsy or non-numeric values yield 0.
    """
    if not value:
        return 0
    try:
        return round(float(value), 1)
    except (TypeError, ValueError):
        return 0


def _build_account_list(data):
    """Normalise raw API rows, keeping the order returned by the API."""
    # Keep the API order; never re-sort. The composite score is taken
    # directly from the ``comprehensiveScore`` field.
    rows = [item for item in data if isinstance(item, dict)]
    return [
        {
            "index": position,
            "rankPosition": item.get("rankPosition", position),
            "accountName": item.get("accountName", ""),
            "accountId": item.get("accountId", ""),
            "accountAvatar": item.get("accountAvatar", ""),
            "category": item.get("category", ""),
            "compositeScore": _to_score(item.get("comprehensiveScore")),
            "totalReadCount": item.get("totalReadCount", 0),
            "headlineReadCount": item.get("headlineReadCount", 0),
            "maxReadCount": item.get("maxReadCount", 0),
            "totalLikeCount": item.get("totalLikeCount", 0),
            "totalForwardCount": item.get("totalForwardCount", 0),
            "totalInSeeCount": item.get("totalInSeeCount", 0),
            "publishCount": item.get("publishCount", "-"),
        }
        for position, item in enumerate(rows, start=1)
    ]


def fetch_gzh_growth(rank_type="day", rank_date=None, category="人文资讯"):
    """Fetch the composite-strength ranking from the API.

    Args:
        rank_type: "day", "week" or "month".
        rank_date: Rank date (``YYYY-MM-DD``); defaults to the latest
            available date for ``rank_type``.
        category: Category name sent to the API.

    Returns:
        A dict with ``fetch_time``, ``rank_type``, ``rank_date``,
        ``category``, ``account_list`` and, for a usable response,
        ``time_range``. A non-200 status or an unparsable response yields an
        empty ``account_list``.

    Raises:
        OSError: On connection, TLS or timeout failures.
    """
    if not rank_date:
        rank_date = get_latest_query_date(rank_type)

    path = (
        f"{API_PATH}?rankType={rank_type}&rankDate={rank_date}"
        f"&category={quote(category)}&source={quote(SOURCE)}"
    )

    # NOTE(review): certificate verification is disabled, as in the original
    # implementation, so the connection is open to man-in-the-middle
    # attacks. Remove the next two lines once the host's certificate chain
    # is confirmed to validate.
    context = ssl.create_default_context()
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    # http.client handles chunked transfer-encoding and status-line parsing,
    # which a raw-socket HTTP/1.1 request cannot do reliably.
    connection = http.client.HTTPSConnection(API_HOST, 443, timeout=30,
                                             context=context)
    try:
        connection.request("GET", path, headers={
            "Accept": "application/json",
            "Connection": "close",
        })
        response = connection.getresponse()
        status_code = response.status
        body = response.read().decode("utf-8", errors="ignore")
    except http.client.HTTPException:
        # Malformed HTTP response: treat it like any other unusable reply.
        return _error_result(rank_type, rank_date, category)
    finally:
        connection.close()

    if status_code != 200:
        return _error_result(rank_type, rank_date, category)

    try:
        api_response = json.loads(body)
    except ValueError:
        return _error_result(rank_type, rank_date, category)

    if isinstance(api_response, dict):
        data = api_response.get("data", [])
    elif isinstance(api_response, list):
        data = api_response
    else:
        data = []
    if not isinstance(data, list):
        data = []

    return {
        "fetch_time": _get_update_time_label(rank_type, rank_date),
        "rank_type": rank_type,
        "rank_date": rank_date,
        "category": category,
        "time_range": _get_data_time_range(rank_type, rank_date),
        "account_list": _build_account_list(data),
    }


def _get_data_time_range(rank_type, rank_date):
    """Describe the data period covered by a ranking.

    Raises:
        ValueError: If ``rank_date`` is not ``YYYY-MM-DD``.
    """
    date_obj = datetime.strptime(rank_date, DATE_FORMAT)
    if rank_type == "day":
        return f"{rank_date}"
    if rank_type == "week":
        end_date = date_obj + timedelta(days=6)
        return f"{date_obj.strftime('%Y-%m-%d')}至{end_date.strftime('%Y-%m-%d')}"
    if rank_type == "month":
        last_day = calendar.monthrange(date_obj.year, date_obj.month)[1]
        year_month = date_obj.strftime("%Y-%m")
        return f"{year_month}-01至{year_month}-{last_day:02d}"
    return rank_date


def _get_update_time_label(rank_type, rank_date):
    """Return when the ranking for ``rank_date`` was (or will be) published.

    Daily rankings are published the next day at 17:30, weekly rankings the
    following Monday at 17:30 and monthly rankings on the 3rd of the next
    month at 23:00. Unknown rank types return ``rank_date`` unchanged.
    """
    if rank_type == "day":
        published = datetime.strptime(rank_date, DATE_FORMAT) + timedelta(days=1)
        return f"{published.strftime('%Y-%m-%d')} 17:30"
    if rank_type == "week":
        published = datetime.strptime(rank_date, DATE_FORMAT) + timedelta(weeks=1)
        return f"{published.strftime('%Y-%m-%d')} 17:30"
    if rank_type == "month":
        d = datetime.strptime(rank_date, DATE_FORMAT)
        year, month = _shift_month(d.year, d.month, 1)
        published = datetime(year, month, MONTH_UPDATE_DAY)
        return f"{published.strftime('%Y-%m-%d')} 23:00"
    return rank_date


# ===== HTML generation =====
def generate_html(result, top_n=50):
    """Render the composite-strength ranking page.

    Args:
        result: Ranking data as returned by ``fetch_gzh_growth``. Must
            contain ``account_list`` and ``fetch_time``.
        top_n: Number of accounts to show (default 50, capped at 100 and at
            the number of accounts available; negative values show none).

    Returns:
        The page as a string.
    """
    account_list = result["account_list"]
    rank_date = result.get("rank_date", "")
    rank_type = result.get("rank_type", "day")
    rank_type_label = RANK_TYPE_LABELS.get(rank_type, "日榜")

    # Compute the fallback lazily: passing it as the ``dict.get`` default
    # would parse ``rank_date`` even when ``time_range`` is present, and
    # raise on an empty ``rank_date``.
    time_range = result.get("time_range")
    if time_range is None:
        time_range = _get_data_time_range(rank_type, rank_date) if rank_date else ""

    # Values come from the CLI or the API; escape them before they go into
    # the markup.
    category = escape(str(result.get("category", DEFAULT_CATEGORY)))
    fetch_time = escape(str(result["fetch_time"]))
    time_range = escape(str(time_range))

    top_n = max(0, min(top_n, len(account_list), 100))
    account_list = account_list[:top_n]

    js_data = json.dumps(account_list, ensure_ascii=False, indent=2)
    # Keep a literal "</script>" inside a value from closing a script block.
    js_data = js_data.replace("</", "<\\/")
    page_title = f"公众号综合实力{rank_type_label} - {category}"

    # NOTE(review): the template below has no markup and no ``{js_data}``
    # placeholder, so the ``replace`` after it is a no-op. The tags were
    # probably stripped somewhere upstream - restore the full template.
    page = f''' {page_title}

公众号综合实力{rank_type_label}{category}

更新时间:{fetch_time}
*公众号综合实力{rank_type_label},基于阅读、点赞、转发、在看等多维数据综合排名*
*数据统计时间周期:{time_range}*
--
公众号数
--
最高总阅读
--
平均总阅读
'''
    # Replace js_data placeholder
    page = page.replace("{js_data}", js_data)
    return page


# ===== Command line =====
def _date_arg(value):
    """argparse type: accept only ``YYYY-MM-DD`` dates."""
    try:
        datetime.strptime(value, DATE_FORMAT)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}, expected YYYY-MM-DD") from None
    return value


def main(argv=None):
    """Fetch the ranking, write the HTML file and return an exit code."""
    parser = argparse.ArgumentParser(description='生成公众号综合实力榜HTML页面')
    parser.add_argument('--rank_type', type=str, default='day',
                        choices=['day', 'week', 'month'], help='榜单类型')
    parser.add_argument('--rank_date', type=_date_arg, help='查询日期,格式 YYYY-MM-DD')
    parser.add_argument('--category', type=str, default=None, help='分类名称')
    parser.add_argument('--keyword', type=str, help='用户输入的关键词,用于自动匹配分类')
    parser.add_argument('--output', type=str, help='输出文件路径')
    parser.add_argument('--top', type=int, default=50, help='显示条数,默认50')
    args = parser.parse_args(argv)

    # Resolve the category: an explicit --category beats --keyword matching.
    if args.keyword and not args.category:
        category = match_category(args.keyword)
        print(f"根据关键词【{args.keyword}】匹配到分类:【{category}】", file=sys.stderr)
    elif args.category:
        category = args.category
    else:
        category = DEFAULT_CATEGORY

    # Resolve the date.
    rank_date, _is_auto, reminder = get_query_date(args.rank_type, args.rank_date)
    rank_type_label = RANK_TYPE_LABELS.get(args.rank_type, "日榜")

    print(f"正在获取公众号综合实力{rank_type_label}数据...", file=sys.stderr)
    print(f"当前时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", file=sys.stderr)
    if reminder:
        print(f"提醒: {reminder}", file=sys.stderr)
    print(f"榜单类型: {rank_type_label}", file=sys.stderr)
    print(f"查询日期: {rank_date}", file=sys.stderr)
    print(f"分类:{category}", file=sys.stderr)

    try:
        result = fetch_gzh_growth(rank_type=args.rank_type,
                                  rank_date=rank_date, category=category)
    except OSError as exc:
        print(f"获取榜单数据失败: {exc}", file=sys.stderr)
        return 1

    page = generate_html(result, top_n=args.top)

    if args.output:
        output_path = args.output
    else:
        rank_short = {"day": "日", "week": "周", "month": "月"}.get(args.rank_type, "日")
        # One ``now()`` call so date and time cannot straddle midnight.
        timestamp_str = datetime.now().strftime("%Y%m%d%H%M%S")
        # Keep path separators in a user-supplied category out of the name.
        safe_category = category
        for separator in filter(None, (os.sep, os.altsep)):
            safe_category = safe_category.replace(separator, "_")
        filename = f"公众号综合实力{rank_short}_{safe_category}_{timestamp_str}.html"
        output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(page)

    total = len(result['account_list'])
    # Report the number actually rendered, not just the requested --top.
    shown = max(0, min(args.top, total, 100))
    print(f"已生成:{output_path}", file=sys.stderr)
    print(f"共 {total} 条{rank_type_label}数据,展示TOP{shown}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())