"""Match restaurants across Dianping and Xiaohongshu platforms."""

import math
import re
from typing import List, Tuple, Dict
from dataclasses import dataclass
from thefuzz import fuzz

from fetch_dianping import DianpingRestaurant
from fetch_xiaohongshu import XiaohongshuPost


@dataclass
class MatchedRestaurant:
    """Restaurant matched across both platforms.

    Bundles one Dianping record with the Xiaohongshu post paired with it,
    together with the scores attached to that pairing.
    """
    # Restaurant name used to identify the matched pair.
    name: str
    # Record from the Dianping side of the match.
    dianping_data: DianpingRestaurant
    # Post from the Xiaohongshu side of the match.
    xhs_data: XiaohongshuPost
    similarity_score: float  # 0-1, how confident the match is
    # Declared explicitly with a default so callers never need a hasattr check.
    consistency_score: float = 0.0


class RestaurantMatcher:
    """Match restaurants from different platforms using fuzzy name matching.

    Names are normalized (branch decorations, whitespace and separator
    punctuation removed) and compared with several ``thefuzz`` strategies.
    A pair is accepted when its score reaches ``similarity_threshold``; each
    Xiaohongshu post is assigned to at most one Dianping restaurant.
    """

    # Common chain-store decorations that are ignored when comparing names:
    # a short parenthesised note (half- or full-width brackets), or a trailing
    # district name optionally followed by a branch word.
    CHAIN_SUFFIXES = re.compile(
        r'[（(].{0,10}[)）]|'
        r'(静安|徐汇|浦东|朝阳|海淀|南山|福田|天河|武侯|锦江)'
        r'(店|分店|旗舰店|总店)?$'
    )

    # Whitespace and separator punctuation dropped during normalization.
    _SEPARATORS = re.compile(r'[\s·・\-—]+')

    def __init__(self, similarity_threshold: float = 0.6):
        """Store the minimum score (0-1) a pair needs to count as a match."""
        self.similarity_threshold = similarity_threshold

    def match(
        self,
        dianping_restaurants: List[DianpingRestaurant],
        xhs_posts: List[XiaohongshuPost]
    ) -> List[MatchedRestaurant]:
        """
        Match Dianping restaurants with Xiaohongshu posts.

        Every (restaurant, post) pair is scored once, and pairs are then
        assigned best-score-first, so the result does not depend on the order
        of ``dianping_restaurants``: a restaurant early in the list can no
        longer take a post that a later restaurant matches better. Ties are
        broken by Dianping index, then post index.

        Args:
            dianping_restaurants: List from Dianping
            xhs_posts: List from Xiaohongshu

        Returns:
            List of matched restaurants with confidence scores, in the order
            the restaurants appear in ``dianping_restaurants``. Restaurants
            without a post at or above the threshold are omitted.
        """
        restaurants = list(dianping_restaurants)
        posts = list(xhs_posts)

        # Normalize each name once instead of once per compared pair.
        dp_names = [self._normalize_name(r.name) for r in restaurants]
        xhs_names = [self._normalize_name(p.restaurant_name) for p in posts]

        # Collect every pair that clears the threshold. The score is stored
        # negated so a plain ascending sort yields best-first order with
        # deterministic index tie-breaking.
        candidates = []
        for dp_idx, dp_norm in enumerate(dp_names):
            for xhs_idx, xhs_norm in enumerate(xhs_names):
                score = self._score_normalized(dp_norm, xhs_norm)
                if score > 0 and score >= self.similarity_threshold:
                    candidates.append((-score, dp_idx, xhs_idx))
        candidates.sort()

        assigned = {}  # dp index -> (xhs index, score)
        used_xhs_indices = set()
        for neg_score, dp_idx, xhs_idx in candidates:
            if dp_idx in assigned or xhs_idx in used_xhs_indices:
                continue
            assigned[dp_idx] = (xhs_idx, -neg_score)
            used_xhs_indices.add(xhs_idx)

        matches = []
        for dp_idx in sorted(assigned):
            xhs_idx, score = assigned[dp_idx]
            dp_restaurant = restaurants[dp_idx]
            matches.append(MatchedRestaurant(
                name=dp_restaurant.name,
                dianping_data=dp_restaurant,
                xhs_data=posts[xhs_idx],
                similarity_score=score
            ))
        return matches

    def _normalize_name(self, name: str) -> str:
        """Normalize a restaurant name for comparison.

        Strips surrounding whitespace, removes branch decorations matched by
        ``CHAIN_SUFFIXES`` and drops whitespace/separator characters.

        A missing name (``None`` or empty) yields ``''``. If removing the
        branch decorations would erase the whole name (for example a
        restaurant whose name is just a district word), the decorations are
        kept so the restaurant can still be matched.
        """
        if not name:
            return ''
        stripped = name.strip()
        core = self._SEPARATORS.sub('', self.CHAIN_SUFFIXES.sub('', stripped))
        if core:
            return core
        return self._SEPARATORS.sub('', stripped)

    def _calculate_similarity(
        self,
        dp_restaurant: DianpingRestaurant,
        xhs_post: XiaohongshuPost
    ) -> float:
        """
        Calculate similarity score between Dianping restaurant and XHS post.

        Normalizes both names and delegates to the multi-strategy scorer.

        Returns score between 0-1.
        """
        return self._score_normalized(
            self._normalize_name(dp_restaurant.name),
            self._normalize_name(xhs_post.restaurant_name),
        )

    def _score_normalized(self, dp_norm: str, xhs_norm: str) -> float:
        """Score two already-normalized names; returns a value between 0-1."""
        if not dp_norm or not xhs_norm:
            return 0.0

        # Strategy 1: exact match after normalization.
        if dp_norm == xhs_norm:
            return 1.0

        # Strategy 2: whole-string similarity ratio.
        exact_score = fuzz.ratio(dp_norm, xhs_norm) / 100

        # Strategy 3: partial match, where one name appears inside the other.
        partial_score = fuzz.partial_ratio(dp_norm, xhs_norm) / 100

        # Strategy 4: token sort, to tolerate word-order differences.
        token_score = fuzz.token_sort_ratio(dp_norm, xhs_norm) / 100

        # Strategy 5: literal containment, scaled by the length ratio so a
        # very short substring earns little credit.
        containment_score = 0.0
        if dp_norm in xhs_norm or xhs_norm in dp_norm:
            shorter, longer = sorted((len(dp_norm), len(xhs_norm)))
            containment_score = shorter / longer  # both are non-empty here

        # Take the best strategy; the looser ones are down-weighted.
        return max(
            exact_score,
            partial_score * 0.90,
            token_score * 0.85,
            containment_score * 0.88
        )


def normalize_engagement(xhs_post: XiaohongshuPost, all_posts: List = None) -> float:
    """
    Map a Xiaohongshu post's engagement onto a 0-5 rating scale.

    Engagement is a weighted sum of likes (x1.0), saves (x2.0) and
    comments (x1.5). Two scaling modes exist:

    * Batch mode, when ``all_posts`` holds more than one post: the post's
      engagement is scaled linearly against the highest engagement in the
      batch, so the top post maps to 5.
    * Standalone mode otherwise: log scaling against a fixed reference of
      5000 engagement points, which dampens extreme values.

    Args:
        xhs_post: Post with engagement metrics
        all_posts: Optional list of all posts for batch-relative scaling

    Returns:
        Normalized rating, clamped to the 0-5 range
    """
    def weighted(post):
        # Saves count the most, then comments, then likes.
        return (post.likes * 1.0) + (post.saves * 2.0) + (post.comments * 1.5)

    own_score = weighted(xhs_post)
    use_batch = bool(all_posts) and len(all_posts) > 1

    if use_batch:
        # Relative scaling against the strongest post in this batch.
        peak = max([weighted(p) for p in all_posts])
        raw = (own_score / peak) * 5 if peak > 0 else 0
    elif own_score <= 0:
        raw = 0
    else:
        # log1p(5000) is about 8.52 and acts as the full-marks reference.
        raw = math.log1p(own_score) / math.log1p(5000) * 5

    # Keep the result inside the rating scale.
    return max(0.0, min(5.0, raw))


def calculate_consistency(
    dp_rating: float,
    xhs_engagement_normalized: float,
    xhs_sentiment: float
) -> float:
    """
    Score how consistent the two platforms are for one restaurant.

    The result blends two terms:

    * rating agreement (weight 0.6): 1 when the Dianping rating equals the
      normalized XHS engagement, falling linearly to 0 at a gap of 2.5;
    * sentiment (weight 0.4): the XHS sentiment rescaled from [-1, 1] to
      [0, 1]. This term does not depend on ``dp_rating``.

    Out-of-range inputs are clamped to their documented ranges first.

    Args:
        dp_rating: Dianping rating (0-5)
        xhs_engagement_normalized: XHS engagement normalized to 0-5
        xhs_sentiment: XHS sentiment score (-1 to 1)

    Returns:
        Consistency score (0-1)
    """
    def bounded(low, value, high):
        return max(low, min(high, value))

    # Defensive clamping of every input.
    rating = bounded(0.0, dp_rating, 5.0)
    engagement = bounded(0.0, xhs_engagement_normalized, 5.0)
    sentiment = bounded(-1.0, xhs_sentiment, 1.0)

    # Rating agreement: a gap of 2.5 or more counts as no agreement.
    gap = abs(rating - engagement)
    agreement = max(0.0, 1.0 - (gap / 2.5))

    # Sentiment shifted from [-1, 1] into [0, 1].
    sentiment_share = (sentiment + 1) / 2

    blended = (agreement * 0.6) + (sentiment_share * 0.4)

    # Final clamp to the 0-1 range.
    return bounded(0.0, blended, 1.0)


def match_and_score(
    dianping_restaurants: List[DianpingRestaurant],
    xhs_posts: List[XiaohongshuPost],
    config: Dict
) -> List[MatchedRestaurant]:
    """
    Match restaurants across platforms and attach a consistency score.

    Args:
        dianping_restaurants: List from Dianping
        xhs_posts: List from Xiaohongshu
        config: Settings mapping; ``'similarity_threshold'`` is read from it
            and defaults to 0.6 when absent

    Returns:
        The matches in the order produced by ``RestaurantMatcher.match``,
        each with ``consistency_score`` filled in. No sorting is applied
        here.
    """
    threshold = config.get('similarity_threshold', 0.6)
    paired = RestaurantMatcher(similarity_threshold=threshold).match(
        dianping_restaurants, xhs_posts
    )

    for entry in paired:
        post = entry.xhs_data
        # Standalone normalization: the batch of posts is not passed.
        engagement = normalize_engagement(post)
        # Kept on the match object for later use.
        entry.consistency_score = calculate_consistency(
            entry.dianping_data.rating,
            engagement,
            post.sentiment_score
        )

    return paired