"""Fetch restaurant data from Xiaohongshu (小红书)."""

import time
import random
from typing import List, Dict, Optional
from dataclasses import dataclass
import requests
from bs4 import BeautifulSoup


@dataclass
class XiaohongshuPost:
    """Post data from Xiaohongshu."""
    restaurant_name: str
    likes: int
    saves: int
    comments: int
    sentiment_score: float  # -1.0 to 1.0 (negative to positive)
    keywords: List[str]
    url: str


class XiaohongshuFetcher:
    """Fetch restaurant data from Xiaohongshu."""

    def __init__(self, config: Dict):
        self.config = config
        self.base_url = "https://www.xiaohongshu.com"
        self.session = requests.Session()
        self._setup_headers()

    def _setup_headers(self):
        """Setup request headers to mimic browser."""
        user_agents = [
            'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15',
            'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36',
        ]
        self.session.headers.update({
            'User-Agent': random.choice(user_agents),
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Referer': 'https://www.xiaohongshu.com/',
        })

    def search(self, location: str, cuisine: str, min_notes: int = 20) -> List[XiaohongshuPost]:
        """
        Search for restaurant posts by location and cuisine.

        Args:
            location: Geographic area (e.g., "上海静安区")
            cuisine: Cuisine type (e.g., "日式料理")
            min_notes: Minimum number of notes/posts to include

        Returns:
            List of XiaohongshuPost objects
        """
        # Note: This is a simplified implementation
        # Actual implementation needs to handle:
        # - Cookie authentication (required for most searches)
        # - Anti-scraping measures (very strict)
        # - Dynamic content (JavaScript rendering)
        # - API endpoint discovery
        # - Pagination

        search_query = f"{location} {cuisine}"
        print(f"🔍 Searching Xiaohongshu for: {search_query}")

        # Simulated data for demonstration
        # In production, this would scrape actual Xiaohongshu pages
        posts = self._fetch_mock_data(location, cuisine)

        # Aggregate posts by restaurant name
        aggregated = self._aggregate_by_restaurant(posts)

        # Filter by minimum notes
        filtered = {name: data for name, data in aggregated.items()
                   if len(data['posts']) >= min_notes}

        # Convert to list of aggregated posts
        result = []
        for name, data in filtered.items():
            avg_post = self._aggregate_post_data(data['posts'])
            avg_post.restaurant_name = name
            result.append(avg_post)

        # Rate limiting
        time.sleep(self.config.get('xhs_delay', 3))

        return result

    def _aggregate_by_restaurant(self, posts: List[XiaohongshuPost]) -> Dict[str, Dict]:
        """Group posts by restaurant name."""
        aggregated = {}
        for post in posts:
            if post.restaurant_name not in aggregated:
                aggregated[post.restaurant_name] = {'posts': []}
            aggregated[post.restaurant_name]['posts'].append(post)
        return aggregated

    def _aggregate_post_data(self, posts: List[XiaohongshuPost]) -> XiaohongshuPost:
        """Calculate average metrics from multiple posts."""
        if not posts:
            return XiaohongshuPost(
                restaurant_name="",
                likes=0, saves=0, comments=0,
                sentiment_score=0.0, keywords=[], url=""
            )

        total_posts = len(posts)
        avg_likes = sum(p.likes for p in posts) // total_posts
        avg_saves = sum(p.saves for p in posts) // total_posts
        avg_comments = sum(p.comments for p in posts) // total_posts
        avg_sentiment = sum(p.sentiment_score for p in posts) / total_posts

        # Combine keywords
        all_keywords = []
        for post in posts:
            all_keywords.extend(post.keywords)
        # Get top keywords
        keyword_counts = {}
        for kw in all_keywords:
            keyword_counts[kw] = keyword_counts.get(kw, 0) + 1
        top_keywords = sorted(keyword_counts.items(), key=lambda x: x[1], reverse=True)[:5]
        top_keywords_list = [kw for kw, count in top_keywords]

        return XiaohongshuPost(
            restaurant_name="",  # Will be set by caller
            likes=avg_likes,
            saves=avg_saves,
            comments=avg_comments,
            sentiment_score=avg_sentiment,
            keywords=top_keywords_list,
            url=posts[0].url  # First post URL
        )

    def _fetch_mock_data(self, location: str, cuisine: str) -> List[XiaohongshuPost]:
        """Generate mock data for testing (replace with actual scraping)."""
        import random

        mock_data = []

        # Restaurant A - Generate 25 mock posts
        for i in range(25):
            mock_data.append(XiaohongshuPost(
                restaurant_name=f"{cuisine}店A",
                likes=random.randint(200, 500),
                saves=random.randint(50, 150),
                comments=random.randint(20, 80),
                sentiment_score=random.uniform(0.6, 0.95),
                keywords=["好吃", "环境", "值得", "正宗", "新鲜"],
                url=f"{self.base_url}/explore/12345{i}"
            ))

        # Restaurant B - Generate 30 mock posts
        for i in range(30):
            mock_data.append(XiaohongshuPost(
                restaurant_name=f"{cuisine}店B",
                likes=random.randint(100, 300),
                saves=random.randint(30, 100),
                comments=random.randint(10, 50),
                sentiment_score=random.uniform(0.5, 0.85),
                keywords=["性价比", "分量", "实惠", "性价比高"],
                url=f"{self.base_url}/explore/67890{i}"
            ))

        return mock_data


def fetch_xiaohongshu(location: str, cuisine: str, config: Dict) -> List[XiaohongshuPost]:
    """Convenience function to fetch Xiaohongshu data."""
    fetcher = XiaohongshuFetcher(config)
    return fetcher.search(location, cuisine)