#!/usr/bin/env python3
"""
Tier configuration and usage limits for PDF Field Extractor.
Token prefixes: PDF-FREE / PDF-BSC / PDF-STD / PDF-PRO / PDF-ENT
"""

from dataclasses import dataclass
from typing import List, Optional


# ─── Tier Limits ───────────────────────────────────────────────────────────────
# Cap used by tiers that have no page or batch ceiling: a `>` comparison of any
# finite request against it is always false.
_UNLIMITED = float("inf")

# Value sets shared by several tiers. They are tuples so that every tier entry
# below is built as its own list rather than sharing one list object.
_BASIC_DOC_TYPES = ("invoice", "receipt", "license", "id_card")
_ALL_DOC_TYPES = _BASIC_DOC_TYPES + ("contract", "bank_statement", "express", "generic")
_ALL_FORMATS = ("excel", "json", "text")
_CJK_OCR_LANGUAGES = ("eng", "chi_sim", "chi_tra", "jpn", "kor")
_ENT_OCR_LANGUAGES = _CJK_OCR_LANGUAGES + ("fra", "deu", "spa", "por", "rus")

# Per-tier limits, keyed by tier name. Every entry carries the same seven keys:
#   pages_per_month  - page cap compared against the requested page count
#   doc_types        - canonical document types the tier may process
#   output_formats   - output formats the tier may request
#   batch_size       - cap on the number of items in one batch
#   ocr_languages    - OCR language codes available to the tier
#   custom_fields    - whether user-defined extraction fields are allowed
#   api_access       - API access flag for the tier
TIER_LIMITS = {
    "PDF-FREE": dict(
        pages_per_month=10,
        doc_types=["invoice"],
        output_formats=["text"],
        batch_size=1,
        ocr_languages=["eng"],
        custom_fields=False,
        api_access=False,
    ),
    "PDF-BSC": dict(
        pages_per_month=200,
        doc_types=list(_BASIC_DOC_TYPES),
        output_formats=["excel", "text"],
        batch_size=10,
        ocr_languages=["eng", "chi_sim"],
        custom_fields=False,
        api_access=False,
    ),
    "PDF-STD": dict(
        pages_per_month=1000,
        doc_types=list(_ALL_DOC_TYPES),
        output_formats=list(_ALL_FORMATS),
        batch_size=50,
        ocr_languages=list(_CJK_OCR_LANGUAGES),
        custom_fields=True,
        api_access=False,
    ),
    "PDF-PRO": dict(
        pages_per_month=_UNLIMITED,
        doc_types=list(_ALL_DOC_TYPES),
        output_formats=list(_ALL_FORMATS),
        batch_size=_UNLIMITED,
        ocr_languages=list(_CJK_OCR_LANGUAGES),
        custom_fields=True,
        api_access=True,
    ),
    "PDF-ENT": dict(
        pages_per_month=_UNLIMITED,
        doc_types=list(_ALL_DOC_TYPES),
        output_formats=list(_ALL_FORMATS),
        batch_size=_UNLIMITED,
        ocr_languages=list(_ENT_OCR_LANGUAGES),
        custom_fields=True,
        api_access=True,
    ),
}

# ─── Document Type Mapping ──────────────────────────────────────────────────────
# Accepted names for each canonical document type. Keys are the canonical type
# names (the same strings listed under "doc_types" in TIER_LIMITS); each value
# lists the Chinese and English names that resolve_doc_type() maps to that key.
# Matching there is done on the lowercased, stripped input.
DOC_TYPE_ALIASES = {
    # invoice / VAT invoice / ordinary invoice / electronic invoice
    "invoice": ["发票", "invoice", "增值税发票", "普通发票", "电子发票"],
    # contract / written agreement
    "contract": ["合同", "contract", "协议书", "agreement"],
    # receipt / sales slip / voucher
    "receipt": ["收据", "receipt", "小票", "凭据"],
    # bank statement / bank transaction record / statement of account
    "bank_statement": ["银行对账单", "bank_statement", "银行流水", "对账单"],
    # business license / operating permit
    "license": ["营业执照", "license", "经营许可证"],
    # ID card / passport / identity document
    "id_card": ["身份证", "id_card", "护照", "passport", "证件"],
    # courier slip / logistics slip / waybill
    "express": ["快递单", "express", "物流单", "运单", "shipping"],
    # general / other / document
    "generic": ["通用", "generic", "其他", "文档", "document"],
}

# Default extraction field names for each canonical document type, looked up
# by get_default_fields_for_doc_type(). The field names are Chinese labels and
# are runtime data; the English glosses in the comments are for readers only.
DOC_TYPE_FIELDS = {
    # invoice number, date, amount, buyer, seller, line items, tax rate,
    # invoice code, remarks
    "invoice": ["发票号", "日期", "金额", "买方", "卖方", "商品明细", "税率", "发票代码", "备注"],
    # contract number, signing date, expiry date, amount, party A, party B,
    # address, contact person, breach clause, termination clause, payment terms
    "contract": ["合同号", "签订日期", "到期日期", "金额", "甲方", "乙方", "地址", "联系人", "违约条款", "解除条款", "付款条件"],
    # date, amount, payee, purchase description, itemized entries, tip
    "receipt": ["日期", "金额", "收款方", "消费内容", "明细项目", "小费"],
    # date, transaction amount, counterparty account, balance,
    # transaction type, summary
    "bank_statement": ["日期", "交易金额", "对方账户", "余额", "交易类型", "摘要"],
    # unified social credit code, company name, legal representative,
    # registered capital, registered address, business scope
    "license": ["统一社会信用代码", "公司名称", "法人", "注册资本", "注册地址", "经营范围"],
    # name, sex, date of birth, nationality, document number, validity period
    "id_card": ["姓名", "性别", "出生日期", "国籍", "证件号码", "有效期"],
    # waybill number, sender, recipient, address, weight, shipping fee
    "express": ["运单号", "发件人", "收件人", "地址", "重量", "运费"],
    "generic": [],  # user-defined
}


@dataclass
class TierConfig:
    """Look up the limits of a subscription tier and validate requests against them.

    Attributes:
        tier: Key into ``TIER_LIMITS`` (for example ``"PDF-STD"``). A value
            that is not a key of ``TIER_LIMITS`` is treated as ``"PDF-FREE"``.
    """

    tier: str = "PDF-FREE"

    def _shared_limits(self) -> dict:
        """Return the ``TIER_LIMITS`` entry for this tier without copying it.

        For internal read-only use; the result is the module-level object and
        must not be mutated or handed to callers.
        """
        return TIER_LIMITS.get(self.tier, TIER_LIMITS["PDF-FREE"])

    def get_limits(self) -> dict:
        """Return the limits for the current tier.

        Unknown tiers fall back to the ``"PDF-FREE"`` limits.

        Returns:
            A new dict with the same keys and values as the tier's entry in
            ``TIER_LIMITS``. List values are copied as well, so changing the
            result never alters the module-level configuration.
        """
        return {
            key: list(value) if isinstance(value, list) else value
            for key, value in self._shared_limits().items()
        }

    def check_limits(
        self,
        pages: int = 0,
        doc_type: Optional[str] = None,
        output_format: Optional[str] = None,
        batch_size: Optional[int] = None,
        use_custom_fields: bool = False,
    ) -> None:
        """Validate a request against the limits of the current tier.

        Checks run in a fixed order (pages, document type, output format,
        batch size, custom fields) and the first violation is reported.

        Args:
            pages: Number of pages requested; compared with ``pages_per_month``.
            doc_type: Canonical document type, or ``None`` to skip the check.
            output_format: Output format name, or ``None`` to skip the check.
            batch_size: Number of items in the batch, or ``None`` to skip the check.
            use_custom_fields: Whether the request uses custom fields.

        Raises:
            ValueError: If any of the checked values is not allowed by the tier.
        """
        limits = self._shared_limits()

        # Check page limit
        if pages > limits["pages_per_month"]:
            raise ValueError(
                f"Page limit exceeded: {pages} pages requested, "
                f"limit is {limits['pages_per_month']} pages/month for {self.tier}"
            )

        # Check document type
        if doc_type is not None and doc_type not in limits["doc_types"]:
            raise ValueError(
                f"Document type '{doc_type}' not supported in {self.tier}. "
                f"Supported types: {limits['doc_types']}"
            )

        # Check output format
        if output_format is not None and output_format not in limits["output_formats"]:
            raise ValueError(
                f"Output format '{output_format}' not supported in {self.tier}. "
                f"Supported formats: {limits['output_formats']}"
            )

        # Check batch size
        if batch_size is not None and batch_size > limits["batch_size"]:
            raise ValueError(
                f"Batch size {batch_size} exceeds limit of {limits['batch_size']} "
                f"for {self.tier}"
            )

        # Check custom fields
        if use_custom_fields and not limits["custom_fields"]:
            raise ValueError(
                f"Custom fields not supported in {self.tier}. "
                "Upgrade to Standard or higher."
            )

    def supports_doc_type(self, doc_type: str) -> bool:
        """Return True if this tier's ``doc_types`` list contains *doc_type*."""
        return doc_type in self._shared_limits()["doc_types"]

    def supports_format(self, fmt: str) -> bool:
        """Return True if this tier's ``output_formats`` list contains *fmt*."""
        return fmt in self._shared_limits()["output_formats"]

    def get_ocr_languages(self) -> List[str]:
        """Return the OCR language codes available to this tier.

        Returns:
            A new list, so callers may modify it without changing
            ``TIER_LIMITS``.
        """
        return list(self._shared_limits()["ocr_languages"])


def get_default_fields_for_doc_type(doc_type: str) -> List[str]:
    """Return the default extraction fields for a document type.

    Args:
        doc_type: Canonical document type, i.e. a key of ``DOC_TYPE_FIELDS``.

    Returns:
        A new list holding the default field names for *doc_type*, or an empty
        list when the type is unknown. The list is a copy, so callers can
        append their own fields without changing the shared defaults.
    """
    return list(DOC_TYPE_FIELDS.get(doc_type, ()))


def resolve_doc_type(doc_type_input: str) -> str:
    """Map a user-supplied document type name onto its canonical type.

    The input is lowercased and stripped, then compared with every canonical
    key of ``DOC_TYPE_ALIASES`` and with the lowercased aliases listed for it.
    The first canonical type that matches, in table order, is returned.

    Args:
        doc_type_input: Name as entered by the user.

    Returns:
        The matching canonical type, or ``"generic"`` when nothing matches.
    """
    wanted = doc_type_input.lower().strip()
    hits = (
        name
        for name, spellings in DOC_TYPE_ALIASES.items()
        if wanted == name or wanted in {spelling.lower() for spelling in spellings}
    )
    # The generator is lazy, so next() stops at the first matching entry.
    return next(hits, "generic")