#!/usr/bin/env python3
"""Extract text from resume files (PDF, DOCX, TXT, MD)"""
import sys
import argparse
from pathlib import Path

def extract_pdf(file_path):
    """Extract text from PDF"""
    try:
        import pypdf
        with open(file_path, 'rb') as f:
            reader = pypdf.PdfReader(f)
            text = '\n\n'.join(page.extract_text() for page in reader.pages)
        return text
    except ImportError:
        print("Error: pypdf not installed. Run: pip install pypdf", file=sys.stderr)
        sys.exit(1)

def extract_docx(file_path):
    """Extract text from DOCX"""
    try:
        import docx
        doc = docx.Document(file_path)
        text = '\n\n'.join(para.text for para in doc.paragraphs if para.text.strip())
        return text
    except ImportError:
        print("Error: python-docx not installed. Run: pip install python-docx", file=sys.stderr)
        sys.exit(1)

def extract_text(file_path):
    """Extract text from TXT/MD"""
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()

def main():
    parser = argparse.ArgumentParser(description='Extract text from resume files')
    parser.add_argument('input', help='Input file path')
    parser.add_argument('--output', '-o', help='Output file path (default: stdout)')
    args = parser.parse_args()

    file_path = Path(args.input)
    if not file_path.exists():
        print(f"Error: File not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    ext = file_path.suffix.lower()

    if ext == '.pdf':
        text = extract_pdf(file_path)
    elif ext in ['.docx', '.doc']:
        text = extract_docx(file_path)
    elif ext in ['.txt', '.md']:
        text = extract_text(file_path)
    else:
        print(f"Error: Unsupported file type: {ext}", file=sys.stderr)
        sys.exit(1)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(text)
        print(f"Extracted to: {args.output}")
    else:
        print(text)

if __name__ == '__main__':
    main()