import os
import json
import fitz  # PyMuPDF
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from typing import List, Dict, Any, Optional
import difflib
from database import settings_collection

# Asset directories live next to this module. They are created up front so
# the static mount and the template loader below have directories to point at.
_MODULE_DIR = os.path.dirname(__file__)
STATIC_DIR = os.path.join(_MODULE_DIR, "static")
TEMPLATES_DIR = os.path.join(_MODULE_DIR, "templates")
for _asset_dir in (STATIC_DIR, TEMPLATES_DIR):
    os.makedirs(_asset_dir, exist_ok=True)

# Request body accepted by the POST /ask route.
class QueryRequest(BaseModel):
    # The user's question; the /ask handler rejects it with HTTP 400 when empty.
    query: str

# Response body returned by the POST /ask route.
class QueryResponse(BaseModel):
    # Answer text sent back to the user.
    answer: str
    # Intent of the matched Q&A entry. Declared Optional because the default is
    # None; a bare `str` annotation with a None default is not nullable under
    # Pydantic v2 and rejects an explicit None.
    intent: Optional[str] = None
    # Confidence score of the match; None when no score is supplied.
    confidence: Optional[float] = None

# Initialize FastAPI with metadata (title, description and version are passed
# straight to the FastAPI constructor).
app = FastAPI(
    title="DLC Chatbot API",
    description="A chatbot API for ABU Distance Learning Centre",
    version="1.0.0"
)

# Add CORS middleware: every origin, method and header is allowed.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True lets
# any site make credentialed requests; the FastAPI CORS docs advise against
# this pairing. Confirm whether credentials are actually needed, or list the
# trusted origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files directory (served under the /static URL prefix)
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")

# Initialize templates (used by the HTML routes via templates.TemplateResponse)
templates = Jinja2Templates(directory=TEMPLATES_DIR)

# Initialize model: created once at import time and shared by every request.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Module-level store of Q&A entries; load_qa_data() rebinds and fills it with
# dicts carrying the keys 'intent', 'question' and 'answer'.
qa_data = []

def _extract_qa_pairs(item: Any) -> List[Dict[str, Any]]:
    """Flatten one intent record into per-question Q&A dicts ([] if the record is unusable)."""
    if not (isinstance(item, dict) and 'intent' in item and 'questions' in item):
        return []

    questions = item['questions']
    if isinstance(questions, str):
        # A lone string is one question, not a sequence of characters.
        questions = [questions]
    elif not isinstance(questions, (list, tuple)):
        return []

    answers = item.get('answers', [item.get('answer', '')])
    if not isinstance(answers, list):
        answers = [answers]
    # An empty 'answers' list falls back to the singular 'answer' field
    # instead of raising IndexError and aborting the rest of the file.
    answer = answers[0] if answers else item.get('answer', '')

    return [
        {'intent': item['intent'], 'question': question, 'answer': answer}
        for question in questions
        if isinstance(question, str)
    ]


def load_qa_data() -> List[Dict[str, Any]]:
    """Load Q&A data from the JSON files in the ``documents`` directory.

    Two file layouts are accepted:

    * a top-level list of intent records, or
    * a top-level dict mapping a category name to a list of intent records.

    An intent record is a dict with ``intent``, ``questions`` and either
    ``answers`` (the first one is used) or ``answer``. Each question becomes
    its own ``{'intent', 'question', 'answer'}`` entry.

    The module-level ``qa_data`` is rebound to the freshly built list only
    once loading has finished, so readers never observe a half-loaded list.

    Returns:
        The list of flattened Q&A entries (empty if the documents directory
        is missing or contains no usable records).
    """
    global qa_data
    loaded: List[Dict[str, Any]] = []

    # Get current settings, seeding the collection with defaults on first use
    settings = settings_collection.find_one({})
    if not settings:
        from models import ChatbotSettings
        settings = ChatbotSettings().dict()
        settings_collection.insert_one(settings)

    documents_dir = os.path.join(os.path.dirname(__file__), 'documents')
    if not os.path.exists(documents_dir):
        print(f"Documents directory not found at {documents_dir}")
        qa_data = loaded
        return qa_data

    debug_mode = settings.get('debug_mode', False)

    if debug_mode:
        print(f"\nLoading Q&A data from {documents_dir}")

    # Sorted so the load order (and therefore tie-breaking between equally
    # similar questions) does not depend on filesystem enumeration order.
    for filename in sorted(os.listdir(documents_dir)):
        if not filename.endswith('.json'):
            continue

        file_path = os.path.join(documents_dir, filename)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if debug_mode:
                print(f"\nProcessing {filename}...")

            if isinstance(data, list):
                # List-based JSON file: the records are the list itself
                records = data
            elif isinstance(data, dict):
                # Category-based JSON file: gather the records of every category
                records = []
                for items in data.values():
                    if isinstance(items, list):
                        records.extend(items)
            else:
                records = []

            for record in records:
                loaded.extend(_extract_qa_pairs(record))

            if debug_mode:
                print(f"Added Q&A pairs from {filename}")
        except Exception as e:
            # Best effort: one unreadable or malformed file must not stop the
            # remaining files from loading.
            print(f"Error loading {filename}: {str(e)}")

    qa_data = loaded

    if debug_mode:
        print(f"\nTotal Q&A pairs loaded: {len(qa_data)}")
        print("Sample intents:", list(set(qa['intent'] for qa in qa_data))[:5])

    return qa_data

# Single-entry cache: the question list that was last encoded and its
# embedding matrix. Avoids re-encoding the whole corpus on every query.
_question_embedding_cache: Dict[str, Any] = {'questions': None, 'embeddings': None}


def _encode_questions(questions: List[str]) -> Any:
    """Return normalized embeddings for *questions*, reusing the previous result if unchanged."""
    if _question_embedding_cache['questions'] != questions:
        # Store the embeddings first so a failed encode leaves the cache untouched.
        _question_embedding_cache['embeddings'] = model.encode(
            questions, convert_to_numpy=True, normalize_embeddings=True
        )
        _question_embedding_cache['questions'] = questions
    return _question_embedding_cache['embeddings']


def find_best_match(query: str, qa_data: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Find the best matching Q&A pair for a given query using cosine similarity.

    Args:
        query: The user's question.
        qa_data: Flattened Q&A entries, each with at least a ``question`` key.

    Returns:
        * ``None`` when ``qa_data`` is empty.
        * A copy of the best-matching entry with an added ``confidence`` key
          (the cosine similarity) when the score reaches the configured
          ``similarity_threshold``.
        * Otherwise a fallback dict holding the configured default answer,
          ``intent=None`` and the best (below-threshold) score as
          ``confidence``.
    """
    if not qa_data:
        return None

    # Get current settings, seeding the collection with defaults on first use
    settings = settings_collection.find_one({})
    if not settings:
        from models import ChatbotSettings
        settings = ChatbotSettings().dict()
        settings_collection.insert_one(settings)

    similarity_threshold = settings.get('similarity_threshold', 0.6)
    debug_mode = settings.get('debug_mode', False)
    default_response = settings.get('default_response', "I'm sorry, I couldn't find a good match for your question. Could you please rephrase it?")

    questions = [item['question'] for item in qa_data]

    # Request NumPy output: the similarity below is computed with np.dot, and
    # feeding it torch tensors fails when the model runs on a non-CPU device.
    query_embedding = model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
    question_embeddings = _encode_questions(questions)

    # Vectors are normalized, so the dot product equals the cosine similarity.
    similarities = np.dot(query_embedding, question_embeddings.T)[0]

    # Find the best match; cast to plain Python types for downstream use
    best_idx = int(np.argmax(similarities))
    best_similarity = float(similarities[best_idx])

    if debug_mode:
        print(f"\nQuery: {query}")
        print(f"Best match: {questions[best_idx]}")
        print(f"Cosine similarity score: {best_similarity:.4f}")
        print(f"Threshold: {similarity_threshold}")

    if best_similarity >= similarity_threshold:
        # Return a copy so the shared qa_data entry is not mutated.
        return {**qa_data[best_idx], 'confidence': best_similarity}

    # Keep the same keys as a real match so callers can index uniformly.
    return {'answer': default_response, 'intent': None, 'confidence': best_similarity}

def norm(vector):
    """Return the L2 norm of ``vector``.

    The vector is combined with itself using the ``@`` operator and the
    result is raised to the power 0.5.
    """
    # Assumes a 1-D array-like that supports ``@`` - TODO confirm with callers.
    squared_length = vector @ vector
    return squared_length ** 0.5

# Load Q&A data on startup: this call runs at import time, so qa_data is
# populated before the routes defined below are registered.
load_qa_data()

# --- API Routes ---
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Serve the chatbot UI"""
    # Template context: the incoming request plus the page title.
    page_context = {"request": request, "title": "DLC Chatbot"}
    return templates.TemplateResponse("chat.html", page_context)

@app.get("/docs-ui", response_class=HTMLResponse)
async def docs_ui(request: Request):
    """Serve the documentation UI"""
    # qa_data holds one flattened entry per question (keys: 'intent',
    # 'question', 'answer'), so the number of questions is simply its length.
    # Entries have no 'questions' key; indexing it raised KeyError.
    unique_intents = list(set(qa['intent'] for qa in qa_data))
    return templates.TemplateResponse(
        "docs.html",
        {
            "request": request,
            "title": "DLC Chatbot Documentation",
            "intents": unique_intents,
            "total_questions": len(qa_data)
        }
    )

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # Report whether the shared model object exists and how many Q&A
    # entries are currently held in memory.
    model_ready = model is not None
    loaded_pairs = len(qa_data)
    report = {
        "status": "healthy",
        "model_loaded": model_ready,
        "qa_pairs_loaded": loaded_pairs,
    }
    return report

@app.get("/stats")
async def get_stats():
    """Get API statistics"""
    # Collapse the per-question entries into the distinct intents they cover.
    unique_intents = {entry['intent'] for entry in qa_data}
    stats = {
        "total_qa_pairs": len(qa_data),
        "intents": list(unique_intents),
        "model_type": "sentence-transformer",
        "model_name": "sentence-transformers/all-MiniLM-L6-v2",
    }
    return stats

@app.post("/ask", response_model=QueryResponse)
async def ask_question(query_request: QueryRequest):
    """
    Ask a question to the chatbot

    - **query**: The question to ask

    Returns:
    - **answer**: The bot's response
    - **intent**: The detected intent (if any)
    - **confidence**: Confidence score of the match
    """
    # Treat whitespace-only input the same as an empty query.
    query = query_request.query.strip()
    if not query:
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    # Identity check: the truthiness of a model object is not a reliable
    # "is it loaded" signal.
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    if not qa_data:
        raise HTTPException(status_code=503, detail="No Q&A data available")

    # NOTE: find_best_match is a regular (blocking) function called directly
    # from this coroutine.
    match = find_best_match(query, qa_data)

    if match:
        # The below-threshold fallback from find_best_match carries no usable
        # intent, so read it with .get() instead of indexing (which raised
        # KeyError and turned every unmatched question into a server error).
        intent = match.get('intent')
        if intent is not None:
            # Use the matcher's score when it supplies one; otherwise keep the
            # previous fixed value of 1.0.
            score = match.get('confidence')
            return QueryResponse(
                answer=match['answer'],
                intent=intent,
                confidence=1.0 if score is None else float(score),
            )
        # No intent means the configured default answer was returned.
        return QueryResponse(answer=match['answer'], confidence=0.0)

    return QueryResponse(
        answer="I'm sorry, I couldn't find a good match for your question. Could you please rephrase it?",
        confidence=0.0
    )

# For local development: start a uvicorn server when this file is run as a script.
# NOTE(review): host="0.0.0.0" listens on every network interface - confirm
# that is intended outside a container/VM; 127.0.0.1 keeps it local-only.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
