import io
import json
import re
from typing import Any
import pandas as pd
from bs4 import BeautifulSoup
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from app.models.entities import Product
from app.services.normalise import normalise_row
from app.services.audit import log_event


def upsert_products(db: Session, rows: list[dict[str, Any]], source: str, source_type: str) -> dict[str, int]:
    created = updated = skipped = 0
    for raw in rows:
        row = normalise_row(raw, source_type=source_type)
        if not row.get("product_name"):
            skipped += 1
            continue
        existing = db.query(Product).filter(
            Product.retailer_name == row["retailer_name"],
            Product.product_name == row["product_name"],
            Product.brand == row.get("brand", ""),
            Product.pack_size == row.get("pack_size", ""),
            Product.product_url == row.get("product_url", ""),
        ).first()
        if existing:
            for key, value in row.items():
                if hasattr(existing, key) and value not in (None, ""):
                    setattr(existing, key, value)
            updated += 1
        else:
            db.add(Product(**row))
            created += 1
        try:
            db.commit()
        except IntegrityError:
            db.rollback()
            skipped += 1
    log_event(db, event_type="import", source=source, source_type=source_type, status="success", products_created=created, products_updated=updated, products_skipped=skipped, compliance_decision="authorised/manual/imported source")
    return {"created": created, "updated": updated, "skipped": skipped}


def import_csv_bytes(db: Session, data: bytes, filename: str) -> dict[str, int]:
    df = pd.read_csv(io.BytesIO(data))
    return upsert_products(db, df.to_dict(orient="records"), filename, "csv_import")


def import_excel_bytes(db: Session, data: bytes, filename: str) -> dict[str, int]:
    df = pd.read_excel(io.BytesIO(data))
    return upsert_products(db, df.to_dict(orient="records"), filename, "excel_import")


def parse_text_products(text: str, default_retailer: str = "Manual Import") -> list[dict[str, Any]]:
    text = text.strip()
    if not text:
        return []
    # JSON array/object pasted by an analyst
    try:
        parsed = json.loads(text)
        if isinstance(parsed, dict):
            parsed = [parsed]
        if isinstance(parsed, list):
            return [dict(x, retailer_name=x.get("retailer_name", default_retailer)) for x in parsed if isinstance(x, dict)]
    except Exception:
        pass

    # HTML product cards with schema/meta-ish content
    if "<" in text and ">" in text:
        soup = BeautifulSoup(text, "lxml")
        rows = []
        cards = soup.select('[itemtype*="Product"], .product, .product-card, li, article') or [soup]
        for card in cards:
            name = ""
            for sel in ['[itemprop="name"]', '.product-name', '.name', 'h1', 'h2', 'h3', 'a']:
                found = card.select_one(sel)
                if found and found.get_text(strip=True):
                    name = found.get_text(" ", strip=True)
                    break
            price = ""
            for sel in ['[itemprop="price"]', '.price', '.product-price', '[class*="price"]']:
                found = card.select_one(sel)
                if found:
                    price = found.get("content") or found.get_text(" ", strip=True)
                    break
            if name:
                rows.append({"retailer_name": default_retailer, "product_name": name, "price": price, "source_type": "manual_html_import"})
        return rows

    # Simple copied text lines: Product Name - Rxx.xx
    rows = []
    for line in text.splitlines():
        line = line.strip()
        if len(line) < 3:
            continue
        price_match = re.search(r"R\s?\d+[\d\s,.]*", line, re.I)
        price = price_match.group(0) if price_match else ""
        name = line.replace(price, "").strip(" -|\t") if price else line
        rows.append({"retailer_name": default_retailer, "product_name": name, "price": price, "source_type": "manual_text_import"})
    return rows


def import_text(db: Session, text: str, source: str = "manual_text") -> dict[str, int]:
    rows = parse_text_products(text)
    return upsert_products(db, rows, source, "manual_text_import")