import os
import sys
import logging
import re
import json
import markdown
from collections import defaultdict
from datetime import datetime
from jinja2 import Environment, FileSystemLoader
from import_helpers import annotation_data

# Constants
# Absolute path of the directory containing this file. It anchors the lookup
# of md/annotation_index.md and the default output/templates folders used
# when the module is run as a standalone script.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# Annotation groups to publish. An annotation is processed only when its
# 'group' value equals one of these strings exactly (the membership test is
# case-sensitive); every group that has at least one annotation gets its own
# HTML page.
TARGET_GROUPS = [
    "ai 2027",
    "perceptual origin of math",
    "allergy, hives, eoe",
]

# --- Helper Functions ---

def format_timestamp(ts_string: str) -> str:
    """Format an ISO 8601 timestamp as an abbreviated month and two-digit year.

    Args:
        ts_string: ISO 8601 timestamp, optionally ending in ``Z`` to denote
            UTC (e.g. ``"2025-01-15T10:30:00Z"``).

    Returns:
        A string such as ``"Jan '25"``. Falsy input (``""``, ``None``) yields
        an empty string. Input that cannot be parsed is returned unchanged
        after a warning is logged; a non-string value is returned as
        ``str(value)``.
    """
    if not ts_string:
        return ""
    if not isinstance(ts_string, str):
        # Values such as epoch integers cannot be parsed as ISO 8601 text.
        logging.warning(f"Could not parse timestamp: {ts_string}")
        return str(ts_string)

    # datetime.fromisoformat() only accepts a trailing 'Z' from Python 3.11
    # onwards, so rewrite it as an explicit UTC offset. The normalized text is
    # kept in its own variable so the caller's value is what gets returned
    # (and logged) when parsing fails.
    iso_text = ts_string[:-1] + '+00:00' if ts_string.endswith('Z') else ts_string
    try:
        dt_obj = datetime.fromisoformat(iso_text)
    except ValueError:
        logging.warning(f"Could not parse timestamp: {ts_string}")
        return ts_string
    # Abbreviated month and two-digit year, e.g. Jan '25 (the day is omitted).
    return dt_obj.strftime("%b '%y")

def slugify(text: str) -> str:
    """Build a lowercase, hyphen-separated slug from ``text``.

    Whitespace runs become a single hyphen, every character other than
    ``a-z``, ``0-9`` and ``-`` is dropped, repeated hyphens are collapsed and
    hyphens at either end are trimmed. Returns ``"untitled"`` when nothing
    remains.
    """
    # Applied in order; each substitution works on the previous one's output.
    substitutions = (
        (r'\s+', '-'),         # whitespace runs -> hyphen
        (r'[^a-z0-9\-]', ''),  # drop anything outside the slug alphabet
        (r'-+', '-'),          # collapse repeated hyphens
    )
    slug = text.lower()
    for pattern, replacement in substitutions:
        slug = re.sub(pattern, replacement, slug)
    slug = slug.strip('-')
    if not slug:
        return "untitled"
    return slug

def _markdown_to_html(md_parser, text: str) -> str:
    """Convert Markdown ``text`` to HTML and reset the parser for its next use."""
    html = md_parser.convert(text)
    md_parser.reset()  # Clear per-document state before the next conversion
    return html


def render_annotation_group_html(group_name: str, annotations: list, output_dir: str, env: Environment) -> dict | None:
    """Render one annotation group to ``annotation_<slug>.html`` in ``output_dir``.

    Each annotation dict is updated in place with ``timestamp_str``,
    ``annotation_html`` and, when it has a ``selection`` key,
    ``selectedText_html``. Annotations are then grouped by their ``url`` and
    sorted oldest-first by ``timestamp`` within each URL before being passed
    to the template as ``grouped_annotations``.

    Args:
        group_name: Name of the group; passed to the template and slugified
            to build the output filename.
        annotations: Annotation dicts belonging to the group.
        output_dir: Directory the HTML file is written to.
        env: Jinja2 environment used to load
            ``templates/annotation_group_template.html``.

    Returns:
        ``{"filename": ..., "group_name": ...}`` on success, or ``None`` when
        the template cannot be loaded or rendered, or the file cannot be
        written (the error is logged).
    """
    try:
        template = env.get_template('templates/annotation_group_template.html')
    except Exception as e:
        logging.error(f"Error loading annotation_group_template.html: {e}")
        return None

    # One parser instance is reused for every conversion in this group.
    md_parser = markdown.Markdown()

    grouped_by_url = defaultdict(list)
    for ann in annotations:
        ann['timestamp_str'] = format_timestamp(ann.get('timestamp', ''))

        annotation_text = ann.get('annotation')
        ann['annotation_html'] = _markdown_to_html(md_parser, annotation_text) if annotation_text else ''

        selection = ann.get('selection')
        selected_text = selection.get('selectedText') if selection else None
        if selected_text:
            ann['selectedText_html'] = _markdown_to_html(md_parser, selected_text)
        elif 'selection' in ann:
            # A selection without text still gets the key, set to empty.
            ann['selectedText_html'] = ''

        # Fall back to the placeholder when the URL is absent, None or empty
        # so such annotations are all listed under one heading.
        url = ann.get('url') or 'No URL Provided'
        grouped_by_url[url].append(ann)

    # Oldest first within each URL. Missing/None timestamps sort first, and
    # str() keeps the comparison valid if a timestamp is not a string.
    for url_annotations in grouped_by_url.values():
        url_annotations.sort(key=lambda x: str(x.get('timestamp') or ''))

    # Rendering failures are reported the same way as load and write
    # failures so one broken group does not abort the whole site build.
    try:
        html_content = template.render(
            group_name=group_name,
            grouped_annotations=grouped_by_url
        )
    except Exception as e:
        logging.error(f"Error rendering HTML for group '{group_name}': {e}")
        return None

    filename = f"annotation_{slugify(group_name)}.html"
    output_path = os.path.join(output_dir, filename)
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(html_content)
    except Exception as e:
        logging.error(f"Error writing HTML to {output_path} for group '{group_name}': {e}")
        return None

    logging.debug(f"Successfully generated HTML for group '{group_name}' -> {output_path}")
    return {
        "filename": filename,
        "group_name": group_name
    }

# --- Annotation Index Generation ---

def generate_annotation_index_html(pages_info: list, output_dir: str, env: Environment):
    """Write ``index.html`` listing the generated annotation group pages.

    The page description comes from ``md/annotation_index.md`` next to this
    module, converted from Markdown; a default paragraph is used when that
    file is missing or cannot be processed. Each entry of ``pages_info`` that
    has a ``filename`` becomes a section linking to
    ``/annotations/<filename>``; sections are ordered by name.

    Args:
        pages_info: Dicts with ``group_name`` and ``filename`` keys.
        output_dir: Directory that receives ``index.html``.
        env: Jinja2 environment used to load the list page and item templates.

    Template or write failures are logged and not raised.
    """
    logging.info("Generating annotation index page...")
    description_md_file = os.path.join(CURRENT_DIR, "md", "annotation_index.md")

    # Fallback shown when the Markdown description is absent or unreadable.
    site_description_html = "<p>Annotation section description not available.</p>"
    if not os.path.exists(description_md_file):
        logging.warning(f"{description_md_file} not found. Using default description.")
    else:
        try:
            with open(description_md_file, "r", encoding="utf-8") as md_file:
                raw_markdown = md_file.read()
            site_description_html = markdown.Markdown().convert(raw_markdown)
            logging.info(f"Successfully read and converted {description_md_file}")
        except Exception as e:
            logging.error(f"Error processing {description_md_file}: {e}")

    # One section entry per successfully generated page.
    collected_sections = []
    for page in pages_info:
        group_name = page.get('group_name', 'Untitled Group')
        filename = page.get('filename')
        if not filename:
            logging.warning(f"Skipping index entry for group '{group_name}' due to missing filename.")
            continue
        collected_sections.append({
            "name": group_name,
            "url": f"/annotations/{filename}",  # Path from the site root
            "description": f"Annotations related to {group_name}."
        })

    # Alphabetical by name so the index order is stable between runs.
    sections_data = sorted(collected_sections, key=lambda entry: entry['name'])

    try:
        page_template = env.get_template('templates/list_page_template.html')
        item_template = env.get_template('templates/list_templates/section_item.html')

        # Each section is rendered separately, then handed to the page template.
        list_items_html = [item_template.render(section=entry) for entry in sections_data]

        html_content = page_template.render(
            page_title="WU - Annotations Index",
            site_description_html=site_description_html,
            list_items_html=list_items_html,
        )

        output_path = os.path.join(output_dir, "index.html")
        with open(output_path, "w", encoding="utf-8") as index_file:
            index_file.write(html_content)
        logging.info(f"Successfully generated annotation index.html -> {output_path}")
    except Exception as e:
        logging.error(f"Error generating annotation index.html: {e}")


# --- Main Generation Function ---

def generate_annotation_site(output_dir: str, templates_dir: str):
    """Build the annotation section: one page per target group plus an index.

    Annotations from ``annotation_data`` whose ``group`` is listed in
    ``TARGET_GROUPS`` are bucketed by group, each bucket is rendered with
    ``render_annotation_group_html``, and an index is generated for the pages
    that rendered successfully.

    Args:
        output_dir: Directory for the generated HTML; created if missing.
        templates_dir: Root directory given to the Jinja2 ``FileSystemLoader``.

    Returns early (after logging) when the Jinja2 environment cannot be
    created or when no annotation matches a target group.
    """
    logging.info(f"Starting annotation site generation for {len(TARGET_GROUPS)} target groups...")
    logging.info(f"Output directory set to: {output_dir}")
    logging.info(f"Templates directory set to: {templates_dir}")

    os.makedirs(output_dir, exist_ok=True)

    try:
        jinja_env = Environment(loader=FileSystemLoader(templates_dir), autoescape=True)
    except Exception as e:
        logging.error(f"Failed to initialize Jinja2 environment at {templates_dir}: {e}")
        return

    # --- Filter and Group Annotations ---
    annotations_by_group = defaultdict(list)
    for record in annotation_data:
        group = record.get('group')
        if not group or group not in TARGET_GROUPS:
            continue
        annotations_by_group[group].append(record)
    matched_total = sum(len(members) for members in annotations_by_group.values())

    logging.info(f"Found {matched_total} annotations belonging to {len(annotations_by_group)} target groups.")

    if not annotations_by_group:
        logging.warning("No annotations found for the target groups. No HTML will be generated.")
        return

    # --- Generate HTML for each group ---
    # Ordering within a group is handled by render_annotation_group_html.
    logging.info("--- Generating Annotation Group HTML Pages ---")
    generated_pages_info = []
    for group_name, group_annotations in annotations_by_group.items():
        page_info = render_annotation_group_html(
            group_name=group_name,
            annotations=group_annotations,
            output_dir=output_dir,
            env=jinja_env
        )
        if not page_info:
            logging.error(f"Failed to generate HTML for annotation group '{group_name}'")
            continue
        generated_pages_info.append(page_info)

    # --- Generate Annotation Index HTML ---
    if not generated_pages_info:
        logging.warning("No annotation group pages were generated successfully. Annotation index file not created.")
    else:
        generate_annotation_index_html(generated_pages_info, output_dir, jinja_env)

    logging.info("Annotation site generation process finished.")

# --- Standalone Execution ---
if __name__ == "__main__":
    # Send INFO-and-above records to stdout with a timestamped prefix.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s: %(message)s',
        stream=sys.stdout,
    )

    # Standalone runs write to <script dir>/output/annotations and load
    # templates from <script dir>/templates.
    standalone_output_dir = os.path.join(CURRENT_DIR, "output", "annotations")
    standalone_templates_dir = os.path.join(CURRENT_DIR, 'templates')
    # NOTE(review): the render functions request template names that start
    # with 'templates/' (e.g. 'templates/annotation_group_template.html'), so
    # with this loader root they resolve under <script dir>/templates/templates/
    # -- confirm that matches the on-disk layout.

    logging.info(f"Running {__file__} as standalone script.")
    generate_annotation_site(standalone_output_dir, standalone_templates_dir)