# --- markdown_extensions.py ---
import logging # Use logging for warnings
import math
import os # Import os for path joining
import re
import xml.etree.ElementTree as etree

import markdown
from markdown.extensions import Extension
from markdown.inlinepatterns import InlineProcessor

# Names of the data-* attributes written onto each generated <audio> element
# (lowercase for consistency).
DATA_ATTR_SONG_ID = "data-song-id"
DATA_ATTR_START = "data-start"
DATA_ATTR_END = "data-end"
# Default file extension used when building audio segment filenames.
DEFAULT_AUDIO_FORMAT = "m4a" # Or choose mp3, opus, etc.

# Setup basic logging.
# NOTE(review): this runs at import time and targets the root logger, so it is
# a module-level side effect for any application importing this file.
logging.basicConfig(level=logging.WARNING, format='%(levelname)s: %(message)s')

# Inline syntax matched by the extension: [display text](audio:key=value,key=value)
#   group 1 -> display text (one or more characters other than ']')
#   group 2 -> raw parameter string after "audio:" (one or more characters other than ')')
# Raw string literal, so only the regex metacharacters [] and () are escaped.
AUDIO_CLIP_RE = r'\[([^\]]+)\]\(audio:([^)]+)\)'

def sanitize_for_id(text):
    """Return *text* reduced to characters that are safe in an HTML id.

    The value is converted with ``str()``, colons become hyphens, and every
    character outside ``[a-zA-Z0-9_-]`` is dropped. If the result would start
    with a digit or a hyphen (awkward in CSS selectors) it is prefixed with
    an underscore.

    Args:
        text: Any value; non-strings (e.g. ints) are stringified first.

    Returns:
        The sanitized string, or ``"_"`` when *text* is ``None``, empty, or
        contains no permitted characters.
    """
    # str(None) is the truthy string "None", so None has to be handled
    # explicitly to honour the "'_' for None" contract.
    if text is None:
        return "_"
    str_text = str(text)
    if not str_text:
        return "_"
    # Colons (e.g. from "1:30" style values) map to hyphens; everything else
    # that is not alphanumeric, underscore or hyphen is removed.
    sanitized = re.sub(r'[^a-zA-Z0-9_-]', '', str_text.replace(':', '-'))
    if not sanitized:
        return "_"
    # Prevent IDs starting with a digit or hyphen, which can be invalid in
    # CSS selectors.
    if sanitized[0].isdigit() or sanitized[0] == '-':
        sanitized = '_' + sanitized
    return sanitized

def generate_audio_element_id(song_id: str, start_seconds: float, end_seconds: float) -> str:
    """Build the HTML id for an audio clip from its song and time range.

    The start and end times are truncated to whole seconds with ``int()``
    before being sanitized, so the id never contains a fractional part.
    The three sanitized pieces are joined with underscores in the order
    song id, start, end.
    """
    id_pieces = (
        sanitize_for_id(song_id),
        sanitize_for_id(int(start_seconds)),
        sanitize_for_id(int(end_seconds)),
    )
    return "_".join(id_pieces)

def generate_audio_filename(element_id: str, format: str = DEFAULT_AUDIO_FORMAT) -> str:
    """Return the audio segment filename: the element id, a dot, then the format extension."""
    return "{}.{}".format(element_id, format)

def generate_audio_src(filename: str, base_dir: str) -> str:
    """Generate the ``src`` attribute value for an audio element.

    Args:
        filename: Name of the audio file (e.g. ``"song_10_20.m4a"``).
        base_dir: Directory holding the audio files, relative to the HTML
            output. The platform path separator (``os.sep``) is converted to
            ``/`` so a value built with ``os.path.join`` on Windows still
            yields a web path.

    Returns:
        ``base_dir`` and ``filename`` joined by exactly one forward slash.
    """
    # Normalise to forward slashes, then drop trailing ones so that a
    # base_dir such as "assets/audio/" does not produce "assets/audio//x"
    # (and "/" does not produce the protocol-relative URL "//x").
    web_dir = base_dir.replace(os.sep, '/').rstrip('/')
    return f"{web_dir}/{filename}"

def time_str_to_seconds(time_str):
    """Convert a time string to float seconds.

    Accepted shapes are ``'SS'``, ``'SS.ms'``, ``'M:SS'`` and ``'H:MM:SS'``.
    Hour and minute fields must be integers; only the final (seconds) field
    may carry a fractional part.

    Args:
        time_str: The value to parse. Non-strings are stringified first.

    Returns:
        The time in seconds as a ``float``, or ``None`` when *time_str* is
        falsy or cannot be parsed. Unparseable input is reported with a
        logged warning. Values are rejected when they have more than three
        fields, any negative field, or a non-finite result (``nan``/``inf``).
    """
    if not time_str:
        return None
    parts = str(time_str).split(':')
    try:
        if len(parts) > 3:
            raise ValueError("Invalid time format")
        *leading, last = parts
        whole_fields = [int(field) for field in leading]
        last_seconds = float(last)
        # Check every field, not just the total: "1:-30" would otherwise be
        # accepted as 30 seconds.
        if last_seconds < 0 or any(field < 0 for field in whole_fields):
            raise ValueError("Time cannot be negative")
        # Fold hours/minutes into a count of minutes, then into seconds.
        whole = 0
        for field in whole_fields:
            whole = whole * 60 + field
        seconds = float(whole * 60 + last_seconds)
        # float() accepts "nan" and "inf"; callers later apply int() to the
        # result, which raises on non-finite values.
        if not math.isfinite(seconds):
            raise ValueError("Time must be finite")
        return seconds
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an integer field too large to combine with a float.
        logging.warning(f"Could not parse time string: '{time_str}'")
        return None

class AudioClipInlineProcessor(InlineProcessor):
    """Inline processor for ``[text](audio:key=value,...)`` clip links.

    A successful match is replaced by a wrapper ``<span>`` containing a
    ``play-trigger`` span (the link text) and an ``<audio>`` element whose
    id, src and data-* attributes are derived from the link parameters.
    When parameters are missing or invalid, the matched source text is
    returned unchanged and a message is logged.
    """

    def __init__(self, pattern, md, fallback_song_id=None, relative_audio_path=None):
        """Store the fallback song id and audio directory; log an error if the directory is unset."""
        super().__init__(pattern, md)
        self.relative_audio_path = relative_audio_path
        self.fallback_song_id = fallback_song_id
        if not self.relative_audio_path:
            logging.error("AudioClipInlineProcessor initialized without relative_audio_path!")

    def handleMatch(self, m, data):
        """Build the trigger/audio elements for one regex match.

        Recognised parameters (keys are case-insensitive): ``song_id``
        (defaults to the configured fallback), ``from`` and ``to``.

        Returns:
            ``(element, start, end)`` on success, or
            ``(matched_text, start, end)`` when generation is skipped.
        """
        matched_text = m.group(0)
        # Result used on every "skip" path: hand the original text back.
        unchanged = (matched_text, m.start(0), m.end(0))
        link_text = m.group(1)
        raw_params = m.group(2)

        # Parse "key=value" pairs; chunks without '=' are ignored.
        options = {}
        try:
            for chunk in raw_params.split(','):
                name, separator, value = chunk.strip().partition('=')
                if separator:
                    options[name.strip().lower()] = value.strip() # Lowercase keys
        except ValueError:
            logging.warning(f"Malformed audio parameters found: {raw_params}. Skipping.")
            return unchanged

        song_id = options.get('song_id', self.fallback_song_id)
        from_text = options.get('from')
        to_text = options.get('to')

        # Times are converted before validation so parse warnings are logged first.
        start_seconds = time_str_to_seconds(from_text)
        end_seconds = time_str_to_seconds(to_text)

        required = (('song_id', song_id), ('from', from_text), ('to', to_text))
        absent = [param_name for param_name, param_value in required if not param_value]
        if absent:
            logging.warning(f"Missing audio parameters ({', '.join(absent)}) in: {matched_text}. Fallback was '{self.fallback_song_id}'. Skipping generation.")
            return unchanged
        if end_seconds is None or start_seconds is None:
            logging.warning(f"Invalid time format in: {matched_text}. Could not convert 'from' ('{from_text}') or 'to' ('{to_text}') to seconds. Skipping generation.")
            return unchanged

        # The id (and therefore the filename) uses whole seconds only.
        element_id = generate_audio_element_id(song_id, start_seconds, end_seconds)
        file_name = generate_audio_filename(element_id)
        if not self.relative_audio_path:
            logging.error(f"Cannot generate audio src for {matched_text} - relative_audio_path not set.")
            return unchanged
        source_url = generate_audio_src(file_name, base_dir=self.relative_audio_path)

        # Wrapper span -> clickable trigger + the audio element it targets.
        wrapper = etree.Element("span")
        trigger = etree.SubElement(
            wrapper,
            "span",
            {"class": "play-trigger", "data-audio-target": element_id},
        )
        trigger.text = link_text
        etree.SubElement(
            wrapper,
            "audio",
            {
                "id": element_id,
                "src": source_url,
                DATA_ATTR_SONG_ID: song_id,
                DATA_ATTR_START: str(start_seconds), # Precise float value
                DATA_ATTR_END: str(end_seconds),     # Precise float value
                "preload": "metadata",
            },
        )
        return wrapper, m.start(0), m.end(0)

class AudioExtension(Extension):
    """Markdown extension that registers the audio clip inline processor.

    Config options:
        fallback_song_id: song id used when a link does not specify one.
        relative_audio_path: directory of the audio files relative to the
            HTML output; the processor is not registered without it.
    """

    def __init__(self, **kwargs):
        """Declare the config options (as [default, description] pairs), then apply *kwargs*."""
        # Built per instance, before the base initializer receives kwargs.
        self.config = dict(
            fallback_song_id=['', "Default song ID to use if not specified in the link"],
            relative_audio_path=['', "Relative path from HTML output to audio files (e.g., 'assets/audio')"],
        )
        super().__init__(**kwargs)

    def extendMarkdown(self, md):
        """Register the 'audio_clip' inline pattern on *md*, or log an error if no audio path is configured."""
        default_song = self.getConfig('fallback_song_id')
        audio_dir = self.getConfig('relative_audio_path')
        if audio_dir:
            processor = AudioClipInlineProcessor(
                AUDIO_CLIP_RE,
                md,
                fallback_song_id=default_song,
                relative_audio_path=audio_dir,
            )
            # Need to adjust priority? Default link processor is ~160. 175 should be fine.
            md.inlinePatterns.register(processor, 'audio_clip', 175)
        else:
            logging.error("AudioExtension requires 'relative_audio_path' to be configured.")

def makeExtension(**kwargs):
    """Create an AudioExtension from the given config keyword arguments (extension loading hook)."""
    extension = AudioExtension(**kwargs)
    return extension