o
    hu h*                     @   s   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlmZm	Z	m
Z
mZmZmZ eeZdd Zdedee fdd	Zd
edededefddZdedefddZdS )    N)BeautifulSoup)DATA_ATTR_SONG_IDDATA_ATTR_STARTDATA_ATTR_ENDDEFAULT_AUDIO_FORMATgenerate_audio_element_idgenerate_audio_filenamec                	   C   sV   zt jddgdddd W dS  t jtfy*   td td td Y dS w )	z-Checks if yt-dlp is installed and executable.yt-dlpz	--versionTcheckZcapture_outputtextz.yt-dlp command not found or failed to execute.z<Please ensure yt-dlp is installed and in your system's PATH.z.You can install it via pip: pip install yt-dlpF)
subprocessrunCalledProcessErrorFileNotFoundErrorloggererror r   r   4/Users/srajan/code/music-website/audio_downloader.pycheck_yt_dlp   s   


r   html_contentreturnc           
      C   s   t | d}|d}g }|D ]`}|t}|t}|t}|rn|rn|rnz0t|}t|}	|dkrE|	|krE||||	|ddd nt	d| d| d	| d
 W q t
ym   t	d| d| d	| d
 Y qw q|S )zGParses HTML content and finds audio tags with required data attributes.zhtml.parseraudior   idN)song_idstart_secondsend_secondsZtag_idz%Invalid start/end times found: start=z, end=z
 for song=z. Skipping.z1Could not parse start/end times as floats: start=)r   find_allgetr   r   r   floatappendr   warning
ValueError)
r   ZsoupZaudio_elementsZextracted_tagstagr   Z	start_strZend_strr   r   r   r   r   find_audio_tags   s2   






"r$   r   r   r   	audio_dirc                 C   s,  t | ||}t|dddd }t|}tj||}tj|r,td| d dS d|  }tj|dd	 d
ddt	dddd| d| dtj||d ddd|g}	td|  d| d| d|  zEt
j|	dddd}
td|  tj|std| d| d| d| d 	 ttj||d! }|rtd"|  W d#S W dS  t
jy } z+td$|  d| d| d% td&d'|j  td(|j  W Y d}~d#S d}~w ty   td) Y d#S  ty } ztd*|  d| d| d+|  W Y d}~d#S d}~ww ),zDownloads a specific audio segment using yt-dlp if it doesn't exist.
    Uses integer part of seconds for filename generation, but float for download command.
    `audio_dir` should be the absolute path to the target directory.
     )formatNzAudio segment already exists: z. Skipping download.Tz https://www.youtube.com/watch?v=exist_okr	   z-xz--audio-formatz--audio-quality0z--download-sections*-z-oz.%(ext)sz--force-keyframes-at-cutsz--quietz--no-warningsz Attempting to download segment: z [z] -> r
   zSuccessfully downloaded: zyt-dlp finished for z, but expected file 'z' not found. Check z for files starting with ''.z.*zFound potential match(es): FzFailed to download segment for z].z	Command:  zStderr: zTyt-dlp command not found during execution. Please ensure it's installed and in PATH.z1An unexpected error occurred during download for z]: )r   r   ospathjoinexistsr   infomakedirsr   r   r   debugr!   globr   r   cmdstderrr   	Exception)r   r   r   r%   
element_idZoutput_filename_baseoutput_filename_with_extoutput_filepathZ	video_urlcommand_Zpotential_fileser   r   r   download_audio_segmentB   sZ   
"$
"rA   html_diraudio_output_dirc                 C   s  t  std tj|dd ttj| d}t	dt
| d|  d t }|D ]g}zHt|dd	d
}| }W d   n1 sEw   Y  t|}tdt
| dtj| d |D ]}|d |d |d f}	||	 qbW q, ty }
 ztd| d|
  W Y d}
~
q,d}
~
ww d}d}d}|D ]F\}}}t|||}t|}tj||}tj|r|d7 }td|  qt||||}|rtj|r|d7 }q|d7 }q|d7 }qt	d t	dt
|  t	d|  t	d|  t	d|  dS )aU  
    Finds all HTML files in a directory, extracts audio tag data,
    and downloads the required audio segments into the specified `audio_output_dir`.

    Args:
        html_dir: Absolute path to the directory containing generated HTML files.
        audio_output_dir: Absolute path to the directory where audio files should be saved.
    z:yt-dlp is required but not found. Aborting audio download.Tr)   z*.htmlzFound z HTML files in 'r.   rzutf-8)encodingNz+ audio segments to potentially download in .r   r   r   zError processing file z: r      zSegment already exists: z--- Audio Download Summary ---zTotal unique segments found: zSuccessfully downloaded:     zAlready existed:            zFailed downloads:           )r   sysexitr0   r5   r7   r1   r2   r   r4   lensetopenreadr$   r6   basenameaddr:   r   r   r   r3   rA   )rB   rC   Z
html_filesZall_audio_segmentsZhtml_file_pathfcontentZ
audio_tagsZtag_dataZ
segment_idr@   Zdownload_countZ
fail_countZalready_exists_countr   r   r   r;   r<   r=   Zsuccessr   r   r   process_html_files   s\   	

$	"



rR   )r0   r   rH   Zbs4r   r7   loggingmarkdown_extensionsr   r   r   r   r   r   	getLogger__name__r   r   strlistdictr$   r   rA   rR   r   r   r   r   <module>   s     
	#Z