def main(voice_id, api_key, reset, audio_only, markdown_only, combine_only):
    """
    Run the Markdown-to-audio pipeline: split Markdown, generate audio, combine audio.

    :param voice_id: ElevenLabs Voice ID; when None, read from ELEVENLABS_VOICE_ID
    :param api_key: ElevenLabs API Key; when None, read from ELEVENLABS_API_KEY
    :param reset: Delete the whole ``output`` directory before running
    :param audio_only: Only build the audio components
    :param markdown_only: Only build the markdown components
    :param combine_only: Only build the final, combined audio files
    :return: None
    :raises SystemExit: with code 1 when more than one ``*_only`` flag is set,
        when no .env file is found, or when audio generation is requested
        without a voice id and API key
    """
    # Function-scope imports keep this block self-contained; both are stdlib.
    import shutil
    import sys

    if sum([markdown_only, audio_only, combine_only]) > 1:
        print("Error: Only one of --markdown-only, --audio-only, or --combine-only can be enabled at a time.")
        sys.exit(1)

    # Load environment variables; the .env location also defines the project root.
    env_path = find_dotenv()
    if not env_path:
        print("Error: .env file not found. Please create one in the project root.")
        sys.exit(1)
    load_dotenv(env_path)

    # Fall back to the (now loaded) environment for anything not passed in.
    if voice_id is None:
        voice_id = os.getenv("ELEVENLABS_VOICE_ID")
    if api_key is None:
        api_key = os.getenv("ELEVENLABS_API_KEY")

    run_split = not audio_only and not combine_only
    run_generate = not markdown_only and not combine_only
    run_combine = not markdown_only and not audio_only

    # Fail fast: without credentials the audio step can only fail later,
    # after the output may already have been reset and re-split.
    if run_generate and not (voice_id and api_key):
        print("Error: ELEVENLABS_VOICE_ID and ELEVENLABS_API_KEY must be provided to generate audio.")
        sys.exit(1)

    project_root = Path(env_path).parent

    if reset:
        # rmtree instead of a shell "rm -rf": safe for paths containing spaces
        # or shell metacharacters, and not limited to POSIX systems.
        shutil.rmtree(project_root / "output", ignore_errors=True)

    if run_split:
        split_markdown(project_root).main(reset)

    if markdown_only:
        # Nothing audio-related to do, so do not construct the API client.
        return

    audio = build_output(project_root, voice_id, api_key)

    if run_generate:
        audio.components()

    if run_combine:
        audio.combine_final_audio()


if __name__ == "__main__":

    # Set up argument parser
    parser = argparse.ArgumentParser(
        description="Convert Markdown files to audio using the Elevenlabs API."
    )
    # NOTE: these defaults are evaluated before the .env file is loaded, so
    # they only see the process environment; main() fills in .env values.
    parser.add_argument(
        "--voice-id",
        type=str,
        default=os.getenv("ELEVENLABS_VOICE_ID"),
        help="Voice ID to use for Elevenlabs. Defaults to the ID in the .env file.",
    )
    parser.add_argument(
        "--api-key",
        type=str,
        default=os.getenv("ELEVENLABS_API_KEY"),
        help="API Key for Elevenlabs. Defaults to the key in the .env file.",
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        default=False,
        help="Delete previous iterations, and start again (warning, can incur unexpected API expenses).",
    )
    parser.add_argument(
        "--audio-only",
        action="store_true",
        default=False,
        help="Only build the audio components.",
    )
    parser.add_argument(
        "--markdown-only",
        action="store_true",
        default=False,
        help="Only build the markdown components.",
    )
    parser.add_argument(
        "--combine-only",
        action="store_true",
        default=False,
        help="Only build the final, combined audio files.",
    )

    args = parser.parse_args()
    main(
        voice_id=args.voice_id,
        api_key=args.api_key,
        reset=args.reset,
        audio_only=args.audio_only,
        markdown_only=args.markdown_only,
        combine_only=args.combine_only,
    )
class build_output:
    """
    Builds the audio output: one MP3 per split Markdown section, plus one
    combined MP3 per source document.
    """

    def __init__(self, PROJECT_ROOT, voice_id, api_key):
        """
        Constructor for build_output class
        :param PROJECT_ROOT: Project root
        :param voice_id: ElevenLabs Voice ID
        :param api_key: ElevenLabs API Key
        """
        self.PROJECT_ROOT = PROJECT_ROOT
        self.voice_id = voice_id
        self.api_key = api_key
        # Section files are read from output/markdown/<document>/ ...
        self.input_dir = os.path.join(PROJECT_ROOT, "output", "markdown")
        # ... and audio is written to output/audio/<document>/.
        self.output_dir = os.path.join(PROJECT_ROOT, "output", "audio")
        self.client = ElevenLabs(api_key=api_key)
        self.audio_files = []

    def components(self):
        """
        Generate audio for every document folder found in the input directory.
        Does nothing (apart from a message) when the input directory is missing.
        """
        if not os.path.isdir(self.input_dir):
            print(f"No split Markdown found in: {self.input_dir}")
            return

        # Sorted so documents are processed in a stable order.
        for markdown_folder in sorted(os.listdir(self.input_dir)):
            folder_path = os.path.join(self.input_dir, markdown_folder)
            if os.path.isdir(folder_path):
                self.process_folder(markdown_folder)

    def process_folder(self, folder_name):
        """
        Process a single folder of split Markdown files
        :param folder_name: Name of the folder (relative to the input directory)
        """
        folder_path = os.path.join(self.input_dir, folder_name)
        audio_output_folder = os.path.join(self.output_dir, folder_name)

        # Create audio output folder if it doesn't exist
        os.makedirs(audio_output_folder, exist_ok=True)

        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.endswith(".md"):
                continue
            section_path = os.path.join(folder_path, file_name)
            # splitext swaps only the final extension; str.replace would also
            # rewrite a ".md" appearing earlier in the name.
            stem = os.path.splitext(file_name)[0]
            output_audio_path = os.path.join(audio_output_folder, f"{stem}.mp3")
            self.generate_audio(section_path, output_audio_path)

    def combine_final_audio(self):
        """
        Combine the section audio of each document folder into
        ``<output_dir>/<folder>.mp3``.
        :return: None
        """
        if not os.path.isdir(self.output_dir):
            print(f"No audio found in: {self.output_dir}")
            return

        for folder_name in sorted(os.listdir(self.output_dir)):
            folder_path = os.path.join(self.output_dir, folder_name)
            # The combined files written by this method live next to the
            # document folders; skip them (and any other plain file) so a
            # second run does not try to list a file as a directory.
            if not os.path.isdir(folder_path):
                continue

            combined_audio_path = os.path.join(self.output_dir, f"{folder_name}.mp3")
            print(f"Combining audio for: {folder_name}")

            # Get all audio files in the folder, in name order
            audio_files = [
                os.path.join(folder_path, file_name)
                for file_name in sorted(os.listdir(folder_path))
                if file_name.endswith(".mp3")
            ]
            self.combine_audio(audio_files, combined_audio_path)

    def generate_audio(self, input_file, output_audio_path):
        """
        Generate audio for a single section using ElevenLabs API
        :param input_file: Path to the Markdown file section
        :param output_audio_path: Path to save the generated audio
        :return: ``output_audio_path``, or None when the section is empty
        """
        # Section files are written as UTF-8, so read them the same way
        # instead of relying on the platform default encoding.
        with open(input_file, "r", encoding="utf-8") as file:
            text = file.read().strip()

        if not text:
            print(f"Skipping empty file: {input_file}")
            return None

        # Generate audio using ElevenLabs API
        print(f"Generating audio for: {input_file}")
        response = self.client.text_to_speech.convert(
            voice_id=self.voice_id,
            model_id="eleven_multilingual_v2",
            text=text,
        )

        # Collect every chunk before opening the output file, so a failure
        # while iterating the response does not leave a truncated MP3 behind.
        audio_content = b"".join(response)

        with open(output_audio_path, "wb") as audio_file:
            audio_file.write(audio_content)

        return output_audio_path

    def combine_audio(self, audio_files, combined_audio_path):
        """
        Combine multiple audio files into one, separated by 1-second pauses
        :param audio_files: List of audio file paths
        :param combined_audio_path: Path to save the combined audio file
        """
        if not audio_files:
            # Nothing to combine, so nothing is written.
            return

        combined_audio = None
        gap = AudioSegment.silent(duration=1000)  # 1-second pause

        for audio_file in audio_files:
            try:
                audio_segment = AudioSegment.from_file(audio_file)
            except Exception as e:
                # Deliberate best-effort: one unreadable section must not
                # prevent the remaining sections from being combined.
                print(f"Error processing file {audio_file}: {e}")
                continue

            if combined_audio is None:
                combined_audio = audio_segment
            else:
                combined_audio = combined_audio + gap + audio_segment

        # Explicit None check: do not depend on the truthiness of the segment.
        if combined_audio is not None:
            print(f"Saving combined audio to: {combined_audio_path}")
            combined_audio.export(combined_audio_path, format="mp3")
class split_markdown:
    """
    Splits Markdown files into individual plain-text sections.
    """

    # Typographic punctuation rewritten BEFORE transliteration. unidecode
    # turns these characters into ASCII itself, so the replacements only
    # take effect when applied first.
    _PUNCTUATION_TABLE = str.maketrans({
        "\u2014": ". ",   # em dash -> period and space
        "\u2013": ". ",   # en dash -> period and space
        "\u2026": "...",  # ellipsis -> three dots
        "\u201c": '"',    # left double quote
        "\u201d": '"',    # right double quote
        "\u2018": "'",    # left single quote
        "\u2019": "'",    # right single quote
    })

    # A section is a list when it starts with a bullet or "<digits>." marker
    # followed by whitespace (so "-5 degrees" or "3.14" are not lists).
    _LIST_ITEM = re.compile(r'^(?:[-+*]|\d+\.)\s')

    def __init__(self, PROJECT_ROOT):
        """
        Constructor for split_markdown class.
        :param PROJECT_ROOT: Project root; Markdown is read from
            ``<root>/markdown`` and sections are written to
            ``<root>/output/markdown``
        """
        self.PROJECT_ROOT = PROJECT_ROOT
        self.source_dir = os.path.join(PROJECT_ROOT, "markdown")
        self.output_dir = os.path.join(PROJECT_ROOT, "output", "markdown")
        self.files_to_process = []

    def main(self, reset):
        """
        Split every ``.md`` file in the source directory.
        :param reset: If True, deletes previous output and starts fresh.
        :return: List of the section file paths written by this call
        """
        # Function-scope import keeps this block self-contained (stdlib only).
        import shutil

        # Start from a clean list so repeated calls do not accumulate paths.
        self.files_to_process = []

        # If reset is True, delete the output directory. rmtree instead of a
        # shell "rm -rf" so paths with spaces or shell metacharacters work.
        if reset and os.path.exists(self.output_dir):
            shutil.rmtree(self.output_dir)

        if not os.path.isdir(self.source_dir):
            print(f"Error: Markdown directory not found: {self.source_dir}")
            return self.files_to_process

        markdown_files = sorted(
            f for f in os.listdir(self.source_dir) if f.endswith(".md")
        )

        # Loop through the Markdown files
        for markdown_file in markdown_files:
            self.split_file(markdown_file)

        return self.files_to_process

    def split_file(self, markdown_file):
        """
        Split a Markdown file into individual sections, written as
        ``section_001.md``, ``section_002.md``, ... in a folder named after
        the file.
        :param markdown_file: The file to split (name inside the source directory).
        :return: None
        """
        # Create the output directory if it doesn't exist. splitext drops only
        # the final extension; str.replace would remove every ".md" in the name.
        stem = os.path.splitext(markdown_file)[0]
        single_output_dir = os.path.join(self.output_dir, stem)
        os.makedirs(single_output_dir, exist_ok=True)

        # Read the Markdown file
        try:
            with open(os.path.join(self.source_dir, markdown_file), "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"Error reading file {markdown_file}: {e}")
            return

        # Order matters: custom punctuation first, then ASCII transliteration,
        # then Markdown stripping.
        content = self._normalize_punctuation(content)
        content = unidecode(content)
        content = self._strip_markdown(content)

        # Split by blank lines (paragraphs), then merge consecutive list paragraphs
        sections = self._group_sections(re.split(r'\n{2,}', content))

        for section_count, section in enumerate(sections, start=1):
            # Safety net: replace anything non-ASCII that survived transliteration
            section = re.sub(r'[^\x00-\x7F]+', ' ', section)

            # Write the section to an individual file
            output_file_path = os.path.join(single_output_dir, f"section_{section_count:03}.md")
            with open(output_file_path, "w", encoding="utf-8") as out_file:
                out_file.write(section)
            self.files_to_process.append(output_file_path)

    @classmethod
    def _normalize_punctuation(cls, content):
        """Rewrite dashes, ellipses and curly quotes to their ASCII replacements."""
        return content.translate(cls._PUNCTUATION_TABLE)

    @staticmethod
    def _strip_markdown(content):
        """Remove quotes, images, links and basic Markdown formatting from text."""
        content = content.replace('"', "")  # Remove double quotes

        # Images must go before links; otherwise the link rule turns
        # "![alt](src)" into "!alt".
        content = re.sub(r'!\[.*?\]\(.*?\)', '', content)
        # Labeled links keep only their label (an empty label removes the link)
        content = re.sub(r'\[([^\]]*)\]\([^\)]+\)', r'\1', content)
        # Standalone <http://...> / <https://...> autolinks are dropped
        content = re.sub(r'<https?://[^>\s]+>', '', content)

        # Strip remaining Markdown formatting (e.g., headers, bold, italic, etc.)
        content = re.sub(r'[#*_~`]', '', content)
        # Blockquote markers, only at the start of a line, so a ">" inside a
        # sentence and the paragraph break after a bare ">" are left intact
        content = re.sub(r'^[ \t]*(?:>[ \t]?)+', '', content, flags=re.MULTILINE)
        content = re.sub(r'-{3,}', '', content)  # Horizontal rules
        return content

    @classmethod
    def _group_sections(cls, raw_sections):
        """
        Strip sections, drop empty ones, and merge each run of consecutive
        list sections into a single newline-joined section.
        """
        grouped = []
        previous_was_list = False
        for raw in raw_sections:
            text = raw.strip()
            if not text:
                continue
            is_list = bool(cls._LIST_ITEM.match(text))
            if is_list and previous_was_list:
                grouped[-1] = f"{grouped[-1]}\n{text}"
            else:
                grouped.append(text)
            previous_was_list = is_list
        return grouped