Added Markdown splitter, voiceover API calls, audio file combiner

danmenzies · Dec 2, 2024 · 1b5680f · 1b5680f
1 parent ed70e2a
commit 1b5680f
Show file tree

Hide file tree

Showing 3 changed files with 347 additions and 0 deletions.
diff --git a/main.py b/main.py
@@ -0,0 +1,112 @@
+import os
+from dotenv import load_dotenv, find_dotenv
+import argparse
+from pathlib import Path
+from src.split_markdown import split_markdown
+from src.build_output import build_output
+
+def main(voice_id, api_key, reset, audio_only, markdown_only, combine_only):
+    """
+    Main function to run the program
+    :param voice_id: ElevenLabs Voice ID
+    :param api_key: ElevenLabs API Key
+    :param reset: Reset the program
+    :param audio_only: Only build the audio components
+    :param markdown_only: Only build the markdown components
+    :param combine_only: Only build the final, combined audio files
+    :return: None
+    """
+
+    if sum([markdown_only, audio_only, combine_only]) > 1:
+        print("Error: Only one of --markdown-only, --audio-only, or --combine-only can be enabled at a time.")
+        exit(1)
+
+    # Load environment variables
+    env_path = find_dotenv()
+    if not env_path:
+        print("Error: .env file not found. Please create one in the project root.")
+        exit(1)
+    load_dotenv(env_path)
+
+    # Set the voice_id and api_key if not provided
+    if voice_id is None:
+        voice_id = os.getenv("ELEVENLABS_VOICE_ID")
+
+    if api_key is None:
+        api_key = os.getenv("ELEVENLABS_API_KEY")
+
+    # Set the project root
+    PROJECT_ROOT = Path(env_path).parent
+
+    if reset:
+        os.system(f"rm -rf {os.path.join(PROJECT_ROOT, 'output')}")
+
+    # Initialize the split_markdown class
+    if not audio_only and not combine_only:
+        split_md = split_markdown(PROJECT_ROOT)
+        files_to_process = split_md.main(reset)
+
+    # Initialize the build_output class
+    audio = build_output(PROJECT_ROOT, voice_id, api_key)
+
+    # Build the audio components
+    if not markdown_only and not combine_only:
+        audio.components()
+
+    # Combine the audio files
+    if not markdown_only and not audio_only:
+        audio.combine_final_audio()
+
+
+if __name__ == "__main__":
+
+    # Set up argument parser
+    parser = argparse.ArgumentParser(
+        description="Convert Markdown files to audio using the Elevenlabs API."
+    )
+    parser.add_argument(
+        "--voice-id",
+        type=str,
+        default=os.getenv("ELEVENLABS_VOICE_ID"),
+        help="Voice ID to use for Elevenlabs. Defaults to the ID in the .env file.",
+    )
+    parser.add_argument(
+        "--api-key",
+        type=str,
+        default=os.getenv("ELEVENLABS_API_KEY"),
+        help="API Key for Elevenlabs. Defaults to the key in the .env file.",
+    )
+    parser.add_argument(
+        "--reset",
+        action='store_true',
+        default=False,
+        help="Delete previous iterations, and start again (warning, can incur unexpected API expenses).",
+    )
+    parser.add_argument(
+        "--audio-only",
+        action='store_true',
+        default=False,
+        help="Only build the audio components.",
+    )
+    parser.add_argument(
+        "--markdown-only",
+        action='store_true',
+        default=False,
+        help="Only build the markdown components.",
+    )
+    parser.add_argument(
+        "--combine-only",
+        action='store_true',
+        default=False,
+        help="Only build the final, combined audio files.",
+    )
+
+    args = parser.parse_args()
+    main(
+        voice_id=args.voice_id,
+        api_key=args.api_key,
+        reset=args.reset,
+        audio_only=args.audio_only,
+        markdown_only=args.markdown_only,
+        combine_only=args.combine_only
+    )
diff --git a/src/build_output.py b/src/build_output.py
@@ -0,0 +1,127 @@
+import os
+from pathlib import Path
+from elevenlabs import ElevenLabs
+from pydub import AudioSegment
+
+
+class build_output:
+    """
+    This class is used to build the output of the program
+    """
+
+    def __init__(self, PROJECT_ROOT, voice_id, api_key):
+        """
+        Constructor for build_output class
+        :param PROJECT_ROOT: Project root
+        :param voice_id: ElevenLabs Voice ID
+        :param api_key: ElevenLabs API Key
+        """
+        self.PROJECT_ROOT = PROJECT_ROOT
+        self.voice_id = voice_id
+        self.api_key = api_key
+        self.input_dir = os.path.join(PROJECT_ROOT, "output", "markdown")
+        self.output_dir = os.path.join(PROJECT_ROOT, "output", "audio")
+        self.client = ElevenLabs(api_key=api_key)
+        self.audio_files = []
+
+    def components(self):
+        """
+        Main function to build the audio output
+        """
+        for markdown_folder in os.listdir(self.input_dir):
+            folder_path = os.path.join(self.input_dir, markdown_folder)
+            if os.path.isdir(folder_path):
+                self.process_folder(markdown_folder)
+
+    def process_folder(self, folder_name):
+        """
+        Process a single folder of split Markdown files
+        :param folder_name: Name of the folder
+        """
+        folder_path = os.path.join(self.input_dir, folder_name)
+        audio_output_folder = os.path.join(self.output_dir, folder_name)
+
+        # Create audio output folder if it doesn't exist
+        os.makedirs(audio_output_folder, exist_ok=True)
+
+        for file_name in sorted(os.listdir(folder_path)):
+            if file_name.endswith(".md"):
+                section_path = os.path.join(folder_path, file_name)
+                output_audio_path = os.path.join(audio_output_folder, file_name.replace(".md", ".mp3"))
+                self.generate_audio(section_path, output_audio_path)
+
+    def combine_final_audio(self):
+        """
+        Combine all the audio files into one
+        :return:
+        """
+
+        # Combine all audio files into one
+        for folder_name in sorted(os.listdir(self.output_dir)):
+            combined_audio_path = os.path.join(self.output_dir, f"{folder_name}.mp3")
+            print(f"Combining audio for: {folder_name}")
+
+            # Get all audio files in the folder
+            audio_files = []
+            folder_path = os.path.join(self.output_dir, folder_name)
+            for file_name in sorted(os.listdir(folder_path)):
+                if file_name.endswith(".mp3"):
+                    audio_files.append(os.path.join(folder_path, file_name))
+            # Combine audio files for each folder
+            self.combine_audio(audio_files, combined_audio_path)
+
+    def generate_audio(self, input_file, output_audio_path):
+        """
+        Generate audio for a single section using ElevenLabs API
+        :param input_file: Path to the Markdown file section
+        :param output_audio_path: Path to save the generated audio
+        """
+        with open(input_file, "r") as file:
+            text = file.read().strip()
+
+        if not text:
+            print(f"Skipping empty file: {input_file}")
+            return None
+
+        # Generate audio using ElevenLabs API
+        print(f"Generating audio for: {input_file}")
+        response = self.client.text_to_speech.convert(
+            voice_id=self.voice_id,
+            model_id="eleven_multilingual_v2",  # Use the updated multilingual model
+            text=text,
+        )
+
+        # Collect the full audio content from the generator
+        audio_content = b"".join(response)
+
+        # Save the response audio to file
+        with open(output_audio_path, "wb") as audio_file:
+            audio_file.write(audio_content)
+
+        return output_audio_path
+
+    def combine_audio(self, audio_files, combined_audio_path):
+        """
+        Combine multiple audio files into one with natural gaps
+        :param audio_files: List of audio file paths
+        :param combined_audio_path: Path to save the combined audio file
+        """
+        combined_audio = None
+        gap = AudioSegment.silent(duration=1000)  # 1-second pause
+
+        for audio_file in audio_files:
+
+            try:
+                audio_segment = AudioSegment.from_file(audio_file)
+            except Exception as e:
+                print(f"Error processing file {audio_file}: {e}")
+                continue
+
+            if combined_audio is None:
+                combined_audio = audio_segment
+            else:
+                combined_audio += gap + audio_segment
+
+        if combined_audio:
+            print(f"Saving combined audio to: {combined_audio_path}")
+            combined_audio.export(combined_audio_path, format="mp3")
diff --git a/src/split_markdown.py b/src/split_markdown.py
@@ -0,0 +1,108 @@
+import os
+import re
+from unidecode import unidecode
+
+class split_markdown:
+    """
+    Splits a Markdown file into individual sections.
+    """
+
+    def __init__(self, PROJECT_ROOT):
+        """
+        Constructor for split_markdown class.
+        :param PROJECT_ROOT:
+        """
+        self.PROJECT_ROOT = PROJECT_ROOT
+        self.source_dir = os.path.join(PROJECT_ROOT, "markdown")
+        self.output_dir = os.path.join(PROJECT_ROOT, "output", "markdown")
+        self.files_to_process = []
+
+    def main(self, reset):
+        """
+        Main function to split the Markdown file.
+        :param reset: If True, deletes previous output and starts fresh.
+        :return:
+        """
+        # Build the path to the Markdown files
+        markdown_dir = os.path.join(self.PROJECT_ROOT, "markdown")
+        markdown_files = os.listdir(markdown_dir)
+        markdown_files = [f for f in markdown_files if f.endswith(".md")]
+
+        # If reset is True, delete the output directory
+        if reset and os.path.exists(self.output_dir):
+            os.system(f"rm -rf {self.output_dir}")
+
+        # Loop through the Markdown files
+        for markdown_file in markdown_files:
+            self.split_file(markdown_file)
+
+    def split_file(self, markdown_file):
+        """
+        Split a Markdown file into individual sections.
+        :param markdown_file: The file to split.
+        :return:
+        """
+        # Create the output directory if it doesn't exist
+        single_output_dir = os.path.join(self.output_dir, markdown_file.replace(".md", ""))
+        if not os.path.exists(single_output_dir):
+            os.makedirs(single_output_dir, exist_ok=True)
+
+        # Read the Markdown file
+        try:
+            with open(os.path.join(self.source_dir, markdown_file), "r", encoding="utf-8") as f:
+                content = f.read()
+        except Exception as e:
+            print(f"Error reading file {markdown_file}: {e}")
+            return
+
+        # Convert Unicode characters to ASCII
+        content = unidecode(content)
+
+        # Preprocessing: Replace unusual characters
+        content = content.replace("—", ". ")  # Replace em dash with period and space
+        content = content.replace("–", ". ")  # Replace en dash with period and space
+        content = content.replace("…", "...")  # Replace ellipsis with three dots
+        content = re.sub(r'[“”]', '"', content)  # Replace fancy quotes with straight quotes
+        content = re.sub(r"[‘’]", "'", content)  # Replace fancy single quotes with straight quotes
+        content = content.replace('"',"")  # Remove double quotes
+
+        # Remove links but keep labels if available, or remove entire link if standalone
+        content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)  # Handles labeled links
+        content = re.sub(r'<https?://[^\s>]+>', '', content)  # Handles standalone URLs
+
+        # Strip remaining Markdown formatting (e.g., headers, bold, italic, etc.)
+        content = re.sub(r'[#*_~`]', '', content)  # Basic formatting characters
+        content = re.sub(r'!\[.*?\]\(.*?\)', '', content)  # Images
+        content = re.sub(r'>\s+', '', content)  # Blockquotes
+        content = re.sub(r'-{3,}', '', content)  # Horizontal rules
+
+        # Split content into sections by paragraphs or full lists
+        sections = re.split(r'(?:\n{2,})', content)  # Splits by double newlines (paragraphs)
+
+        # Process each section
+        section_count = 1
+        for section in sections:
+            section = section.strip()
+
+            # Skip empty sections
+            if not section:
+                continue
+
+            # Check if the section is a list (starts with `-`, `*`, or digit + period)
+            if re.match(r'^(\s*[-*]|\d+\.)', section):
+                # Keep full list as one section by appending following list items
+                list_items = [section]
+                while sections and re.match(r'^(\s*[-*]|\d+\.)', sections[0]):
+                    list_items.append(sections.pop(0))
+                section = "\n".join(list_items)
+
+            # Additional cleaning: Remove any lingering problematic characters
+            section = re.sub(r'[^\x00-\x7F]+', ' ', section)  # Remove non-ASCII characters
+
+            # Write the section to an individual file
+            output_file_path = os.path.join(single_output_dir, f"section_{section_count:03}.md")
+            with open(output_file_path, "w", encoding="utf-8") as out_file:
+                out_file.write(section)
+                self.files_to_process.append(output_file_path)
+
+            section_count += 1