feat: Show spinner/logline during initial feed retrieval (#230)
janw authored Jan 4, 2025
1 parent f2b89d5 commit dfa7d3f
Showing 10 changed files with 48 additions and 118 deletions.
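In short, the commit wraps the initial (often slow) feed retrieval in a `console.status` spinner plus a log line, and turns the download progress display into a context manager. A minimal, self-contained sketch of the spinner pattern (the `load_feed` stub and the sleep are stand-ins for the real network fetch):

```python
import time

from rich.console import Console

console = Console()

def load_feed(url: str) -> str:
    time.sleep(2)  # stand-in for the real HTTP retrieval and parsing
    return "The Anthropocene Reviewed"

url = "https://feeds.feedburner.com/TheAnthropoceneReviewed"
# console.status() renders an animated spinner next to the message for
# as long as the context manager stays open, then clears it.
with console.status(f"Loading feed from '{url}' ..."):
    title = load_feed(url)
console.print(f"→ Processing: {title}")
```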
102 changes: 0 additions & 102 deletions .assets/podcast-archiver-dry-run.svg

This file was deleted.

2 changes: 0 additions & 2 deletions README.md
@@ -103,8 +103,6 @@ To find out if you have the right feed, you may want to use the `--dry-run` option
podcast-archiver -f https://feeds.feedburner.com/TheAnthropoceneReviewed --dry-run
```

-![`podcast-archiver -f https://feeds.feedburner.com/TheAnthropoceneReviewed --dry-run`](.assets/podcast-archiver-dry-run.svg)

### Using a config file

Podcast Archiver can be configured using a YAML config file as well. This way you can easily add and remove feeds to the list and let the archiver fetch the newest episodes, for example using a daily cronjob.
2 changes: 1 addition & 1 deletion config.yaml.example
@@ -1,5 +1,5 @@
## Podcast-Archiver configuration
-## Generated using podcast-archiver v2.2.0
+## Generated using podcast-archiver v2.2.1

# Field 'feeds': Feed URLs to archive.
#
2 changes: 0 additions & 2 deletions podcast_archiver/base.py
@@ -8,7 +8,6 @@
from podcast_archiver.config import Settings
from podcast_archiver.logging import logger, rprint
from podcast_archiver.processor import FeedProcessor
-from podcast_archiver.utils.progress import progress_manager

if TYPE_CHECKING:
    from pathlib import Path
@@ -39,7 +38,6 @@ def _cleanup(signum: int, *args: Any) -> None:
            logger.debug("Signal %s received", signum)
            rprint("✘ Terminating", style="error")
            self.processor.shutdown()
-            progress_manager.stop()
            ctx.close()
            sys.exit(0)

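The explicit `progress_manager.stop()` disappears from the signal handler because the progress display is now scoped by a context manager inside the processor (see `processor.py` below), which stops it even on abnormal exits. A simplified sketch of the signal-cleanup pattern itself (the registration calls and the plain `print` are assumptions; the real handler also shuts down the processor and closes the click context):

```python
import signal
import sys
from typing import Any

def _cleanup(signum: int, *args: Any) -> None:
    print(f"Signal {signum} received; terminating")
    # In the real code: self.processor.shutdown(); ctx.close()
    sys.exit(0)

signal.signal(signal.SIGINT, _cleanup)
signal.signal(signal.SIGTERM, _cleanup)
```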
2 changes: 1 addition & 1 deletion podcast_archiver/console.py
@@ -8,7 +8,7 @@
"error": "dark_red bold",
"errorhint": "dark_red dim",
"warning": "orange1 bold",
"warning_hint": "orange1 dim",
"warninghint": "orange1 dim",
"completed": "dark_cyan bold",
"success": "dark_cyan",
"present": "dark_cyan",
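The rename makes `warninghint` consistent with the existing `errorhint` key. A small sketch of how such a rich `Theme` is consumed (style names and values are taken from the diff; the `Console` wiring is an assumption about how the module is used):

```python
from rich.console import Console
from rich.theme import Theme

# Style names and values as they appear in the diff above.
theme = Theme({
    "error": "dark_red bold",
    "errorhint": "dark_red dim",
    "warning": "orange1 bold",
    "warninghint": "orange1 dim",
})

console = Console(theme=theme)
console.print("Feed returned an HTTP error.", style="warning")
console.print("(check that the URL points at an RSS feed)", style="warninghint")
```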
2 changes: 1 addition & 1 deletion podcast_archiver/models/feed.py
@@ -117,7 +117,7 @@ def parse_feed(cls, source: str | bytes, alt_url: str | None, retry: bool = False
            return obj

        if (fallback_url := obj.feed.alternate_rss) and not retry:
-            logger.info("Attempting to fetch alternate feed at %s", fallback_url)
+            logger.info("Attempting to load alternate feed from '%s'", fallback_url)
            return cls.from_url(fallback_url, retry=True)

        url = source if isinstance(source, str) and not alt_url else alt_url
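Only the log wording changes here, but the surrounding code illustrates a single-retry fallback: the `retry` flag on the recursive call guarantees an alternate feed is followed at most once, so two feeds pointing at each other cannot recurse forever. A self-contained sketch of that pattern (`fetch_feed` and `find_alternate` are hypothetical stand-ins for the real parsing):

```python
from __future__ import annotations

def fetch_feed(url: str) -> dict | None:
    # Stand-in: pretend only the alternate URL parses successfully.
    return {"title": "Example"} if url.endswith("/alt.rss") else None

def find_alternate(url: str) -> str | None:
    return "https://example.com/alt.rss"

def load(url: str, retry: bool = False) -> dict | None:
    if (feed := fetch_feed(url)) is not None:
        return feed
    if (fallback := find_alternate(url)) and not retry:
        return load(fallback, retry=True)  # at most one hop
    return None

print(load("https://example.com/feed.rss"))  # {'title': 'Example'}
```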
19 changes: 13 additions & 6 deletions podcast_archiver/processor.py
@@ -8,6 +8,7 @@

from podcast_archiver import constants
from podcast_archiver.config import Settings
+from podcast_archiver.console import console
from podcast_archiver.database import get_database
from podcast_archiver.download import DownloadJob
from podcast_archiver.enums import DownloadResult, QueueCompletionType
@@ -20,8 +21,9 @@
    ProcessingResult,
)
from podcast_archiver.urls import registry
-from podcast_archiver.utils import FilenameFormatter, handle_feed_request
+from podcast_archiver.utils import FilenameFormatter, handle_feed_request, sanitize_url
from podcast_archiver.utils.pretty_printing import PrettyPrintEpisodeRange
+from podcast_archiver.utils.progress import progress_manager

if TYPE_CHECKING:
    from pathlib import Path
@@ -52,10 +54,17 @@ def __init__(self, settings: Settings | None = None, database: BaseDatabase | None
        self.known_feeds = {}

    def process(self, url: str, dry_run: bool = False) -> ProcessingResult:
-        if not (feed := self.load_feed(url)):
+        msg = f"Loading feed from '{sanitize_url(url)}' ..."
+        logger.info(msg)
+        with console.status(msg):
+            feed = self.load_feed(url)
+        if not feed:
            return ProcessingResult(feed=None, tombstone=QueueCompletionType.FAILED)

-        result = self.process_feed(feed=feed, dry_run=dry_run)
+        action = "Dry-run" if dry_run else "Processing"
+        rprint(f"→ {action}: {feed.info.title}", style="title", markup=False, highlight=False)
+        with progress_manager:
+            result = self.process_feed(feed=feed, dry_run=dry_run)
        rprint(result, end="\n\n")
        return result

@@ -107,8 +116,6 @@ def _does_already_exist(self, episode: BaseEpisode, *, target: Path) -> bool:
        return True

    def process_feed(self, feed: Feed, dry_run: bool) -> ProcessingResult:
-        action = "Dry-run" if dry_run else "Processing"
-        rprint(f"→ {action}: {feed}", style="title")
        tombstone = QueueCompletionType.COMPLETED
        results: EpisodeResultsList = []
        with PrettyPrintEpisodeRange() as pretty_range:
@@ -120,7 +127,7 @@ def process_feed(self, feed: Feed, dry_run: bool) -> ProcessingResult:
                exists = isinstance(enqueued, EpisodeResult) and enqueued.result == DownloadResult.ALREADY_EXISTS
                pretty_range.update(exists, episode)

-                if not dry_run:
+                if not dry_run and not exists:
                    results.append(enqueued)

                if (max_count := self.settings.maximum_episode_count) and idx == max_count:
9 changes: 9 additions & 0 deletions podcast_archiver/utils/__init__.py
@@ -6,6 +6,7 @@
from functools import partial
from string import Formatter
from typing import IO, TYPE_CHECKING, Any, Generator, Iterable, Iterator, Literal, TypedDict, overload
+from urllib.parse import urlparse

from pydantic import ValidationError
from requests import HTTPError
@@ -177,3 +178,11 @@ def get_field_titles() -> str:

    all_field_titles = Episode.field_titles() + FeedInfo.field_titles()
    return "'" + ", '".join(all_field_titles) + "'"


+def sanitize_url(url: str) -> str:
+    parsed_url = urlparse(url)
+    sanitized_netloc = parsed_url.hostname or ""
+    if parsed_url.port:
+        sanitized_netloc += f":{parsed_url.port}"
+    return parsed_url._replace(netloc=sanitized_netloc, query="").geturl()
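The helper's behavior, as pinned down by the new tests in `tests/test_utils.py` below: userinfo (credentials) and the query string are stripped, while scheme, host, port, and path survive. This keeps secrets such as API keys out of the new log line. A quick usage sketch (assuming the package is importable; the port example follows from the code rather than the tests):

```python
from podcast_archiver.utils import sanitize_url

print(sanitize_url("https://foo:bar@example.com/baz"))
# https://example.com/baz
print(sanitize_url("https://example.com:8443/feed.xml?api-key=1234"))
# https://example.com:8443/feed.xml
```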
12 changes: 10 additions & 2 deletions podcast_archiver/utils/progress.py
@@ -2,7 +2,7 @@

from functools import partial
from threading import Event, Lock, Thread
-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING, Any, Iterable

from rich import progress as rp
from rich.table import Column
@@ -133,12 +133,18 @@ def __init__(self) -> None:
            refresh_per_second=8,
        )

+    def __enter__(self) -> ProgressManager:
+        self.start()
+        return self
+
+    def __exit__(self, *args: Any) -> None:
+        self.stop()

    def track(self, iterable: Iterable[bytes], total: int, episode: BaseEpisode) -> Iterable[bytes]:
        if REDIRECT_VIA_LOGGING:
            yield from iterable
            return

-        self.start()
        task_id = self._progress.add_task("downloading", total=total, episode=episode)
        try:
            for it in iterable:
@@ -149,6 +155,8 @@ def track(self, iterable: Iterable[bytes], total: int, episode: BaseEpisode) -> Iterable[bytes]:
            self._progress.refresh()

    def start(self) -> None:
+        if REDIRECT_VIA_LOGGING:
+            return
        with self._lock:
            if self._started:
                return
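With `__enter__`/`__exit__` added, callers can scope the progress display to a block; `stop()` then runs even if the block raises, which is what made the explicit `progress_manager.stop()` in `base.py` redundant. A minimal usage sketch (the empty job list is a stand-in for the real download queue):

```python
from podcast_archiver.utils.progress import progress_manager

def run_jobs(jobs: list) -> None:
    # __enter__ calls start(), __exit__ calls stop(), error or not.
    with progress_manager:
        for job in jobs:
            job()

run_jobs([])  # no-op demo; real jobs come from the FeedProcessor
```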
14 changes: 13 additions & 1 deletion tests/test_utils.py
@@ -1,6 +1,6 @@
import pytest

-from podcast_archiver.utils import truncate
+from podcast_archiver.utils import sanitize_url, truncate


@pytest.mark.parametrize(
@@ -13,3 +13,15 @@
)
def test_truncate(input_str: str, expected_output: str) -> None:
    assert truncate(input_str, 20) == expected_output


+@pytest.mark.parametrize(
+    "url, expected_sanitized",
+    [
+        ("https://example.com", "https://example.com"),
+        ("https://foo:bar@example.com/baz", "https://example.com/baz"),
+        ("https://foo@example.com/baz?api-key=1234", "https://example.com/baz"),
+    ],
+)
+def test_sanitize_url(url: str, expected_sanitized: str) -> None:
+    assert sanitize_url(url) == expected_sanitized
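To exercise just the new cases, one option is pytest's `-k` filter, shown here via `pytest.main`, which takes the same arguments as the CLI:

```python
import pytest

# Equivalent to: pytest tests/test_utils.py -k sanitize_url
pytest.main(["tests/test_utils.py", "-k", "sanitize_url"])
```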
