From 18f1d4c93af464c5e8e3bc3a8c1420e2460649ec Mon Sep 17 00:00:00 2001 From: Jan Willhaus Date: Tue, 31 Dec 2024 14:27:56 +0100 Subject: [PATCH] docs: Improve usage documentation (#224) --- .pre-commit-config.yaml | 9 +++ README.md | 75 +++++++++++++++---------- config.yaml.example | 109 +++++++++++++++++++++++++++++++++++++ hack/config-generate.sh | 9 +++ podcast_archiver/config.py | 4 +- 5 files changed, 175 insertions(+), 31 deletions(-) create mode 100644 config.yaml.example create mode 100755 hack/config-generate.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c0007f5..ad3cdf1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,3 +55,12 @@ repos: require_serial: true pass_filenames: false types: [python] + + - id: config-generate + name: config-generate + entry: ./hack/config-generate.sh + language: system + require_serial: true + pass_filenames: false + files: ^podcast_archiver/config\.py$ + types: [python] diff --git a/README.md b/README.md index 3933ea0..df68f32 100644 --- a/README.md +++ b/README.md @@ -52,20 +52,15 @@ By default, the docker image downloads episodes to a volume mounted at `/archive ## Usage -Run `podcast-archiver --help` for details on how to use it: - -![`podcast-archiver --help`](.assets/podcast-archiver-help.svg) - -### Example invocation +Podcast Archiver supports command line arguments, environment variables, and a config file to set it up. The simplest invocation, passing feeds as command line arguments, would look like this: ```sh -podcast-archiver -d ~/Music/Podcasts \ - -f https://logbuch-netzpolitik.de/feed/mp3/ \ - -f https://raumzeit-podcast.de/feed/mp3/ \ - -f https://feeds.lagedernation.org/feeds/ldn-mp3.xml +podcast-archiver --dir ~/Podcasts --feed https://feeds.feedburner.com/TheAnthropoceneReviewed ``` -This way, you can easily add and remove feeds to the list and let the archiver fetch the newest episodes for example by adding it to your crontab. 
+### What constitutes a "feed" + +Podcast Archiver expects values to its `--feed/-f` argument to be URLs pointing to an [RSS feed of a podcast](https://archive.is/jYk3E). Feeds can also be "fetched" from a local file: @@ -73,7 +68,45 @@ Feeds can also be "fetched" from a local file: podcast-archiver -f file:/Users/janw/downloaded_feed.xml ``` -#### Continuous mode +### Using a config file + +Podcast Archiver can be configured using a YAML config file as well. This way you can easily add and remove feeds to the list and let the archiver fetch the newest episodes, for example using a daily cronjob. + +A simple config file can look like this: + +```yaml +archive_directory: ~/Podcasts +filename_template: '{show.title}/{episode.published_time:%Y-%m-%d} - {episode.title}.{ext}' +write_info_json: true +feeds: + - https://feeds.feedburner.com/TheAnthropoceneReviewed # The Anthropocene Reviewed + - https://feeds.megaphone.fm/heavyweight-spot # Heavyweight +``` + +To create a config file, you may use `podcast-archiver --config-generate` to emit an example configuration locally. You can also find a [pre-populated config file here](https://github.com/janw/podcast-archiver/blob/main/config.yaml.example). The example config contains descriptions and default values for all available parameters. After modifying it to your liking, you can invoke the archiver by passing the config file as a command line argument: + +```sh +podcast-archiver --config config.yaml +``` + +Alternatively (for example, if you're running `podcast-archiver` in Docker), you may point it to the config file using the `PODCAST_ARCHIVER_CONFIG=path/to/config.yaml` environment variable. + +If the `--config` parameter is omitted, the archiver will look for a config file in its app config directory. The location of this directory is OS-specific; it is printed with the `podcast-archiver --help` output (next to the `--config` option help text). 
+ +### Using environment variables + +Most settings of Podcast Archiver are available as environment variables, too. Check `podcast-archiver --help` for options with `env var: …` next to them. + +```sh +export PODCAST_ARCHIVER_FEEDS='https://feeds.feedburner.com/TheAnthropoceneReviewed' # multiple must be space-separated +export PODCAST_ARCHIVER_ARCHIVE_DIRECTORY="$HOME/Podcasts" + +podcast-archiver +``` + +## Advanced use + +### Continuous mode When the `--sleep-seconds` option is set to a non-zero value, Podcast Archiver operates in continuous mode. After successfully populating the archive, it will not terminate but rather sleep for the given number of seconds until it refreshes the feeds again and downloads episodes that have been published in the meantime. @@ -127,22 +160,8 @@ Note here that `episode.published_time` is a Python-native datetime, so its exac Results in `…/That Show/2023-03-12 ts001-episodefilename.mp3` -### Using a config file - -Command line arguments can be replaced with entries in a YAML configuration file. An example config can be generated with - -```bash -podcast-archiver --config-generate > config.yaml -``` - -After modifying the settings to your liking, `podcast-archiver` can be run with - -```bash -podcast-archiver --config config.yaml -``` - -Alternatively (for example, if you're running `podcast-archiver` in Docker), you may point it to the config file using the `PODCAST_ARCHIVER_CONFIG=path/to/config.yaml` environment variable. +### All available options -### Using environment variables +Run `podcast-archiver --help` to see all available parameters and the corresponding environment variables. -Some settings of Podcast Archiver are available as environment variables, too. Check `podcast-archiver --help` for options with `env var: …` next to them. 
+![`podcast-archiver --help`](.assets/podcast-archiver-help.svg) diff --git a/config.yaml.example b/config.yaml.example new file mode 100644 index 0000000..db5e3c1 --- /dev/null +++ b/config.yaml.example @@ -0,0 +1,109 @@ +## Podcast-Archiver configuration +## Generated using podcast-archiver v2.0.0 + +# Field 'feeds': Feed URLs to archive. +# +# Equivalent command line option: --feeds +# +feeds: [] + +# Field 'opml_files': OPML files containing feed URLs to archive. OPML files can +# be exported from a variety of podcatchers. +# +# Equivalent command line option: --opml-files +# +opml_files: [] + +# Field 'archive_directory': Directory to which to download the podcast archive. +# By default, the archive will be created in the current working directory +# ('.'). +# +# Equivalent command line option: --archive-directory +# +archive_directory: "." + +# Field 'write_info_json': Write episode metadata to a .info.json file next to +# the media file itself. +# +# Equivalent command line option: --write-info-json +# +write_info_json: false + +# Field 'quiet': Print only minimal progress information. Errors will always be +# emitted. +# +# Equivalent command line option: --quiet +# +quiet: false + +# Field 'verbose': Increase the level of verbosity while downloading. Can be +# passed multiple times. Increased verbosity and non-interactive execution (in +# a cronjob, docker compose, etc.) will disable progress bars. Non-interactive +# execution also always raises the verbosity unless --quiet is passed. +# +# Equivalent command line option: --verbose +# +verbose: 0 + +# Field 'slugify_paths': Format filenames in the most compatible way, replacing +# all special characters. +# +# Equivalent command line option: --slugify-paths +# +slugify_paths: false + +# Field 'filename_template': Template to be used when generating filenames. 
+# Available template variables are: 'episode.title, 'episode.published_time, +# 'episode.original_filename, 'episode.subtitle, 'show.title, 'show.subtitle, +# 'show.author, 'show.language', and 'ext' (the filename extension) +# +# Equivalent command line option: --filename-template +# +filename_template: "{show.title}/{episode.published_time:%Y-%m-%d} - {episode.title}.{ext}" + +# Field 'maximum_episode_count': Only download the given number of episodes per +# podcast feed. Useful if you don't really need the entire backlog. +# +# Equivalent command line option: --maximum-episode-count +# +maximum_episode_count: 0 + +# Field 'concurrency': Maximum number of simultaneous downloads. +# +# Equivalent command line option: --concurrency +# +concurrency: 4 + +# Field 'debug_partial': Download only the first 1048576 bytes of episodes for +# debugging purposes. +# +# Equivalent command line option: --debug-partial +# +debug_partial: false + +# Field 'database': Location of the database to keep track of downloaded +# episodes. By default, the database will be created as 'podcast-archiver.db' +# in the directory of the config file. +# +# Equivalent command line option: --database +# +database: null + +# Field 'ignore_database': Ignore the episodes database when downloading. This +# will cause files to be downloaded again, even if they already exist in the +# database. +# +# Equivalent command line option: --ignore-database +# +ignore_database: false + +# Field 'sleep_seconds': Run podcast-archiver continuously. Set to a non-zero +# number of seconds to sleep after all available episodes have been +# downloaded. Otherwise the application exits after all downloads have been +# completed. 
+# +# Equivalent command line option: --sleep-seconds +# +sleep_seconds: 0 + +# vim:syntax=yaml diff --git a/hack/config-generate.sh b/hack/config-generate.sh new file mode 100755 index 0000000..8c1c328 --- /dev/null +++ b/hack/config-generate.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +export CI=1 + +{ + poetry run podcast-archiver --config-generate + echo + echo '# vim:syntax=yaml' +} > config.yaml.example diff --git a/podcast_archiver/config.py b/podcast_archiver/config.py index 4cfbcab..31ffafc 100644 --- a/podcast_archiver/config.py +++ b/podcast_archiver/config.py @@ -3,7 +3,6 @@ import pathlib import sys import textwrap -from datetime import datetime from os import getenv from typing import IO, TYPE_CHECKING, Any, Text @@ -196,12 +195,11 @@ def get_option_name(name: str, field: FieldInfo) -> str: @classmethod def generate_default_config(cls, file: IO[Text] | None = None) -> None: - now = datetime.now().replace(microsecond=0).astimezone() wrapper = textwrap.TextWrapper(width=80, initial_indent="# ", subsequent_indent="# ") lines = [ f"## {constants.PROG_NAME.title()} configuration", - f"## Generated with {constants.PROG_NAME} {version} at {now}", + f"## Generated using {constants.PROG_NAME} {version}", ] for name, field in cls.model_fields.items():