From a9b1ea93304f430a1fde29182a6ce7ef31987cdc Mon Sep 17 00:00:00 2001
From: Spoked <5782630+dreulavelle@users.noreply.github.com>
Date: Mon, 1 Jul 2024 05:20:18 -0500
Subject: [PATCH] Release 0.7.5 (#478)

* feat: added log to show total processed scraped results

* Add Zilean scraper: untested - Spoked can test :D (#473)

Zilean is a service that allows you to search for DebridMediaManager-sourced, arr-less content. When the service is started, it will automatically download all the DMM shared hashlists and index them using Lucene. The service provides a search endpoint that lets you search for content using a query string and returns a list of filenames and infohashes. No filtering is applied to the search results; the idea behind this endpoint is that Riven performs the filtering itself using RTN. The DMM import reruns on missing pages every hour. It's written exclusively for Riven: /~https://github.com/iPromKnight/zilean

* feat: Zilean support

* Update to include get_top_title() as well as Show (#475)

* Update zilean.py: allow shows in Zilean
* Update zilean.py: change title to get_top_title()
* feat: update with new changes
* feat: update with new changes

* Jackett fixes and anime show fixes (#466)

* Fixed anime identification
* Improved Jackett accuracy
* Typo
* Rewrote some of the Trakt indexer; added a new class method to Show that reindexes values across episodes and seasons. Fixed weird titles
* Fixed propagate
* Removed debug
* Removed year param; now determined manually as it gives more accurate results
* Changed to show year instead of air year for each item
* Added back in validation
* Created a new function to grab the season year. Changed Jackett matching to support it as well.
* Typo
* Fixed poetry lock hash

---------

Co-authored-by: Joshua
Co-authored-by: Spoked <5782630+dreulavelle@users.noreply.github.com>

* fix: sort imports

* fix: revert jackett. plex watchlist rss feeds broken.
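For reference, a minimal sketch of the Zilean-plus-RTN flow described above, usable outside Riven itself. It assumes a local Zilean instance on the default port from the new ZileanConfig (8181) and RTN's stock settings rather than Riven's ranking profile (the SettingsModel/DefaultRanking imports are an assumption); the /dmm/search payload shape and the rank() call mirror the new zilean.py and shared.py modules in this patch.

    # Query Zilean's DMM search endpoint, then filter/rank the raw results
    # with RTN -- Zilean applies no filtering by design.
    import requests
    from RTN import RTN
    from RTN.exceptions import GarbageTorrent
    from RTN.models import DefaultRanking, SettingsModel  # stock models (assumption)

    rtn = RTN(SettingsModel(), DefaultRanking())
    resp = requests.post(
        "http://localhost:8181/dmm/search",
        json={"queryText": "Breaking Bad"},
        timeout=30,
    )
    resp.raise_for_status()

    for result in resp.json():
        # Each Zilean result carries only a filename and an infohash.
        try:
            torrent = rtn.rank(
                raw_title=result["filename"],
                infohash=result["infoHash"],
                correct_title="Breaking Bad",
                remove_trash=True,
            )
        except GarbageTorrent:
            continue  # junk releases are rejected outright
        if torrent and torrent.fetch:
            print(torrent.rank, torrent.raw_title)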
--------- Co-authored-by: Spoked Co-authored-by: iPromKnight <156901906+iPromKnight@users.noreply.github.com> Co-authored-by: dextrous0z <139093885+dextrous0z@users.noreply.github.com> Co-authored-by: Joshua --- VERSION | 2 +- backend/controllers/default.py | 61 +++++++++ backend/main.py | 1 + backend/program/content/plex_watchlist.py | 11 +- backend/program/indexers/trakt.py | 109 +++++++--------- backend/program/media/item.py | 71 +++++++++- backend/program/program.py | 6 +- backend/program/scrapers/__init__.py | 152 +++++----------------- backend/program/scrapers/annatar.py | 10 +- backend/program/scrapers/knightcrawler.py | 15 +-- backend/program/scrapers/mediafusion.py | 11 +- backend/program/scrapers/orionoid.py | 78 ++++++----- backend/program/scrapers/shared.py | 119 +++++++++++++++++ backend/program/scrapers/zilean.py | 110 ++++++++++++++++ backend/program/settings/models.py | 56 ++++++-- backend/program/state_transition.py | 5 +- backend/program/types.py | 6 +- backend/program/updaters/__init__.py | 20 +-- backend/program/updaters/local.py | 29 +++-- backend/program/updaters/plex.py | 7 +- backend/utils/logger.py | 10 +- backend/utils/request.py | 32 +++-- poetry.lock | 33 +++-- pyproject.toml | 1 + 24 files changed, 653 insertions(+), 302 deletions(-) create mode 100644 backend/program/scrapers/shared.py create mode 100644 backend/program/scrapers/zilean.py diff --git a/VERSION b/VERSION index ef090a6c..da2ac9c7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.7.4 \ No newline at end of file +0.7.5 \ No newline at end of file diff --git a/backend/controllers/default.py b/backend/controllers/default.py index 414d99b3..7676d801 100644 --- a/backend/controllers/default.py +++ b/backend/controllers/default.py @@ -1,6 +1,10 @@ +import time + import requests from fastapi import APIRouter, HTTPException, Request from program.content.trakt import TraktContent +from program.media.state import States +from program.scrapers import Scraping from program.settings.manager import settings_manager router = APIRouter( @@ -90,3 +94,60 @@ async def trakt_oauth_callback(code: str, request: Request): return {"success": True, "message": "OAuth token obtained successfully"} else: raise HTTPException(status_code=400, detail="Failed to obtain OAuth token") + + +@router.get("/stats") +async def get_stats(request: Request): + payload = {} + + total_items = len(request.app.program.media_items._items) + total_movies = len(request.app.program.media_items._movies) + total_shows = len(request.app.program.media_items._shows) + total_seasons = len(request.app.program.media_items._seasons) + total_episodes = len(request.app.program.media_items._episodes) + + _incomplete_items = request.app.program.media_items.get_incomplete_items() + + incomplete_retries = {} + for _, item in _incomplete_items.items(): + incomplete_retries[item.log_string] = item.scraped_times + + states = {} + for state in States: + states[state] = request.app.program.media_items.count(state) + + payload["total_items"] = total_items + payload["total_movies"] = total_movies + payload["total_shows"] = total_shows + payload["total_seasons"] = total_seasons + payload["total_episodes"] = total_episodes + payload["incomplete_items"] = len(_incomplete_items) + payload["incomplete_retries"] = incomplete_retries + payload["states"] = states + + return {"success": True, "data": payload} + +@router.get("/scrape/{item_id:path}") +async def scrape_item(item_id: str, request: Request): + item = request.app.program.media_items.get_item(item_id) + if item is 
None: + raise HTTPException(status_code=404, detail="Item not found") + + scraper = request.app.program.services.get(Scraping) + if scraper is None: + raise HTTPException(status_code=404, detail="Scraping service not found") + + time_now = time.time() + scraped_results = scraper.scrape(item, log=False) + time_end = time.time() + duration = time_end - time_now + + results = {} + for hash, torrent in scraped_results.items(): + results[hash] = { + "title": torrent.data.parsed_title, + "raw_title": torrent.raw_title, + "rank": torrent.rank, + } + + return {"success": True, "total": len(results), "duration": round(duration, 3), "results": results} \ No newline at end of file diff --git a/backend/main.py b/backend/main.py index 61255781..a7a1be70 100644 --- a/backend/main.py +++ b/backend/main.py @@ -9,6 +9,7 @@ import uvicorn from controllers.default import router as default_router from controllers.items import router as items_router + # from controllers.metrics import router as metrics_router from controllers.settings import router as settings_router from controllers.tmdb import router as tmdb_router diff --git a/backend/program/content/plex_watchlist.py b/backend/program/content/plex_watchlist.py index c815bf85..2ea24676 100644 --- a/backend/program/content/plex_watchlist.py +++ b/backend/program/content/plex_watchlist.py @@ -16,10 +16,10 @@ def __init__(self): self.key = "plex_watchlist" self.rss_enabled = False self.settings = settings_manager.settings.content.plex_watchlist + self.token = settings_manager.settings.updaters.plex.token self.initialized = self.validate() if not self.initialized: return - self.token = settings_manager.settings.plex.token self.recurring_items = set() logger.success("Plex Watchlist initialized!") @@ -27,6 +27,9 @@ def validate(self): if not self.settings.enabled: logger.warning("Plex Watchlists is set to disabled.") return False + if not self.token: + logger.error("Plex token is not set!") + return False if self.settings.rss: for rss_url in self.settings.rss: try: @@ -68,6 +71,7 @@ def run(self) -> Generator[Union[Movie, Show, Season, Episode], None, None]: self.recurring_items.add(imdb_id) yield items + def _get_items_from_rss(self) -> Generator[MediaItem, None, None]: """Fetch media from Plex RSS Feeds.""" for rss_url in self.settings.rss: @@ -76,8 +80,7 @@ def _get_items_from_rss(self) -> Generator[MediaItem, None, None]: if not response.is_ok: logger.error(f"Failed to fetch Plex RSS feed from {rss_url}: HTTP {response.status_code}") continue - for item in response.data.items: - yield from self._extract_imdb_ids(item.guids) + yield self._extract_imdb_ids(response.data.channel.item.guid) except Exception as e: logger.error(f"An unexpected error occurred while fetching Plex RSS feed from {rss_url}: {e}") @@ -102,7 +105,7 @@ def _get_items_from_watchlist(self) -> Generator[MediaItem, None, None]: @staticmethod def _ratingkey_to_imdbid(ratingKey: str) -> str: """Convert Plex rating key to IMDb ID""" - token = settings_manager.settings.plex.token + token = settings_manager.settings.updaters.plex.token filter_params = "includeGuids=1&includeFields=guid,title,year&includeElements=Guid" url = f"https://metadata.provider.plex.tv/library/metadata/{ratingKey}?X-Plex-Token={token}&{filter_params}" response = get(url) diff --git a/backend/program/indexers/trakt.py b/backend/program/indexers/trakt.py index 50cfec16..52221c02 100644 --- a/backend/program/indexers/trakt.py +++ b/backend/program/indexers/trakt.py @@ -1,7 +1,7 @@ """Trakt updater module""" from datetime import 
datetime, timedelta -from typing import Generator, List, Optional, Union +from typing import Generator, Optional, Union from program.media.item import Episode, MediaItem, Movie, Season, Show from program.settings.manager import settings_manager @@ -17,39 +17,47 @@ class TraktIndexer: def __init__(self): self.key = "traktindexer" - self.ids = [] self.initialized = True self.settings = settings_manager.settings.indexer - def copy_items(self, itema: MediaItem, itemb: MediaItem): + def copy_items(self, itema: MediaItem, itemb: MediaItem) -> MediaItem: if isinstance(itema, Show) and isinstance(itemb, Show): - for (seasona, seasonb) in zip(itema.seasons, itemb.seasons): - for (episodea, episodeb) in zip(seasona.episodes, seasonb.episodes): - episodeb.set("update_folder", episodea.update_folder) - episodeb.set("symlinked", episodea.symlinked) - episodeb.set("is_anime", episodea.is_anime) + for seasona, seasonb in zip(itema.seasons, itemb.seasons): + for episodea, episodeb in zip(seasona.episodes, seasonb.episodes): + self._copy_episode_attributes(episodea, episodeb) elif isinstance(itema, Movie) and isinstance(itemb, Movie): - itemb.set("update_folder", itema.update_folder) - itemb.set("symlinked", itema.symlinked) - itemb.set("is_anime", itema.is_anime) + self._copy_movie_attributes(itema, itemb) return itemb - + + @staticmethod + def _copy_episode_attributes(source: Episode, target: Episode) -> None: + target.update_folder = source.update_folder + target.symlinked = source.symlinked + target.is_anime = source.is_anime + + @staticmethod + def _copy_movie_attributes(source: Movie, target: Movie) -> None: + target.update_folder = source.update_folder + target.symlinked = source.symlinked + target.is_anime = source.is_anime + def run(self, in_item: MediaItem) -> Generator[Union[Movie, Show, Season, Episode], None, None]: """Run the Trakt indexer for the given item.""" if not in_item: logger.error("Item is None") return - if (imdb_id := in_item.imdb_id) is None: - logger.error(f"Item {item.log_string} does not have an imdb_id, cannot index it") + if not (imdb_id := in_item.imdb_id): + logger.error(f"Item {in_item.log_string} does not have an imdb_id, cannot index it") return - - item = create_item_from_imdb_id(imdb_id) + item = create_item_from_imdb_id(imdb_id) if not isinstance(item, MediaItem): logger.error(f"Failed to get item from imdb_id: {imdb_id}") return + if isinstance(item, Show): self._add_seasons_to_show(item, imdb_id) + item = self.copy_items(in_item, item) item.indexed_at = datetime.now() yield item @@ -65,7 +73,7 @@ def should_submit(item: MediaItem) -> bool: interval = timedelta(seconds=settings.update_interval) return datetime.now() - item.indexed_at > interval except Exception: - logger.error(f"Failed to parse date: {item.indexed_at} with format: {interval}") + logger.error(f"Failed to parse date: {item.indexed_at}") return False @staticmethod @@ -78,46 +86,49 @@ def _add_seasons_to_show(show: Show, imdb_id: str): if not imdb_id or not imdb_id.startswith("tt"): logger.error(f"Item {show.log_string} does not have an imdb_id, cannot index it") return - + + seasons = get_show(imdb_id) for season in seasons: if season.number == 0: continue - season_item = _map_item_from_data(season, "season", show.genres) + season_item = _map_item_from_data(season, "season") if season_item: - for episode in season.episodes: - episode_item = _map_item_from_data(episode, "episode", show.genres) + for episode_data in season.episodes: + episode_item = _map_item_from_data(episode_data, "episode") if 
episode_item: season_item.add_episode(episode_item) show.add_season(season_item) + # Propagate important global attributes to seasons and episodes + show.propagate_attributes_to_childs() -def _map_item_from_data(data, item_type: str, show_genres: List[str] = None) -> Optional[MediaItem]: +def _map_item_from_data(data, item_type: str) -> Optional[MediaItem]: """Map trakt.tv API data to MediaItemContainer.""" if item_type not in ["movie", "show", "season", "episode"]: - logger.debug(f"Unknown item type {item_type} for {data.title} not found in list of acceptable items") + logger.debug(f"Unknown item type {item_type} for {data.title}") return None formatted_aired_at = _get_formatted_date(data, item_type) - genres = getattr(data, "genres", None) or show_genres + year = getattr(data, "year", None) or (formatted_aired_at.year if formatted_aired_at else None) item = { "title": getattr(data, "title", None), - "year": getattr(data, "year", None), + "year": year, "status": getattr(data, "status", None), "aired_at": formatted_aired_at, "imdb_id": getattr(data.ids, "imdb", None), "tvdb_id": getattr(data.ids, "tvdb", None), "tmdb_id": getattr(data.ids, "tmdb", None), - "genres": genres, + "genres": getattr(data, "genres", None), "network": getattr(data, "network", None), "country": getattr(data, "country", None), "language": getattr(data, "language", None), - "requested_at": datetime.now(), + "requested_at": datetime.now(), } - + item["is_anime"] = ( - ("anime" in genres or "animation" in genres) if genres + ("anime" in item['genres'] or "animation" in item['genres']) if item['genres'] and item["country"] in ("jp", "kr") else False ) @@ -134,17 +145,14 @@ def _map_item_from_data(data, item_type: str, show_genres: List[str] = None) -> item["number"] = data.number return Episode(item) case _: - logger.error(f"Unknown item type {item_type} for {data.title} not found in list of acceptable items") + logger.error(f"Failed to create item from data: {data}") return None - def _get_formatted_date(data, item_type: str) -> Optional[datetime]: """Get the formatted aired date from the data.""" - if item_type in ["show", "season", "episode"] and (first_aired := getattr(data, "first_aired", None)): - return datetime.strptime(first_aired, "%Y-%m-%dT%H:%M:%S.%fZ") - if item_type == "movie" and (released := getattr(data, "released", None)): - return datetime.strptime(released, "%Y-%m-%d") - return None + date_str = getattr(data, "first_aired" if item_type in ["show", "season", "episode"] else "released", None) + date_format = "%Y-%m-%dT%H:%M:%S.%fZ" if item_type in ["show", "season", "episode"] else "%Y-%m-%d" + return datetime.strptime(date_str, date_format) if date_str else None def get_show(imdb_id: str) -> dict: @@ -159,20 +167,11 @@ def create_item_from_imdb_id(imdb_id: str) -> Optional[MediaItem]: url = f"https://api.trakt.tv/search/imdb/{imdb_id}?extended=full" response = get(url, additional_headers={"trakt-api-version": "2", "trakt-api-key": CLIENT_ID}) if not response.is_ok or not response.data: - logger.error(f"Failed to create item using imdb id: {imdb_id}") # This returns an empty list for response.data - return None - - def find_first(preferred_types, data): - for type in preferred_types: - for d in data: - if d.type == type: - return d + logger.error(f"Failed to create item using imdb id: {imdb_id}") return None - data = find_first(["show", "movie", "season", "episode"], response.data) - if data: - return _map_item_from_data(getattr(data, data.type), data.type) - return None + data = next((d for d in 
response.data if d.type in ["show", "movie", "season", "episode"]), None) + return _map_item_from_data(getattr(data, data.type), data.type) if data else None def get_imdbid_from_tmdb(tmdb_id: str) -> Optional[str]: """Wrapper for trakt.tv API search method.""" @@ -180,16 +179,4 @@ def get_imdbid_from_tmdb(tmdb_id: str) -> Optional[str]: response = get(url, additional_headers={"trakt-api-version": "2", "trakt-api-key": CLIENT_ID}) if not response.is_ok or not response.data: return None - imdb_id = get_imdb_id_from_list(response.data) - if imdb_id: - return imdb_id - logger.error(f"Failed to fetch imdb_id for tmdb_id: {tmdb_id}") - return None - -def get_imdb_id_from_list(namespaces): - for ns in namespaces: - if ns.type == 'movie': - return ns.movie.ids.imdb - elif ns.type == 'show': - return ns.show.ids.imdb - return None + return next((ns.movie.ids.imdb if ns.type == 'movie' else ns.show.ids.imdb for ns in response.data if ns.type in ['movie', 'show']), None) diff --git a/backend/program/media/item.py b/backend/program/media/item.py index 4ef374ec..6fad930c 100644 --- a/backend/program/media/item.py +++ b/backend/program/media/item.py @@ -1,3 +1,4 @@ +import re from dataclasses import dataclass from datetime import datetime, timedelta from typing import List, Optional, Self @@ -5,6 +6,7 @@ from program.media.state import States from RTN import Torrent from RTN.patterns import extract_episodes +from unidecode import unidecode from utils.logger import logger @@ -51,7 +53,7 @@ def __init__(self, item: dict) -> None: self.parent: Optional[Self] = None # Media related - self.title: Optional[str] = item.get("title", None) + self.title: Optional[str] = self.clean_title(item.get("title", None)) self.imdb_id: Optional[str] = item.get("imdb_id", None) if self.imdb_id: self.imdb_link: Optional[str] = f"https://www.imdb.com/title/{self.imdb_id}/" @@ -63,6 +65,7 @@ def __init__(self, item: dict) -> None: self.country: Optional[str] = item.get("country", None) self.language: Optional[str] = item.get("language", None) self.aired_at: Optional[datetime] = item.get("aired_at", None) + self.year: Optional[int] = item.get("year" , None) self.genres: Optional[List[str]] = item.get("genres", []) # Plex related @@ -116,6 +119,18 @@ def _determine_state(self): return States.Requested return States.Unknown + def clean_title(self, title: Optional[str]) -> Optional[str]: + """Clean the title by removing non-alphanumeric characters and mapping special characters.""" + if title is None: + return None + # Convert special characters to closest ASCII equivalents + title = unidecode(title) + # Replace non-alphanumeric characters with spaces + title = re.sub(r'[^a-zA-Z0-9]', ' ', title) + # Remove extra spaces + title = re.sub(r'\s+', ' ', title).strip() + return title + def copy_other_media_attr(self, other): """Copy attributes from another media item.""" self.title = getattr(other, "title", None) @@ -125,6 +140,7 @@ def copy_other_media_attr(self, other): self.country = getattr(other, "country", None) self.language = getattr(other, "language", None) self.aired_at = getattr(other, "aired_at", None) + self.year = getattr(other, "year", None) self.genres = getattr(other, "genres", []) self.is_anime = getattr(other, "is_anime", False) self.overseerr_id = getattr(other, "overseerr_id", None) @@ -157,6 +173,7 @@ def to_dict(self): "state": self.state.value, "imdb_link": self.imdb_link if hasattr(self, "imdb_link") else None, "aired_at": self.aired_at, + "year": self.year if hasattr(self, "year") else None, "genres": 
self.genres if hasattr(self, "genres") else None, "is_anime": self.is_anime if hasattr(self, "is_anime") else False, "guid": self.guid, @@ -231,6 +248,26 @@ def get_top_title(self) -> str: case _: return self.title + def get_top_year(self) -> Optional[int]: + """Get the top year of the item.""" + match self.__class__.__name__: + case "Season": + return self.parent.year + case "Episode": + return self.parent.parent.year + case _: + return self.year + + def get_season_year(self) -> Optional[int]: + """Get the season year of the item; returns None for a Show.""" + match self.__class__.__name__: + case "Season": + return self.year + case "Episode": + return self.parent.year + case _: + return None + def __hash__(self): return hash(self.item_id) @@ -267,6 +304,7 @@ def __init__(self, item): self.locations = item.get("locations", []) self.seasons: list[Season] = item.get("seasons", []) self.item_id = ItemId(self.imdb_id) + self.propagate_attributes_to_childs() def get_season_index_by_id(self, item_id): """Find the index of a season by its item_id.""" @@ -321,6 +359,24 @@ def add_season(self, season): season.item_id.parent_id = self.item_id self.seasons = sorted(self.seasons, key=lambda s: s.number) + def propagate_attributes_to_childs(self): + """Propagate show attributes to seasons and episodes if they are empty or do not match.""" + # Important attributes that need to be connected. + attributes = ["genres", "country", "network", "language", "is_anime"] + + def propagate(target, source): + for attr in attributes: + source_value = getattr(source, attr, None) + target_value = getattr(target, attr, None) + # If the target's value is falsy (None, False, 0, []) and the + # source has a value, copy the source value down to the child. + if (not target_value) and source_value is not None: + setattr(target, attr, source_value) + + for season in self.seasons: + propagate(season, self) + for episode in season.episodes: + propagate(episode, self) class Season(MediaItem): """Season class""" @@ -399,6 +455,13 @@ def log_string(self): def get_top_title(self) -> str: return self.parent.title + def get_top_year(self) -> Optional[int]: + return self.parent.year + + def get_season_year(self) -> Optional[int]: + return self.year + + class Episode(MediaItem): """Episode class""" @@ -436,6 +499,12 @@ def log_string(self): def get_top_title(self) -> str: return self.parent.parent.title + def get_top_year(self) -> Optional[int]: + return self.parent.parent.year + + def get_season_year(self) -> Optional[int]: + return self.parent.year + def _set_nested_attr(obj, key, value): if "."
in key: diff --git a/backend/program/program.py b/backend/program/program.py index 734fc584..4a0c5b40 100644 --- a/backend/program/program.py +++ b/backend/program/program.py @@ -20,7 +20,7 @@ from program.scrapers import Scraping from program.settings.manager import settings_manager from program.settings.models import get_version -from program.updaters.plex import PlexUpdater +from program.updaters import Updater from utils import data_dir_path from utils.logger import logger, scrub_logs @@ -63,9 +63,9 @@ def initialize_services(self): } self.indexing_services = {TraktIndexer: TraktIndexer()} self.processing_services = { - Scraping: Scraping(hash_cache), + Scraping: Scraping(), Symlinker: Symlinker(self.media_items), - PlexUpdater: PlexUpdater(), + Updater: Updater(), } self.downloader_services = { Debrid: Debrid(hash_cache), diff --git a/backend/program/scrapers/__init__.py b/backend/program/scrapers/__init__.py index 5836abba..81263fd9 100644 --- a/backend/program/scrapers/__init__.py +++ b/backend/program/scrapers/__init__.py @@ -1,8 +1,9 @@ -from copy import copy import threading +from copy import copy from datetime import datetime -from typing import Dict, Generator, List, Set, Union +from typing import Dict, Generator, List, Union +from program.cache import HashCache from program.media.item import Episode, MediaItem, Movie, Season, Show from program.media.state import States from program.scrapers.annatar import Annatar @@ -11,24 +12,20 @@ from program.scrapers.mediafusion import Mediafusion from program.scrapers.orionoid import Orionoid from program.scrapers.prowlarr import Prowlarr +from program.scrapers.shared import _parse_results from program.scrapers.torbox import TorBoxScraper from program.scrapers.torrentio import Torrentio +from program.scrapers.zilean import Zilean from program.settings.manager import settings_manager -from program.settings.versions import models -from RTN import RTN, Torrent, sort_torrents -from RTN.exceptions import GarbageTorrent +from RTN import Torrent from utils.logger import logger class Scraping: - def __init__(self, hash_cache): + def __init__(self): self.key = "scraping" self.initialized = False self.settings = settings_manager.settings.scraping - self.settings_model = settings_manager.settings.ranking - self.ranking_model = models.get(self.settings_model.profile) - self.rtn = RTN(self.settings_model, self.ranking_model) - self.hash_cache = hash_cache self.services = { Annatar: Annatar(), Torrentio: Torrentio(), @@ -37,7 +34,8 @@ def __init__(self, hash_cache): Jackett: Jackett(), TorBoxScraper: TorBoxScraper(), Mediafusion: Mediafusion(), - Prowlarr: Prowlarr() + Prowlarr: Prowlarr(), + Zilean: Zilean() } self.initialized = self.validate() if not self.initialized: @@ -48,10 +46,10 @@ def validate(self): def yield_incomplete_children(self, item: MediaItem) -> Union[List[Season], List[Episode]]: if isinstance(item, Season): - res = [e for e in item.episodes if e.state != States.Completed and e.is_released] + res = [e for e in item.episodes if e.state != States.Completed and e.is_released and self.should_submit(e)] return res if isinstance(item, Show): - res = [s for s in item.seasons if s.state != States.Completed and s.is_released] + res = [s for s in item.seasons if s.state != States.Completed and s.is_released and self.should_submit(s)] return res return None @@ -59,16 +57,16 @@ def partial_state(self, item: MediaItem) -> bool: if item.state != States.PartiallyCompleted: return False if isinstance(item, Show): - sres = [s for s in item.seasons 
if s.state != States.Completed and s.is_released] + sres = [s for s in item.seasons if s.state != States.Completed and s.is_released and self.should_submit(s)] res = [] for s in sres: - if all(episode.is_released == True and episode.state != States.Completed for episode in s.episodes): + if all(episode.is_released and episode.state != States.Completed for episode in s.episodes): res.append(s) else: - res = res + [e for e in s.episodes if e.is_released == True and e.state != States.Completed] + res = res + [e for e in s.episodes if e.is_released and e.state != States.Completed] return res if isinstance(item, Season): - return [e for e in s.episodes if e.is_release == True] + return [e for e in item.episodes if e.is_released] return item def run(self, item: Union[Show, Season, Episode, Movie]) -> Generator[Union[Show, Season, Episode, Movie], None, None]: @@ -82,11 +80,26 @@ def run(self, item: Union[Show, Season, Episode, Movie]) -> Generator[Union[Show yield partial_state return + sorted_streams = self.scrape(item) + + # Set the streams and yield the item + item.streams.update(sorted_streams) + item.set("scraped_at", datetime.now()) + item.set("scraped_times", item.scraped_times + 1) + + if not item.get("streams", {}): + logger.log("NOT_FOUND", f"Scraping returned no good results for {item.log_string}") + yield self.yield_incomplete_children(item) + return + + yield item + + def scrape(self, item: MediaItem, log = True) -> Dict[str, Torrent]: + """Scrape an item.""" threads: List[threading.Thread] = [] results: Dict[str, str] = {} results_lock = threading.Lock() - # Wondering if we do this, or just create a dict with some attrs instead item_copy = copy(item) def run_service(service, item_copy): @@ -103,29 +116,17 @@ def run_service(service, item_copy): for thread in threads: thread.join() - # Parse the results into Torrent objects - sorted_streams: Dict[str, Torrent] = self._parse_results(item_copy, results) + sorted_streams: Dict[str, Torrent] = _parse_results(item_copy, results) - # For debug purposes: - if sorted_streams and settings_manager.settings.debug: + if sorted_streams and (log and settings_manager.settings.debug): item_type = item.type.title() - for _, sorted_tor in sorted_streams.items(): + top_results = sorted(sorted_streams.values(), key=lambda x: x.rank, reverse=True)[:10] + for sorted_tor in top_results: if isinstance(item, (Season, Episode)): logger.debug(f"[{item_type} {item.number}] Parsed '{sorted_tor.data.parsed_title}' with rank {sorted_tor.rank} and ratio {sorted_tor.lev_ratio:.2f}: '{sorted_tor.raw_title}'") else: logger.debug(f"[{item_type}] Parsed '{sorted_tor.data.parsed_title}' with rank {sorted_tor.rank} and ratio {sorted_tor.lev_ratio:.2f}: '{sorted_tor.raw_title}'") - - # Set the streams and yield the item - item.streams.update(sorted_streams) - item.set("scraped_at", datetime.now()) - item.set("scraped_times", item.scraped_times + 1) - - if not item.get("streams", {}): - logger.debug(f"Scraped zero items for {item.log_string}") - yield self.yield_incomplete_children(item) - return - - yield item + return sorted_streams @classmethod def can_we_scrape(cls, item: MediaItem) -> bool: @@ -149,84 +150,3 @@ def should_submit(item: MediaItem) -> bool: not item.scraped_at or (datetime.now() - item.scraped_at).total_seconds() > scrape_time ) - - def _parse_results(self, item: MediaItem, results: Dict[str, str]) -> Dict[str, Torrent]: - """Parse the results from the scrapers into Torrent objects.""" - torrents: Set[Torrent] = set() - processed_infohashes: Set[str] = 
set() - correct_title: str = item.get_top_title() - - if isinstance(item, Show): - needed_seasons = [season.number for season in item.seasons] - - for infohash, raw_title in results.items(): - if infohash in processed_infohashes or self.hash_cache.is_blacklisted(infohash): - continue - - try: - torrent: Torrent = self.rtn.rank( - raw_title=raw_title, - infohash=infohash, - correct_title=correct_title, - remove_trash=True - ) - - if not torrent or not torrent.fetch: - continue - - if isinstance(item, Movie): - if hasattr(item, 'aired_at'): - # If the item has an aired_at date and it's not in the future, we can check the year - if item.aired_at <= datetime.now() and item.aired_at.year == torrent.data.year: - torrents.add(torrent) - else: - # This is a questionable move. - torrents.add(torrent) - - elif isinstance(item, Show): - if not needed_seasons: - logger.error(f"No seasons found for {item.log_string}") - break - if ( - hasattr(torrent.data, 'season') - and len(torrent.data.season) >= (len(needed_seasons) - 1) - and ( - not hasattr(torrent.data, 'episode') - or len(torrent.data.episode) == 0 - ) - or torrent.data.is_complete - ): - torrents.add(torrent) - - elif isinstance(item, Season): - if ( - len(getattr(torrent.data, 'season', [])) == 1 - and item.number in torrent.data.season - and ( - not hasattr(torrent.data, 'episode') - or len(torrent.data.episode) == 0 - ) - or torrent.data.is_complete - ): - torrents.add(torrent) - - elif isinstance(item, Episode): - if ( - item.number in torrent.data.episode - and ( - not hasattr(torrent.data, 'season') - or item.parent.number in torrent.data.season - ) - or torrent.data.is_complete - ): - torrents.add(torrent) - - processed_infohashes.add(infohash) - - except (ValueError, AttributeError) as e: - logger.error(f"Failed to parse {raw_title}: {e}") - continue - except GarbageTorrent: - continue - - return sort_torrents(torrents) diff --git a/backend/program/scrapers/annatar.py b/backend/program/scrapers/annatar.py index 4643d0a9..3ba1ab99 100644 --- a/backend/program/scrapers/annatar.py +++ b/backend/program/scrapers/annatar.py @@ -1,7 +1,7 @@ """ Annatar scraper module """ from typing import Dict -from program.media.item import Episode, MediaItem, Season, Show +from program.media.item import Episode, MediaItem, Movie, Season, Show from program.settings.manager import settings_manager from requests import ConnectTimeout, ReadTimeout from requests.exceptions import RequestException @@ -91,7 +91,11 @@ def scrape(self, item: MediaItem) -> Dict[str, str]: def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: """Wrapper for `Annatar` scrape method""" - if isinstance(item, Season): + if isinstance(item, Show): + scrape_type = "series" + imdb_id = item.imdb_id + identifier = f"season=1" + elif isinstance(item, Season): scrape_type = "series" imdb_id = item.parent.imdb_id identifier = f"season={item.number}" @@ -99,7 +103,7 @@ def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: scrape_type = "series" imdb_id = item.parent.parent.imdb_id identifier = f"season={item.parent.number}&episode={item.number}" - else: + elif isinstance(item, Movie): identifier = None scrape_type = "movie" imdb_id = item.imdb_id diff --git a/backend/program/scrapers/knightcrawler.py b/backend/program/scrapers/knightcrawler.py index f4456a72..e24fdddf 100644 --- a/backend/program/scrapers/knightcrawler.py +++ b/backend/program/scrapers/knightcrawler.py @@ -1,7 +1,8 @@ """ Knightcrawler scraper module """ from typing import Dict -from 
program.media.item import Episode, MediaItem, Movie, Season, Show +from program.media.item import Episode, MediaItem +from program.scrapers.shared import _get_stremio_identifier from program.settings.manager import settings_manager from requests import ConnectTimeout, ReadTimeout from requests.exceptions import RequestException @@ -19,7 +20,7 @@ def __init__(self): self.initialized = self.validate() if not self.initialized: return - self.second_limiter = RateLimiter(max_calls=1, period=2) if self.settings.ratelimit else None + self.second_limiter = RateLimiter(max_calls=1, period=5) if self.settings.ratelimit else None logger.success("Knightcrawler initialized!") def validate(self) -> bool: @@ -49,7 +50,7 @@ def validate(self) -> bool: def run(self, item: MediaItem) -> Dict[str, str]: """Scrape the knightcrawler site for the given media items and update the object with scraped streams""" - if not item or isinstance(item, Show): + if not item: return {} try: @@ -62,7 +63,7 @@ def run(self, item: MediaItem) -> Dict[str, str]: except ReadTimeout: logger.warning(f"Knightcrawler read timeout for item: {item.log_string}") except RequestException as e: - if e.status_code == 429: + if e.response.status_code == 429: if self.second_limiter: self.second_limiter.limit_hit() else: @@ -84,11 +85,7 @@ def scrape(self, item: MediaItem) -> Dict[str, str]: def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: """Wrapper for `Knightcrawler` scrape method""" - identifier, scrape_type, imdb_id = None, "movie", item.imdb_id - if isinstance(item, Season): - identifier, scrape_type, imdb_id = f":{item.number}:1", "series", item.parent.imdb_id - elif isinstance(item, Episode): - identifier, scrape_type, imdb_id = f":{item.parent.number}:{item.number}", "series", item.parent.parent.imdb_id + identifier, scrape_type, imdb_id = _get_stremio_identifier(item) url = f"{self.settings.url}/{self.settings.filter}/stream/{scrape_type}/{imdb_id}" if identifier: diff --git a/backend/program/scrapers/mediafusion.py b/backend/program/scrapers/mediafusion.py index e169926a..92207d93 100644 --- a/backend/program/scrapers/mediafusion.py +++ b/backend/program/scrapers/mediafusion.py @@ -3,7 +3,8 @@ from typing import Dict import requests -from program.media.item import Episode, MediaItem, Season, Show +from program.media.item import Episode, MediaItem, Movie, Season, Show +from program.scrapers.shared import _get_stremio_identifier from program.settings.manager import settings_manager from program.settings.models import AppModel from requests import ConnectTimeout, ReadTimeout @@ -94,7 +95,7 @@ def validate(self) -> bool: def run(self, item: MediaItem) -> Dict[str, str]: """Scrape the mediafusion site for the given media items and update the object with scraped streams""" - if not item or isinstance(item, Show): + if not item: return {} try: @@ -125,11 +126,7 @@ def scrape(self, item: MediaItem) -> Dict[str, str]: def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: """Wrapper for `Mediafusion` scrape method""" - identifier, scrape_type, imdb_id = None, "movie", item.imdb_id - if isinstance(item, Season): - identifier, scrape_type, imdb_id = f":{item.number}:1", "series", item.parent.imdb_id - elif isinstance(item, Episode): - identifier, scrape_type, imdb_id = f":{item.parent.number}:{item.number}", "series", item.parent.parent.imdb_id + identifier, scrape_type, imdb_id = _get_stremio_identifier(item) url = f"{self.settings.url}/{self.encrypted_string}/stream/{scrape_type}/{imdb_id}" if identifier: 
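Both hunks above (Knightcrawler and Mediafusion) now delegate their movie/season/episode branching to the _get_stremio_identifier helper added in backend/program/scrapers/shared.py later in this patch. A rough sketch of the resulting call pattern; the trailing ".json" and the exact route shape are assumptions based on common Stremio addon conventions, since only the URL prefix is visible in the hunks:

    # Hypothetical helper for building a Stremio-style scrape URL.
    # An Episode yields e.g. ".../stream/series/tt0903747:1:2.json";
    # a Movie yields ".../stream/movie/tt1375666.json".
    from program.media.item import MediaItem
    from program.scrapers.shared import _get_stremio_identifier

    def build_stream_url(base_url: str, item: MediaItem) -> str | None:
        identifier, scrape_type, imdb_id = _get_stremio_identifier(item)
        if imdb_id is None:  # unsupported item type
            return None
        url = f"{base_url}/stream/{scrape_type}/{imdb_id}"
        if identifier:
            url += identifier  # ":<season>:<episode>" suffix for series
        return f"{url}.json"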
diff --git a/backend/program/scrapers/orionoid.py b/backend/program/scrapers/orionoid.py index 2c79f2f7..f7d3b191 100644 --- a/backend/program/scrapers/orionoid.py +++ b/backend/program/scrapers/orionoid.py @@ -2,7 +2,7 @@ from datetime import datetime from typing import Dict -from program.media.item import Episode, MediaItem, Season, Show +from program.media.item import Episode, MediaItem, Movie, Season, Show from program.settings.manager import settings_manager from requests import ConnectTimeout, ReadTimeout from requests.exceptions import RequestException @@ -17,6 +17,7 @@ class Orionoid: def __init__(self): self.key = "orionoid" + self.base_url = "https://api.orionoid.com" self.settings = settings_manager.settings.scraping.orionoid self.timeout = self.settings.timeout self.is_premium = False @@ -27,8 +28,6 @@ def __init__(self): self.initialized = True else: return - self.orionoid_limit = 0 - self.orionoid_expiration = datetime.now() self.second_limiter = RateLimiter(max_calls=1, period=5) if self.settings.ratelimit else None logger.success("Orionoid initialized!") @@ -47,7 +46,7 @@ def validate(self) -> bool: logger.error("Orionoid ratelimit must be a valid boolean.") return False try: - url = f"https://api.orionoid.com?keyapp={KEY_APP}&keyuser={self.settings.api_key}&mode=user&action=retrieve" + url = f"{self.base_url}?keyapp={KEY_APP}&keyuser={self.settings.api_key}&mode=user&action=retrieve" response = get(url, retry_if_failed=True, timeout=self.timeout) if response.is_ok and hasattr(response.data, "result"): if response.data.result.status != "success": @@ -69,7 +68,7 @@ def validate(self) -> bool: def check_premium(self) -> bool: """Check if the user is active, has a premium account, and has RealDebrid service enabled.""" - url = f"https://api.orionoid.com?keyapp={KEY_APP}&keyuser={self.settings.api_key}&mode=user&action=retrieve" + url = f"{self.base_url}?keyapp={KEY_APP}&keyuser={self.settings.api_key}&mode=user&action=retrieve" response = get(url, retry_if_failed=False) if response.is_ok and hasattr(response.data, "data"): active = response.data.data.status == "active" @@ -79,11 +78,32 @@ def check_premium(self) -> bool: return True return False + def check_limit(self) -> bool: + """Check if the user has exceeded the rate limit for the Orionoid API.""" + url = f"{self.base_url}?keyapp={KEY_APP}&keyuser={self.settings.api_key}&mode=user&action=retrieve" + try: + response = get(url) + if response.is_ok and hasattr(response.data, "data"): + remaining = response.data.data.requests.streams.daily.remaining + if remaining is None: + return False + elif remaining and remaining <= 0: + return True + except Exception as e: + logger.error(f"Orionoid failed to check limit: {e}") + return False + def run(self, item: MediaItem) -> Dict[str, str]: """Scrape the orionoid site for the given media items and update the object with scraped streams.""" - if not item or isinstance(item, Show): + if not item: return {} + if not self.is_unlimited: + limit_hit = self.check_limit() + if limit_hit: + logger.debug("Orionoid daily limits have been reached") + return {} + try: return self.scrape(item) except RateLimitExceeded: @@ -103,11 +123,7 @@ def run(self, item: MediaItem) -> Dict[str, str]: def scrape(self, item: MediaItem) -> Dict[str, str]: """Scrape the given media item""" - try: - data, stream_count = self.api_scrape(item) - except: - raise - + data, stream_count = self.api_scrape(item) if len(data) > 0: logger.log("SCRAPER", f"Found {len(data)} streams out of {stream_count} for 
{item.log_string}") else: @@ -116,7 +132,6 @@ def construct_url(self, media_type, imdb_id, season=None, episode=None) -> str: """Construct the URL for the Orionoid API.""" - base_url = "https://api.orionoid.com" params = { "keyapp": KEY_APP, "keyuser": self.settings.api_key, @@ -125,40 +140,37 @@ def construct_url(self, media_type, imdb_id, season=None, episode=None) -> str: "type": media_type, "idimdb": imdb_id[2:], "streamtype": "torrent", - "filename": "true", - "limitcount": self.settings.limitcount if self.settings.limitcount else 5, + "protocoltorrent": "magnet", "video3d": "false", - "sortorder": "descending", - "sortvalue": "best" if self.is_premium else "popularity", + "videoquality": "sd_hd8k" } - if self.is_unlimited: - # This can use 2x towards your Orionoid limits. Only use if user is unlimited. - params["debridlookup"] = "realdebrid" - - # There are 200 results per page. We probably don't need to go over 200. - if self.settings.limitcount > 200: - params["limitcount"] = 200 + if not self.is_unlimited: + params["limitcount"] = 5 + else: + params["limitcount"] = 5000 - if media_type == "show": + if season: params["numberseason"] = season - params["numberepisode"] = episode if episode else 1 + if episode: + params["numberepisode"] = episode - return f"{base_url}?{'&'.join([f'{key}={value}' for key, value in params.items()])}" + return f"{self.base_url}?{'&'.join([f'{key}={value}' for key, value in params.items()])}" def api_scrape(self, item: MediaItem) -> tuple[Dict, int]: """Wrapper for `Orionoid` scrape method""" - if isinstance(item, Season): + if isinstance(item, Movie): + imdb_id = item.imdb_id + url = self.construct_url("movie", imdb_id) + elif isinstance(item, Show): + imdb_id = item.imdb_id + url = self.construct_url("show", imdb_id, season=1) + elif isinstance(item, Season): imdb_id = item.parent.imdb_id url = self.construct_url("show", imdb_id, season=item.number) elif isinstance(item, Episode): imdb_id = item.parent.parent.imdb_id - url = self.construct_url( - "show", imdb_id, season=item.parent.number, episode=item.number - ) - else: - imdb_id = item.imdb_id - url = self.construct_url("movie", imdb_id) + url = self.construct_url("show", imdb_id, season=item.parent.number, episode=item.number) if self.second_limiter: with self.second_limiter: diff --git a/backend/program/scrapers/shared.py b/backend/program/scrapers/shared.py new file mode 100644 index 00000000..e268eb21 --- /dev/null +++ b/backend/program/scrapers/shared.py @@ -0,0 +1,119 @@ +"""Shared functions for scrapers.""" + +from datetime import datetime +from typing import Dict, Set + +from program.cache import hash_cache +from program.media.item import Episode, MediaItem, Movie, Season, Show +from program.settings.manager import settings_manager +from program.settings.versions import models +from RTN import RTN, Torrent, sort_torrents +from RTN.exceptions import GarbageTorrent +from utils.logger import logger + +settings_model = settings_manager.settings.ranking +ranking_model = models.get(settings_model.profile) +rtn = RTN(settings_model, ranking_model) + + +def _get_stremio_identifier(item: MediaItem) -> tuple: +    """Get the stremio identifier for a media item based on its type.""" +    if isinstance(item, Show): +        identifier, scrape_type, imdb_id = ":1:1", "series", item.imdb_id +    elif isinstance(item, Season): +        identifier, scrape_type, imdb_id = f":{item.number}:1", "series", item.parent.imdb_id +    elif isinstance(item, Episode): +        identifier, scrape_type,
imdb_id = f":{item.parent.number}:{item.number}", "series", item.parent.parent.imdb_id + elif isinstance(item, Movie): + identifier, scrape_type, imdb_id = None, "movie", item.imdb_id + else: + return None, None, None + return identifier, scrape_type, imdb_id + + +def _parse_results(item: MediaItem, results: Dict[str, str]) -> Dict[str, Torrent]: + """Parse the results from the scrapers into Torrent objects.""" + torrents: Set[Torrent] = set() + processed_infohashes: Set[str] = set() + correct_title: str = item.get_top_title() + + logger.log("SCRAPER", f"Processing {len(results)} results for {item.log_string}") + + if isinstance(item, Show): + needed_seasons = [season.number for season in item.seasons] + + for infohash, raw_title in results.items(): + if infohash in processed_infohashes or hash_cache.is_blacklisted(infohash): + continue + + try: + torrent: Torrent = rtn.rank( + raw_title=raw_title, + infohash=infohash, + correct_title=correct_title, + remove_trash=True + ) + + if not torrent or not torrent.fetch: + continue + + if isinstance(item, Movie): + if hasattr(item, 'aired_at'): + # If the item has an aired_at date and it's not in the future, we can check the year + if item.aired_at <= datetime.now() and item.aired_at.year == torrent.data.year: + torrents.add(torrent) + else: + # This is a questionable move. + torrents.add(torrent) + + elif isinstance(item, Show): + if not needed_seasons: + logger.error(f"No seasons found for {item.log_string}") + break + if ( + hasattr(torrent.data, 'season') + and len(torrent.data.season) >= (len(needed_seasons) - 1) + and ( + not hasattr(torrent.data, 'episode') + or len(torrent.data.episode) == 0 + ) + or torrent.data.is_complete + ): + torrents.add(torrent) + + elif isinstance(item, Season): + if ( + len(getattr(torrent.data, 'season', [])) == 1 + and item.number in torrent.data.season + and ( + not hasattr(torrent.data, 'episode') + or len(torrent.data.episode) == 0 + ) + or torrent.data.is_complete + ): + torrents.add(torrent) + + elif isinstance(item, Episode): + if ( + item.number in torrent.data.episode + and ( + not hasattr(torrent.data, 'season') + or item.parent.number in torrent.data.season + ) + or torrent.data.is_complete + ): + torrents.add(torrent) + + processed_infohashes.add(infohash) + + except (ValueError, AttributeError) as e: + logger.error(f"Failed to parse {raw_title}: {e}") + continue + except GarbageTorrent: + continue + + if torrents: + logger.log("SCRAPER", f"Processed {len(torrents)} matches for {item.log_string}") + return sort_torrents(torrents) + + return {} diff --git a/backend/program/scrapers/zilean.py b/backend/program/scrapers/zilean.py new file mode 100644 index 00000000..e4cdd0b8 --- /dev/null +++ b/backend/program/scrapers/zilean.py @@ -0,0 +1,110 @@ +""" Zilean scraper module """ + +from typing import Dict + +from program.media.item import Episode, MediaItem, Movie, Season, Show +from program.settings.manager import settings_manager +from program.settings.models import AppModel +from requests import ConnectTimeout, ReadTimeout +from requests.exceptions import RequestException +from utils.logger import logger +from utils.request import RateLimiter, RateLimitExceeded, ping, post + + +class Zilean: + """Scraper for `Zilean`""" + + def __init__(self): + self.key = "zilean" + self.api_key = None + self.downloader = None + self.app_settings: AppModel = settings_manager.settings + self.settings = self.app_settings.scraping.zilean + self.timeout = self.settings.timeout + self.initialized = self.validate() + if not 
self.initialized: + return + self.second_limiter = RateLimiter(max_calls=1, period=2) if self.settings.ratelimit else None + logger.success("Zilean initialized!") + + def validate(self) -> bool: + """Validate the Zilean settings.""" + if not self.settings.enabled: + logger.warning("Zilean is set to disabled.") + return False + if not self.settings.url: + logger.error("Zilean URL is not configured and will not be used.") + return False + if not isinstance(self.timeout, int) or self.timeout <= 0: + logger.error("Zilean timeout is not set or invalid.") + return False + if not isinstance(self.settings.ratelimit, bool): + logger.error("Zilean ratelimit must be a valid boolean.") + return False + + try: + url = f"{self.settings.url}/healthchecks/ping" + response = ping(url=url, timeout=self.timeout) + return response.ok + except Exception as e: + logger.error(f"Zilean failed to initialize: {e}") + return False + + def run(self, item: MediaItem) -> Dict[str, str]: + """Scrape the Zilean site for the given media items and update the object with scraped items""" + if not item: + return {} + + try: + return self.scrape(item) + except RateLimitExceeded: + if self.second_limiter: + self.second_limiter.limit_hit() + else: + logger.warning(f"Zilean ratelimit exceeded for item: {item.log_string}") + except ConnectTimeout: + logger.warning(f"Zilean connection timeout for item: {item.log_string}") + except ReadTimeout: + logger.warning(f"Zilean read timeout for item: {item.log_string}") + except RequestException as e: + logger.error(f"Zilean request exception: {e}") + except Exception as e: + logger.error(f"Zilean exception thrown: {e}") + return {} + + def scrape(self, item: MediaItem) -> Dict[str, str]: + """Scrape the given media item""" + data, item_count = self.api_scrape(item) + if data: + logger.log("SCRAPER", f"Found {len(data)} entries out of {item_count} for {item.log_string}") + else: + logger.log("NOT_FOUND", f"No entries found for {item.log_string}") + return data + + def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: + """Wrapper for `Zilean` scrape method""" + title = item.get_top_title() + if not title: + return {}, 0 + + url = f"{self.settings.url}/dmm/search" + payload = {"queryText": title} + + if self.second_limiter: + with self.second_limiter: + response = post(url, json=payload, timeout=self.timeout) + else: + response = post(url, json=payload, timeout=self.timeout) + + if not response.is_ok or not response.data: + return {}, 0 + + torrents: Dict[str, str] = {} + + for result in response.data: + if not result.filename or not result.infoHash: + continue + + torrents[result.infoHash] = result.filename + + return torrents, len(response.data) diff --git a/backend/program/settings/models.py b/backend/program/settings/models.py index a259c9ad..f1a13b44 100644 --- a/backend/program/settings/models.py +++ b/backend/program/settings/models.py @@ -69,12 +69,28 @@ def check_update_interval(cls, v): return v -class PlexLibraryModel(Updatable): - update_interval: int = 120 +# Updaters + + +class LocalLibraryModel(Observable): + enabled: bool = False + + +class PlexLibraryModel(Observable): + enabled: bool = False token: str = "" url: str = "http://localhost:32400" +class UpdatersModel(Observable): + updater_interval: int = 120 + local: LocalLibraryModel = LocalLibraryModel() + plex: PlexLibraryModel = PlexLibraryModel() + + +# Content Services + + class ListrrModel(Updatable): enabled: bool = False movie_lists: List[str] = [] @@ -104,6 +120,14 @@ class 
PlexWatchlistModel(Updatable): update_interval: int = 60 +class TraktOauthModel(BaseModel): + oauth_client_id: str = "" + oauth_client_secret: str = "" + oauth_redirect_uri: str = "" + access_token: str = "" + refresh_token: str = "" + + class TraktModel(Updatable): enabled: bool = False api_key: str = "" @@ -115,15 +139,7 @@ class TraktModel(Updatable): fetch_popular: bool = False popular_count: int = 10 update_interval: int = 300 - - -class TraktOauthModel(BaseModel): - # This is for app settings to handle oauth with trakt - oauth_client_id: str = "" - oauth_client_secret: str = "" - oauth_redirect_uri: str = "" - access_token: str = "" - refresh_token: str = "" + # oauth: TraktOauthModel = TraktOauthModel() class ContentModel(Observable): @@ -152,6 +168,12 @@ class KnightcrawlerConfig(Observable): timeout: int = 30 ratelimit: bool = True +class ZileanConfig(Observable): + enabled: bool = False + url: str = "http://localhost:8181" + timeout: int = 30 + ratelimit: bool = True + class MediafusionConfig(Observable): enabled: bool = False @@ -215,6 +237,7 @@ class ScraperModel(Observable): annatar: AnnatarConfig = AnnatarConfig() torbox_scraper: TorBoxScraperConfig = TorBoxScraperConfig() mediafusion: MediafusionConfig = MediafusionConfig() + zilean: ZileanConfig = ZileanConfig() # Version Ranking Model (set application defaults here!) @@ -246,6 +269,14 @@ class RTNSettingsModel(SettingsModel, Observable): "dubbed": CustomRank(fetch=True, rank=1), "subbed": CustomRank(fetch=True, rank=4), "av1": CustomRank(fetch=False, rank=0), + "h264": CustomRank(fetch=True, rank=0), + "h265": CustomRank(fetch=True, rank=0), + "hevc": CustomRank(fetch=True, rank=0), + "avc": CustomRank(fetch=True, rank=0), + "dvdrip": CustomRank(fetch=True, rank=5), + "bdrip": CustomRank(fetch=True, rank=5), + "brrip": CustomRank(fetch=True, rank=0), + "hdtv": CustomRank(fetch=True, rank=0), } @@ -266,11 +297,10 @@ class AppModel(Observable): debug: bool = True log: bool = True force_refresh: bool = False - local_only: bool = False map_metadata: bool = True tracemalloc: bool = False - plex: PlexLibraryModel = PlexLibraryModel() symlink: SymlinkModel = SymlinkModel() + updaters: UpdatersModel = UpdatersModel() downloaders: DownloadersModel = DownloadersModel() content: ContentModel = ContentModel() scraping: ScraperModel = ScraperModel() diff --git a/backend/program/state_transition.py b/backend/program/state_transition.py index bb264f98..e3f517bb 100644 --- a/backend/program/state_transition.py +++ b/backend/program/state_transition.py @@ -6,10 +6,9 @@ from program.libraries import SymlinkLibrary from program.media import Episode, MediaItem, Movie, Season, Show, States from program.scrapers import Scraping -from program.settings.manager import settings_manager from program.symlink import Symlinker from program.types import ProcessedEvent, Service -from program.updaters.plex import PlexUpdater +from program.updaters import Updater from utils.logger import logger @@ -78,7 +77,7 @@ def process_event(existing_item: MediaItem | None, emitted_by: Service, item: Me logger.debug(f"{sub_item.log_string} not submitted to Symlinker because it is not eligible") elif item.state == States.Symlinked: - next_service = PlexUpdater + next_service = Updater items_to_submit = [item] elif item.state == States.Completed: diff --git a/backend/program/types.py b/backend/program/types.py index bd51fe5e..0dc8d352 100644 --- a/backend/program/types.py +++ b/backend/program/types.py @@ -13,15 +13,17 @@ Orionoid, Scraping, Torrentio, + Zilean, ) from 
program.scrapers.torbox import TorBoxScraper from program.symlink import Symlinker +from program.updaters import Updater # Typehint classes -Scraper = Union[Scraping, Torrentio, Knightcrawler, Mediafusion, Orionoid, Jackett, Annatar, TorBoxScraper] +Scraper = Union[Scraping, Torrentio, Knightcrawler, Mediafusion, Orionoid, Jackett, Annatar, TorBoxScraper, Zilean] Content = Union[Overseerr, PlexWatchlist, Listrr, Mdblist, TraktContent] Downloader = Union[Debrid, TorBoxDownloader] -Service = Union[Content, SymlinkLibrary, Scraper, Downloader, Symlinker] +Service = Union[Content, SymlinkLibrary, Scraper, Downloader, Symlinker, Updater] MediaItemGenerator = Generator[MediaItem, None, MediaItem | None] ProcessedEvent = (MediaItem, Service, list[MediaItem]) diff --git a/backend/program/updaters/__init__.py b/backend/program/updaters/__init__.py index 11a04633..b176ca8e 100644 --- a/backend/program/updaters/__init__.py +++ b/backend/program/updaters/__init__.py @@ -2,33 +2,35 @@ from typing import Dict from program.media.item import MediaItem -from program.types import Service +from program.updaters.local import LocalUpdater +from program.updaters.plex import PlexUpdater from utils.logger import logger class Updater: - def __init__(self, services: Dict[Service, Service]): + def __init__(self): self.key = "updater" - self.services = services + self.services = { + PlexUpdater: PlexUpdater(), + LocalUpdater: LocalUpdater(), + } self.initialized = self.validate() def validate(self) -> bool: """Validate that at least one updater service is initialized.""" - if not self.services: - logger.error("No services provided to Updater.") - return False return any(service.initialized for service in self.services.values()) def run(self, item: MediaItem): if not self.initialized: - logger.error("Updater is not initialized properly. 
Cannot run services.") + logger.error("Updater is not initialized properly.") return for service_cls, service in self.services.items(): if service.initialized: try: - yield from service.run(item) + item = next(service.run(item)) except StopIteration: logger.debug(f"{service_cls.__name__} finished updating {item.log_string}") except Exception as e: - logger.error(f"{service_cls.__name__} failed to update {item.log_string}: {e}") \ No newline at end of file + logger.error(f"{service_cls.__name__} failed to update {item.log_string}: {e}") + yield item \ No newline at end of file diff --git a/backend/program/updaters/local.py b/backend/program/updaters/local.py index 7fd28d27..b129f3f3 100644 --- a/backend/program/updaters/local.py +++ b/backend/program/updaters/local.py @@ -16,12 +16,9 @@ def __init__(self): def validate(self) -> bool: """Validate Local Updater""" - if not settings_manager.settings.local_only: + if not settings_manager.settings.updaters.local.enabled: logger.warning("Local Updater is set to disabled.") return False - if settings_manager.settings.plex.token: - logger.error("Local Updater cannot be enabled if Plex is enabled!") - return False return True def run(self, item: Union[Movie, Show, Season, Episode]) -> Generator[Union[Movie, Show, Season, Episode], None, None]: @@ -43,14 +40,26 @@ def update_item(item): if isinstance(item, (Movie, Episode)): items_to_update = [item] if update_item(item) else [] + if items_to_update: + logger.log("LOCAL", f"Updated {item.log_string}") elif isinstance(item, Show): - for season in item.seasons: - items_to_update += [e for e in season.episodes if update_item(e)] + items_to_update = [e for season in item.seasons for e in season.episodes if update_item(e)] + if items_to_update: + if all(e.symlinked for season in item.seasons for e in season.episodes): + logger.log("LOCAL", f"Updated {item.log_string}") + else: + for updated_item in items_to_update: + logger.log("LOCAL", f"Updated {updated_item.log_string}") elif isinstance(item, Season): items_to_update = [e for e in item.episodes if update_item(e)] - - for updated_item in items_to_update: - yield updated_item + if items_to_update: + if all(e.symlinked for e in item.episodes): + logger.log("LOCAL", f"Updated {item.log_string}") + else: + for updated_item in items_to_update: + logger.log("LOCAL", f"Updated {updated_item.log_string}") if not items_to_update: - yield item + logger.log("LOCAL", f"No items to update for {item.log_string}") + + yield item diff --git a/backend/program/updaters/plex.py b/backend/program/updaters/plex.py index 9d33830e..dc3039af 100644 --- a/backend/program/updaters/plex.py +++ b/backend/program/updaters/plex.py @@ -19,7 +19,7 @@ def __init__(self): self.library_path = os.path.abspath( os.path.dirname(settings_manager.settings.symlink.library_path) ) - self.settings = settings_manager.settings.plex + self.settings = settings_manager.settings.updaters.plex self.plex: PlexServer = None self.sections: Dict[LibrarySection, List[str]] = {} self.initialized = self.validate() @@ -29,9 +29,12 @@ def __init__(self): def validate(self) -> bool: # noqa: C901 """Validate Plex library""" - if not self.settings.token: + if not self.settings.enabled: logger.warning("Plex Updater is set to disabled.") return False + if not self.settings.token: + logger.error("Plex token is not set!") + return False if not self.settings.url: logger.error("Plex URL is not set!") return False diff --git a/backend/utils/logger.py b/backend/utils/logger.py index 0ca0c2b4..192b95a6 100644 --- 
diff --git a/backend/utils/logger.py b/backend/utils/logger.py
index 0ca0c2b4..192b95a6 100644
--- a/backend/utils/logger.py
+++ b/backend/utils/logger.py
@@ -26,8 +26,8 @@ def get_log_settings(name, default_color, default_icon):

     # Define log levels and their default settings
     log_levels = {
-        "PROGRAM": (36, "d49e78", "🤖"),
-        "DEBRID": (38, "FE6F47", "🔗"),
+        "PROGRAM": (36, "cc6600", "🤖"),
+        "DEBRID": (38, "cc3333", "🔗"),
         "SYMLINKER": (39, "F9E79F", "🔗"),
         "SCRAPER": (40, "D299EA", "👻"),
         "COMPLETED": (41, "FFFFFF", "🟢"),
@@ -39,6 +39,9 @@ def get_log_settings(name, default_color, default_icon):
         "DISCOVERY": (47, "e56c49", "🔍"),
         "API": (47, "006989", "👾"),
         "PLEX": (47, "DAD3BE", "📽️ "),
+        "LOCAL": (48, "DAD3BE", "📽️ "),
+        "JELLYFIN": (48, "DAD3BE", "📽️ "),
+        "EMBY": (48, "DAD3BE", "📽️ "),
         "TRAKT": (48, "1DB954", "🎵"),
     }

@@ -52,12 +55,13 @@ def get_log_settings(name, default_color, default_icon):
     logger.level("DEBUG", icon="🤖")
     logger.level("WARNING", icon="⚠️ ")
     logger.level("CRITICAL", icon="")
+    logger.level("SUCCESS", icon="✔️ ")

     # Log format to match the old log format, but with color
     log_format = (
         "{time:YY-MM-DD} {time:HH:mm:ss} | "
         "{level.icon} {level: <9} | "
-        "{module}.{function} - {message}"
+        "{module}.{function} - {message}"
     )

     logger.configure(handlers=[
diff --git a/backend/utils/request.py b/backend/utils/request.py
index 469f7294..561c1baf 100644
--- a/backend/utils/request.py
+++ b/backend/utils/request.py
@@ -4,6 +4,7 @@
 import time
 from multiprocessing import Lock
 from types import SimpleNamespace
+from typing import Optional

 import requests
 from lxml import etree
@@ -82,7 +83,8 @@ def _make_request(
     additional_headers=None,
     retry_if_failed=True,
     response_type=SimpleNamespace,
-    proxies=None
+    proxies=None,
+    json=None
 ) -> ResponseObject:
     session = requests.Session()
     if retry_if_failed:
@@ -94,7 +96,7 @@ def _make_request(

     try:
         response = session.request(
-            method, url, headers=headers, data=data, params=params, timeout=timeout, proxies=proxies
+            method, url, headers=headers, data=data, params=params, timeout=timeout, proxies=proxies, json=json
         )
     except requests.exceptions.RequestException as e:
         logger.error(f"Request failed: {e}", exc_info=True)
@@ -117,7 +119,8 @@ def get(
     additional_headers=None,
     retry_if_failed=True,
     response_type=SimpleNamespace,
-    proxies=None
+    proxies=None,
+    json=None
 ) -> ResponseObject:
     """Requests get wrapper"""
     return _make_request(
@@ -129,18 +132,20 @@ def get(
         additional_headers=additional_headers,
         retry_if_failed=retry_if_failed,
         response_type=response_type,
-        proxies=proxies
+        proxies=proxies,
+        json=json
     )


 def post(
     url: str,
-    data: dict,
+    data: Optional[dict] = None,
     params: dict = None,
     timeout=10,
     additional_headers=None,
     retry_if_failed=False,
-    proxies=None
+    proxies=None,
+    json: Optional[dict] = None
 ) -> ResponseObject:
     """Requests post wrapper"""
     return _make_request(
@@ -151,7 +156,8 @@ def post(
         timeout=timeout,
         additional_headers=additional_headers,
         retry_if_failed=retry_if_failed,
-        proxies=proxies
+        proxies=proxies,
+        json=json
     )


@@ -161,7 +167,8 @@ def put(
     timeout=10,
     additional_headers=None,
     retry_if_failed=False,
-    proxies=None
+    proxies=None,
+    json=None
 ) -> ResponseObject:
     """Requests put wrapper"""
     return _make_request(
@@ -171,7 +178,8 @@ def put(
         timeout=timeout,
         additional_headers=additional_headers,
         retry_if_failed=retry_if_failed,
-        proxies=proxies
+        proxies=proxies,
+        json=json
     )


@@ -181,7 +189,8 @@ def delete(
     data=None,
     additional_headers=None,
     retry_if_failed=False,
-    proxies=None
+    proxies=None,
+    json=None
 ) -> ResponseObject:
     """Requests delete wrapper"""
     return _make_request(
@@ -191,7 +200,8 @@ def delete(
         timeout=timeout,
         additional_headers=additional_headers,
         retry_if_failed=retry_if_failed,
-        proxies=proxies
+        proxies=proxies,
+        json=json
     )
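
With json= threaded through to requests.Session.request, every wrapper can now send a JSON-encoded body; requests serializes the dict and sets the Content-Type header itself, whereas data= still sends a form-encoded body. Note post() also no longer requires data positionally. A hypothetical call against these wrappers (the URL and payload are placeholders, not from the patch):

    from utils.request import post

    # form-encoded body, as before
    post("http://example.com/api", data={"key": "value"})

    # JSON body via the new keyword; Content-Type: application/json
    # is set automatically by requests
    post("http://example.com/api", json={"queries": ["example"], "limit": 10})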
diff --git a/poetry.lock b/poetry.lock
index 3d483032..2be6d6f6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.

 [[package]]
 name = "annotated-types"
@@ -938,13 +938,13 @@ regex = ">=2023.12.25,<2024.0.0"

 [[package]]
 name = "plexapi"
-version = "4.15.13"
+version = "4.15.14"
 description = "Python bindings for the Plex API."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "PlexAPI-4.15.13-py3-none-any.whl", hash = "sha256:4450cef488dc562a778e84226dd6ffdcb21ce23f62e5234357a9c56f076c4892"},
-    {file = "PlexAPI-4.15.13.tar.gz", hash = "sha256:81734409cd574581ae21fb3702b8bd14ef8d2f5cc30c2127cebc8250ad906d14"},
+    {file = "PlexAPI-4.15.14-py3-none-any.whl", hash = "sha256:97330c16efa9be39a2eca35f186be3c0bc849fc5865f38882aa9dba21bd86846"},
+    {file = "PlexAPI-4.15.14.tar.gz", hash = "sha256:c0729e66dc0640467ef0edddbf0810ee05f96ea53cc3954dee5783d5d033e3f4"},
 ]

 [package.dependencies]
@@ -1165,13 +1165,13 @@ dev = ["importlib-metadata", "tox"]

 [[package]]
 name = "pyright"
-version = "1.1.368"
+version = "1.1.369"
 description = "Command line wrapper for pyright"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pyright-1.1.368-py3-none-any.whl", hash = "sha256:4a86e34b61c755b43b367af7fbf927fc6466fff6b81a9dcea07d42416c640af3"},
-    {file = "pyright-1.1.368.tar.gz", hash = "sha256:9b2aa48142d9d9fc9a6aedff743c76873cc4e615f3297cdbf893d5793f75b306"},
+    {file = "pyright-1.1.369-py3-none-any.whl", hash = "sha256:06d5167a8d7be62523ced0265c5d2f1e022e110caf57a25d92f50fb2d07bcda0"},
+    {file = "pyright-1.1.369.tar.gz", hash = "sha256:ad290710072d021e213b98cc7a2f90ae3a48609ef5b978f749346d1a47eb9af8"},
 ]

 [package.dependencies]
@@ -1320,13 +1320,13 @@ files = [

 [[package]]
 name = "rank-torrent-name"
-version = "0.2.20"
+version = "0.2.21"
 description = "Parse Torrents using PTN and Rank them according to your preferences!"
 optional = false
 python-versions = "<4.0,>=3.11"
 files = [
-    {file = "rank_torrent_name-0.2.20-py3-none-any.whl", hash = "sha256:0b7900af8bed6b697232a9a22b37c1588979dfef851ccd6cf60d707fdce2be51"},
-    {file = "rank_torrent_name-0.2.20.tar.gz", hash = "sha256:e17d7eadb050d53c68d568fd8a15853d5fc2acdee06f7bad20148f9cbb774640"},
+    {file = "rank_torrent_name-0.2.21-py3-none-any.whl", hash = "sha256:36e2a0ab613ba6118241605c1fe27328ba4b6fecd366f761488273d8be658f42"},
+    {file = "rank_torrent_name-0.2.21.tar.gz", hash = "sha256:abd2cf26e710d204ddd2ab7d31adbd9c1dc98bf7ac35f1ce55eb841ae3cfc177"},
 ]

 [package.dependencies]
@@ -1697,6 +1697,17 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""}
 [package.extras]
 devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"]

+[[package]]
+name = "unidecode"
+version = "1.3.8"
+description = "ASCII transliterations of Unicode text"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "Unidecode-1.3.8-py3-none-any.whl", hash = "sha256:d130a61ce6696f8148a3bd8fe779c99adeb4b870584eeb9526584e9aa091fd39"},
+    {file = "Unidecode-1.3.8.tar.gz", hash = "sha256:cfdb349d46ed3873ece4586b96aa75258726e2fa8ec21d6f00a591d98806c2f4"},
+]
+
 [[package]]
 name = "urllib3"
 version = "2.2.2"
@@ -2073,4 +2084,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "80b2ce303010a6782e4515097dbf432e70343d4a9693f5b98a9b29ba742ae585"
+content-hash = "ec310c9e9c651dde48ebfba5bfb8af0edf289060db25ff3396aebaef9dee7227"
diff --git a/pyproject.toml b/pyproject.toml
index 68afaf73..45a735d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,7 @@ opentelemetry-api = "^1.25.0"
 opentelemetry-sdk = "^1.25.0"
 opentelemetry-exporter-prometheus = "^0.46b0"
 prometheus-client = "^0.20.0"
+unidecode = "^1.3.8"

 [tool.poetry.group.dev.dependencies]
 pyright = "^1.1.352"
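
The one hand-added dependency in this release is unidecode, which transliterates Unicode text to plain ASCII. The patch only registers the package, so the snippet below illustrates the library itself rather than where Riven calls it; a plausible use is normalizing accented characters and typographic punctuation in scraped release titles before matching:

    from unidecode import unidecode

    # Accented characters and typographic dashes collapse to ASCII equivalents
    print(unidecode("Pokémon: Zoroark – Master of Illusions"))
    # -> Pokemon: Zoroark - Master of Illusions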